From c1b7f1d18960b4b4adfc58ed8ebba3a95b3d3cc8 Mon Sep 17 00:00:00 2001 From: DanielWalnut <45447813+hetaoBackend@users.noreply.github.com> Date: Sat, 9 May 2026 09:27:02 +0800 Subject: [PATCH] feat: static system prompt with DynamicContextMiddleware for prefix-cache optimization (#2801) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(middleware): inject dynamic context via DynamicContextMiddleware Move memory and current date out of the system prompt and into a dedicated HumanMessage injected once per session (frozen-snapshot pattern) via a new DynamicContextMiddleware. This keeps the system prompt byte-exact across all users and sessions, enabling maximum Anthropic/Bedrock prefix-cache reuse. Key design decisions: - ID-swap technique: reminder takes the first HumanMessage's ID (replacing it in-place via add_messages), original content gets a derived `{id}__user` ID (appended after). Preserves correct ordering. - hide_from_ui: True on reminder messages so frontend filters them out. - Midnight crossing: date-update reminder injected before the current turn's HumanMessage when the conversation spans midnight. - INFO-level logging for production diagnostics. Also adds prompt-caching breakpoint budget enforcement tests and updates ClaudeChatModel docs to reference the new pattern. Co-Authored-By: Claude Sonnet 4.6 * feat(token-usage): log input/output token detail breakdown in middleware Extend the LLM token usage log line to include input_token_details and output_token_details (cache_creation, cache_read, reasoning, audio, etc.) when present. Adds tests covering Anthropic cache detail logging from both usage_metadata and response_metadata. Co-Authored-By: Claude Sonnet 4.6 * fix: fix nginx * fix(middleware): always inject date; gate memory on injection_enabled Date injection is now unconditional — it is part of the static system prompt replacement and should always be present. 
Memory injection remains gated by `memory.injection_enabled` in the app config. Previously the entire DynamicContextMiddleware was skipped when injection_enabled was False, which also suppressed the date. Co-Authored-By: Claude Sonnet 4.6 * fix(lint): format files and correct test assertions for token usage middleware - ruff format dynamic_context_middleware.py and test_claude_provider_prompt_caching.py - Remove unused pytest import from test_dynamic_context_middleware.py - Fix two tests that asserted response_metadata fallback logic that doesn't exist: replace with tests that match actual middleware behavior Co-Authored-By: Claude Sonnet 4.6 * fix(middleware): address Copilot review comments on DynamicContextMiddleware - Use additional_kwargs flag for reminder detection instead of content substring matching, so user messages containing '<system-reminder>' are not mistakenly treated as injected reminders - Generate stable UUID when original HumanMessage.id is None to prevent ambiguous 'None__user' derived IDs and message collisions - Downgrade per-turn no-op log to DEBUG; keep actual injection events at INFO - Add two new tests: missing-id UUID fallback and user-text false-positive Co-Authored-By: Claude Sonnet 4.6 --------- Co-authored-by: Claude Sonnet 4.6 --- .../deerflow/agents/lead_agent/agent.py | 6 + .../deerflow/agents/lead_agent/prompt.py | 16 +- .../middlewares/dynamic_context_middleware.py | 193 +++++++++++ .../middlewares/token_usage_middleware.py | 11 +- .../deerflow/models/claude_provider.py | 4 + backend/tests/test_csrf_middleware.py | 16 + .../tests/test_dynamic_context_middleware.py | 312 ++++++++++++++++++ backend/tests/test_token_usage_middleware.py | 77 +++++ 8 files changed, 623 insertions(+), 12 deletions(-) create mode 100644 backend/packages/harness/deerflow/agents/middlewares/dynamic_context_middleware.py create mode 100644 backend/tests/test_dynamic_context_middleware.py diff --git a/backend/packages/harness/deerflow/agents/lead_agent/agent.py 
b/backend/packages/harness/deerflow/agents/lead_agent/agent.py index 7540cdcdc..f4330abc1 100644 --- a/backend/packages/harness/deerflow/agents/lead_agent/agent.py +++ b/backend/packages/harness/deerflow/agents/lead_agent/agent.py @@ -258,6 +258,12 @@ def _build_middlewares( resolved_app_config = app_config or get_app_config() middlewares = build_lead_runtime_middlewares(app_config=resolved_app_config, lazy_init=True) + # Always inject current date (and optionally memory) as <system-reminder> into the + # first HumanMessage to keep the system prompt fully static for prefix-cache reuse. + from deerflow.agents.middlewares.dynamic_context_middleware import DynamicContextMiddleware + + middlewares.append(DynamicContextMiddleware(agent_name=agent_name, app_config=resolved_app_config)) + # Add summarization middleware if enabled summarization_middleware = _create_summarization_middleware(app_config=resolved_app_config) if summarization_middleware is not None: diff --git a/backend/packages/harness/deerflow/agents/lead_agent/prompt.py b/backend/packages/harness/deerflow/agents/lead_agent/prompt.py index b255e962d..6a82b8269 100644 --- a/backend/packages/harness/deerflow/agents/lead_agent/prompt.py +++ b/backend/packages/harness/deerflow/agents/lead_agent/prompt.py @@ -3,7 +3,6 @@ from __future__ import annotations import asyncio import logging import threading -from datetime import datetime from functools import lru_cache from typing import TYPE_CHECKING @@ -368,8 +367,6 @@ You are {agent_name}, an open-source super agent. {soul} {self_update_section} -{memory_context} - - Think concisely and strategically about the user's request BEFORE taking action - Break down the task: What is clear? What is ambiguous? What is missing? 
@@ -776,9 +773,6 @@ def apply_prompt_template( available_skills: set[str] | None = None, app_config: AppConfig | None = None, ) -> str: - # Get memory context - memory_context = _get_memory_context(agent_name, app_config=app_config) - # Include subagent section only if enabled (from runtime parameter) n = max_concurrent_subagents subagent_section = _build_subagent_section(n, app_config=app_config) if subagent_enabled else "" @@ -812,18 +806,18 @@ def apply_prompt_template( custom_mounts_section = _build_custom_mounts_section(app_config=app_config) acp_and_mounts_section = "\n".join(section for section in (acp_section, custom_mounts_section) if section) - # Format the prompt with dynamic skills and memory - prompt = SYSTEM_PROMPT_TEMPLATE.format( + # Build and return the fully static system prompt. + # Memory and current date are injected per-turn via DynamicContextMiddleware + # as a <system-reminder> in the first HumanMessage, keeping this prompt + # identical across users and sessions for maximum prefix-cache reuse. + return SYSTEM_PROMPT_TEMPLATE.format( agent_name=agent_name or "DeerFlow 2.0", soul=get_agent_soul(agent_name), self_update_section=_build_self_update_section(agent_name), skills_section=skills_section, deferred_tools_section=deferred_tools_section, - memory_context=memory_context, subagent_section=subagent_section, subagent_reminder=subagent_reminder, subagent_thinking=subagent_thinking, acp_section=acp_and_mounts_section, ) - - return prompt + f"\n{datetime.now().strftime('%Y-%m-%d, %A')}" diff --git a/backend/packages/harness/deerflow/agents/middlewares/dynamic_context_middleware.py b/backend/packages/harness/deerflow/agents/middlewares/dynamic_context_middleware.py new file mode 100644 index 000000000..628a60e88 --- /dev/null +++ b/backend/packages/harness/deerflow/agents/middlewares/dynamic_context_middleware.py @@ -0,0 +1,193 @@ +"""Middleware to inject dynamic context (memory, current date) as a system-reminder. 
+ +The system prompt is kept fully static for maximum prefix-cache reuse across users +and sessions. The current date is always injected. Per-user memory is also injected +when ``memory.injection_enabled`` is True in the app config. Both are delivered once +per conversation as a dedicated HumanMessage inserted before the +first user message (frozen-snapshot pattern). + +When a conversation spans midnight the middleware detects the date change and injects +a lightweight date-update reminder as a separate HumanMessage before the current turn. +This correction is persisted so subsequent turns on the new day see a consistent history +and do not re-inject. + +Reminder format: + + + ... + + 2026-05-08, Friday + + +Date-update format: + + + 2026-05-09, Saturday + +""" + +from __future__ import annotations + +import logging +import re +import uuid +from datetime import datetime +from typing import TYPE_CHECKING, override + +from langchain.agents.middleware import AgentMiddleware +from langchain_core.messages import HumanMessage +from langgraph.runtime import Runtime + +if TYPE_CHECKING: + from deerflow.config.app_config import AppConfig + +logger = logging.getLogger(__name__) + +_DATE_RE = re.compile(r"([^<]+)") +_DYNAMIC_CONTEXT_REMINDER_KEY = "dynamic_context_reminder" + + +def _extract_date(content: str) -> str | None: + """Return the first value found in *content*, or None.""" + m = _DATE_RE.search(content) + return m.group(1) if m else None + + +def _last_injected_date(messages: list) -> str | None: + """Scan messages in reverse and return the most recently injected date. + + Detection uses the ``dynamic_context_reminder`` additional_kwargs flag rather + than content substring matching, so user messages containing ```` + are not mistakenly treated as injected reminders. 
+ """ + for msg in reversed(messages): + if isinstance(msg, HumanMessage) and msg.additional_kwargs.get(_DYNAMIC_CONTEXT_REMINDER_KEY): + content_str = msg.content if isinstance(msg.content, str) else str(msg.content) + return _extract_date(content_str) + return None + + +class DynamicContextMiddleware(AgentMiddleware): + """Inject memory and current date into HumanMessages as a . + + First turn + ---------- + Prepends a full system-reminder (memory + date) to the first HumanMessage and + persists it (same message ID). The first message is then frozen for the whole + session — its content never changes again, so the prefix cache can hit on every + subsequent turn. + + Midnight crossing + ----------------- + If the conversation spans midnight, the current date differs from the date that + was injected earlier. In that case a lightweight date-update reminder is prepended + to the **current** (last) HumanMessage and persisted. Subsequent turns on the new + day see the corrected date in history and skip re-injection. + """ + + def __init__(self, agent_name: str | None = None, *, app_config: AppConfig | None = None): + super().__init__() + self._agent_name = agent_name + self._app_config = app_config + + def _build_full_reminder(self) -> str: + from deerflow.agents.lead_agent.prompt import _get_memory_context + + # Memory injection is gated by injection_enabled; date is always included. 
+ injection_enabled = self._app_config.memory.injection_enabled if self._app_config else True + memory_context = _get_memory_context(self._agent_name, app_config=self._app_config) if injection_enabled else "" + current_date = datetime.now().strftime("%Y-%m-%d, %A") + + lines: list[str] = [""] + if memory_context: + lines.append(memory_context.strip()) + lines.append("") # blank line separating memory from date + lines.append(f"{current_date}") + lines.append("") + + return "\n".join(lines) + + def _build_date_update_reminder(self) -> str: + current_date = datetime.now().strftime("%Y-%m-%d, %A") + return "\n".join( + [ + "", + f"{current_date}", + "", + ] + ) + + @staticmethod + def _make_reminder_and_user_messages(original: HumanMessage, reminder_content: str) -> tuple[HumanMessage, HumanMessage]: + """Return (reminder_msg, user_msg) using the ID-swap technique. + + reminder_msg takes the original message's ID so that add_messages replaces it + in-place (preserving position). user_msg carries the original content with a + derived ``{id}__user`` ID and is appended immediately after by add_messages. + + If the original message has no ID a stable UUID is generated so the derived + ``{id}__user`` ID never collapses to the ambiguous ``None__user`` string. 
+ """ + stable_id = original.id or str(uuid.uuid4()) + reminder_msg = HumanMessage( + content=reminder_content, + id=stable_id, + additional_kwargs={"hide_from_ui": True, _DYNAMIC_CONTEXT_REMINDER_KEY: True}, + ) + user_msg = HumanMessage( + content=original.content, + id=f"{stable_id}__user", + name=original.name, + additional_kwargs=original.additional_kwargs, + ) + return reminder_msg, user_msg + + def _inject(self, state) -> dict | None: + messages = list(state.get("messages", [])) + if not messages: + return None + + current_date = datetime.now().strftime("%Y-%m-%d, %A") + last_date = _last_injected_date(messages) + logger.debug( + "DynamicContextMiddleware._inject: msg_count=%d last_date=%r current_date=%r", + len(messages), + last_date, + current_date, + ) + + if last_date is None: + # ── First turn: inject full reminder as a separate HumanMessage ───── + first_idx = next((i for i, m in enumerate(messages) if isinstance(m, HumanMessage)), None) + if first_idx is None: + return None + full_reminder = self._build_full_reminder() + logger.info( + "DynamicContextMiddleware: injecting full reminder (len=%d, has_memory=%s) into first HumanMessage id=%r", + len(full_reminder), + "" in full_reminder, + messages[first_idx].id, + ) + reminder_msg, user_msg = self._make_reminder_and_user_messages(messages[first_idx], full_reminder) + return {"messages": [reminder_msg, user_msg]} + + if last_date == current_date: + # ── Same day: nothing to do ────────────────────────────────────────── + return None + + # ── Midnight crossed: inject date-update reminder as a separate HumanMessage ── + last_human_idx = next((i for i in reversed(range(len(messages))) if isinstance(messages[i], HumanMessage)), None) + if last_human_idx is None: + return None + + reminder_msg, user_msg = self._make_reminder_and_user_messages(messages[last_human_idx], self._build_date_update_reminder()) + logger.info("DynamicContextMiddleware: midnight crossing detected — injected date update before current 
turn") + return {"messages": [reminder_msg, user_msg]} + + @override + def before_agent(self, state, runtime: Runtime) -> dict | None: + return self._inject(state) + + @override + async def abefore_agent(self, state, runtime: Runtime) -> dict | None: + return self._inject(state) diff --git a/backend/packages/harness/deerflow/agents/middlewares/token_usage_middleware.py b/backend/packages/harness/deerflow/agents/middlewares/token_usage_middleware.py index 3fd6d6132..f59e7f2b7 100644 --- a/backend/packages/harness/deerflow/agents/middlewares/token_usage_middleware.py +++ b/backend/packages/harness/deerflow/agents/middlewares/token_usage_middleware.py @@ -267,11 +267,20 @@ class TokenUsageMiddleware(AgentMiddleware): usage = getattr(last, "usage_metadata", None) if usage: + input_token_details = usage.get("input_token_details") or {} + output_token_details = usage.get("output_token_details") or {} + detail_parts = [] + if input_token_details: + detail_parts.append(f"input_token_details={input_token_details}") + if output_token_details: + detail_parts.append(f"output_token_details={output_token_details}") + detail_suffix = f" {' '.join(detail_parts)}" if detail_parts else "" logger.info( - "LLM token usage: input=%s output=%s total=%s", + "LLM token usage: input=%s output=%s total=%s%s", usage.get("input_tokens", "?"), usage.get("output_tokens", "?"), usage.get("total_tokens", "?"), + detail_suffix, ) todos = state.get("todos") or [] diff --git a/backend/packages/harness/deerflow/models/claude_provider.py b/backend/packages/harness/deerflow/models/claude_provider.py index 35a15494d..7116ad13e 100644 --- a/backend/packages/harness/deerflow/models/claude_provider.py +++ b/backend/packages/harness/deerflow/models/claude_provider.py @@ -196,6 +196,10 @@ class ClaudeChatModel(ChatAnthropic): enforced by both the Anthropic API and AWS Bedrock. 
Breakpoints are placed on the *last* eligible blocks because later breakpoints cover a larger prefix and yield better cache hit rates. + + The system prompt is expected to be fully static (no per-user memory or + current date). Dynamic context is injected per-turn via + DynamicContextMiddleware as a <system-reminder> in the first HumanMessage. """ MAX_CACHE_BREAKPOINTS = 4 diff --git a/backend/tests/test_csrf_middleware.py b/backend/tests/test_csrf_middleware.py index 247e24cda..28a65c8d7 100644 --- a/backend/tests/test_csrf_middleware.py +++ b/backend/tests/test_csrf_middleware.py @@ -110,6 +110,22 @@ def test_auth_post_allows_forwarded_same_origin(): assert response.cookies.get("csrf_token") +def test_auth_post_allows_forwarded_same_origin_with_non_default_port(): + client = TestClient(_make_app(), base_url="http://internal:8000") + + response = client.post( + "/api/v1/auth/login/local", + headers={ + "Origin": "http://localhost:2026", + "X-Forwarded-Proto": "http", + "X-Forwarded-Host": "localhost:2026", + }, + ) + + assert response.status_code == 200 + assert response.cookies.get("csrf_token") + + def test_auth_post_allows_rfc_forwarded_same_origin(): client = TestClient(_make_app(), base_url="http://internal:8000") diff --git a/backend/tests/test_dynamic_context_middleware.py b/backend/tests/test_dynamic_context_middleware.py new file mode 100644 index 000000000..267460fd7 --- /dev/null +++ b/backend/tests/test_dynamic_context_middleware.py @@ -0,0 +1,312 @@ +"""Tests for DynamicContextMiddleware. + +Verifies that memory and current date are injected as a <system-reminder> into +the first HumanMessage exactly once per session (frozen-snapshot pattern). 
+""" + +from types import SimpleNamespace +from unittest import mock + +from langchain_core.messages import AIMessage, HumanMessage + +from deerflow.agents.middlewares.dynamic_context_middleware import ( + _DYNAMIC_CONTEXT_REMINDER_KEY, + DynamicContextMiddleware, +) + +_SYSTEM_REMINDER_TAG = "" + + +def _make_middleware(**kwargs) -> DynamicContextMiddleware: + return DynamicContextMiddleware(**kwargs) + + +def _fake_runtime(): + return SimpleNamespace(context={}) + + +def _reminder_msg(content: str, msg_id: str) -> HumanMessage: + """Build a reminder HumanMessage the way the middleware would produce it.""" + return HumanMessage( + content=content, + id=msg_id, + additional_kwargs={"hide_from_ui": True, _DYNAMIC_CONTEXT_REMINDER_KEY: True}, + ) + + +# --------------------------------------------------------------------------- +# Basic injection +# --------------------------------------------------------------------------- + + +def test_injects_system_reminder_into_first_human_message(): + mw = _make_middleware() + state = {"messages": [HumanMessage(content="Hello", id="msg-1")]} + + with mock.patch("deerflow.agents.lead_agent.prompt._get_memory_context", return_value=""), mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as mock_dt: + mock_dt.now.return_value.strftime.return_value = "2026-05-08, Friday" + result = mw.before_agent(state, _fake_runtime()) + + assert result is not None + updated_msgs = result["messages"] + assert len(updated_msgs) == 2 + + reminder_msg = updated_msgs[0] + assert isinstance(reminder_msg, HumanMessage) + assert reminder_msg.id == "msg-1" # takes the original ID (position swap) + assert reminder_msg.additional_kwargs.get(_DYNAMIC_CONTEXT_REMINDER_KEY) is True + assert _SYSTEM_REMINDER_TAG in reminder_msg.content + assert "2026-05-08, Friday" in reminder_msg.content + assert "Hello" not in reminder_msg.content # reminder only — no user text + + user_msg = updated_msgs[1] + assert isinstance(user_msg, 
HumanMessage) + assert user_msg.id == "msg-1__user" # derived ID + assert user_msg.content == "Hello" + + +def test_memory_included_when_present(): + mw = _make_middleware() + state = {"messages": [HumanMessage(content="Hi", id="msg-1")]} + + with ( + mock.patch( + "deerflow.agents.lead_agent.prompt._get_memory_context", + return_value="\nUser prefers Python.\n", + ), + mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as mock_dt, + ): + mock_dt.now.return_value.strftime.return_value = "2026-05-08, Friday" + result = mw.before_agent(state, _fake_runtime()) + + # Reminder is the first returned message; user query is the second + reminder_content = result["messages"][0].content + assert "User prefers Python." in reminder_content + assert "2026-05-08, Friday" in reminder_content + assert result["messages"][1].content == "Hi" + + +# --------------------------------------------------------------------------- +# Frozen-snapshot: no re-injection within a session +# --------------------------------------------------------------------------- + + +def test_skips_injection_if_already_present(): + """Second turn: separate reminder message already present → no update.""" + mw = _make_middleware() + reminder_content = "\n2026-05-08, Friday\n" + state = { + "messages": [ + _reminder_msg(reminder_content, "msg-1"), + HumanMessage(content="Hello", id="msg-1__user"), + AIMessage(content="Hi there"), + HumanMessage(content="Follow-up", id="msg-2"), + ] + } + + with mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as mock_dt: + mock_dt.now.return_value.strftime.return_value = "2026-05-08, Friday" + result = mw.before_agent(state, _fake_runtime()) + + assert result is None # no update needed + + +def test_injects_only_into_first_human_message_not_later_ones(): + """Reminder targets the first HumanMessage; subsequent messages are not touched.""" + mw = _make_middleware() + state = { + "messages": [ + HumanMessage(content="First", 
id="msg-1"), + AIMessage(content="Reply"), + HumanMessage(content="Second", id="msg-2"), + ] + } + + with mock.patch("deerflow.agents.lead_agent.prompt._get_memory_context", return_value=""), mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as mock_dt: + mock_dt.now.return_value.strftime.return_value = "2026-05-08, Friday" + result = mw.before_agent(state, _fake_runtime()) + + assert result is not None + msgs = result["messages"] + # Only the two injected messages are returned (reminder + original first query) + assert len(msgs) == 2 + assert msgs[0].id == "msg-1" # reminder takes first message's ID + assert msgs[0].additional_kwargs.get(_DYNAMIC_CONTEXT_REMINDER_KEY) is True + assert _SYSTEM_REMINDER_TAG in msgs[0].content + assert msgs[1].id == "msg-1__user" # original content with derived ID + assert msgs[1].content == "First" + # "Second" (msg-2) is not in the returned update — it is left unchanged + assert all(m.id != "msg-2" for m in msgs) + + +# --------------------------------------------------------------------------- +# Edge cases +# --------------------------------------------------------------------------- + + +def test_no_messages_returns_none(): + mw = _make_middleware() + result = mw.before_agent({"messages": []}, _fake_runtime()) + assert result is None + + +def test_no_human_message_returns_none(): + mw = _make_middleware() + state = {"messages": [AIMessage(content="assistant only")]} + with mock.patch("deerflow.agents.lead_agent.prompt._get_memory_context", return_value=""): + result = mw.before_agent(state, _fake_runtime()) + assert result is None + + +def test_list_content_message_handled_as_separate_reminder(): + """List-content (e.g. 
multi-modal) messages remain intact; reminder is a separate message.""" + mw = _make_middleware() + original_content = [{"type": "text", "text": "Hello"}] + state = {"messages": [HumanMessage(content=original_content, id="msg-1")]} + + with mock.patch("deerflow.agents.lead_agent.prompt._get_memory_context", return_value=""), mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as mock_dt: + mock_dt.now.return_value.strftime.return_value = "2026-05-08, Friday" + result = mw.before_agent(state, _fake_runtime()) + + assert result is not None + msgs = result["messages"] + assert len(msgs) == 2 + # Reminder is a plain string message with the flag set + assert isinstance(msgs[0].content, str) + assert msgs[0].additional_kwargs.get(_DYNAMIC_CONTEXT_REMINDER_KEY) is True + assert _SYSTEM_REMINDER_TAG in msgs[0].content + # Original list-content message is untouched + assert msgs[1].content == original_content + + +def test_reminder_uses_original_id_user_message_uses_derived_id(): + """Reminder takes original ID (position swap); user message gets {id}__user.""" + mw = _make_middleware() + original_id = "original-id-abc" + state = {"messages": [HumanMessage(content="Hello", id=original_id)]} + + with mock.patch("deerflow.agents.lead_agent.prompt._get_memory_context", return_value=""), mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as mock_dt: + mock_dt.now.return_value.strftime.return_value = "2026-05-08, Friday" + result = mw.before_agent(state, _fake_runtime()) + + assert result["messages"][0].id == original_id + assert result["messages"][1].id == f"{original_id}__user" + + +def test_message_without_id_gets_stable_uuid(): + """If the original HumanMessage has no ID, a UUID is generated and used consistently.""" + mw = _make_middleware() + state = {"messages": [HumanMessage(content="Hello", id=None)]} + + with mock.patch("deerflow.agents.lead_agent.prompt._get_memory_context", return_value=""), 
mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as mock_dt: + mock_dt.now.return_value.strftime.return_value = "2026-05-08, Friday" + result = mw.before_agent(state, _fake_runtime()) + + assert result is not None + reminder_id = result["messages"][0].id + user_id = result["messages"][1].id + assert reminder_id is not None + assert reminder_id != "None" + assert user_id == f"{reminder_id}__user" + + +def test_user_message_containing_system_reminder_tag_does_not_prevent_injection(): + """A user message containing '' must not be mistaken for a reminder.""" + mw = _make_middleware() + state = { + "messages": [ + HumanMessage(content="What is ?", id="msg-1"), + ] + } + + with mock.patch("deerflow.agents.lead_agent.prompt._get_memory_context", return_value=""), mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as mock_dt: + mock_dt.now.return_value.strftime.return_value = "2026-05-08, Friday" + result = mw.before_agent(state, _fake_runtime()) + + # Injection must happen — the user message does NOT carry the reminder flag + assert result is not None + assert result["messages"][0].additional_kwargs.get(_DYNAMIC_CONTEXT_REMINDER_KEY) is True + + +# --------------------------------------------------------------------------- +# Midnight crossing +# --------------------------------------------------------------------------- + + +def test_midnight_crossing_injects_date_update_as_separate_message(): + """When the date has changed, a separate date-update reminder is injected before + the current turn's HumanMessage using the ID-swap technique.""" + mw = _make_middleware() + reminder_content = "\n2026-05-08, Friday\n" + state = { + "messages": [ + _reminder_msg(reminder_content, "msg-1"), + HumanMessage(content="Hello", id="msg-1__user"), + AIMessage(content="Response"), + HumanMessage(content="Good morning", id="msg-2"), + ] + } + + with mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as 
mock_dt: + mock_dt.now.return_value.strftime.return_value = "2026-05-09, Saturday" + result = mw.before_agent(state, _fake_runtime()) + + assert result is not None + msgs = result["messages"] + assert len(msgs) == 2 + + # Date-update reminder takes the current message's ID + assert msgs[0].id == "msg-2" + assert msgs[0].additional_kwargs.get(_DYNAMIC_CONTEXT_REMINDER_KEY) is True + assert _SYSTEM_REMINDER_TAG in msgs[0].content + assert "2026-05-09, Saturday" in msgs[0].content + assert "Good morning" not in msgs[0].content # reminder only + + # Original user text appended with derived ID + assert msgs[1].id == "msg-2__user" + assert msgs[1].content == "Good morning" + + +def test_midnight_crossing_id_swap(): + """Date-update reminder uses original ID; user message uses {id}__user.""" + mw = _make_middleware() + reminder_content = "\n2026-05-08, Friday\n" + state = { + "messages": [ + _reminder_msg(reminder_content, "msg-1"), + HumanMessage(content="Next day message", id="msg-2"), + ] + } + + with mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as mock_dt: + mock_dt.now.return_value.strftime.return_value = "2026-05-09, Saturday" + result = mw.before_agent(state, _fake_runtime()) + + assert result["messages"][0].id == "msg-2" + assert result["messages"][1].id == "msg-2__user" + + +def test_no_second_midnight_injection_once_date_updated(): + """After a midnight update is persisted, the same-day path skips re-injection.""" + mw = _make_middleware() + date_update_content = "\n2026-05-09, Saturday\n" + state = { + "messages": [ + _reminder_msg( + "\n2026-05-08, Friday\n", + "msg-1", + ), + HumanMessage(content="Hello", id="msg-1__user"), + AIMessage(content="Response"), + _reminder_msg(date_update_content, "msg-2"), + HumanMessage(content="Good morning", id="msg-2__user"), + AIMessage(content="Good morning!"), + HumanMessage(content="Third turn", id="msg-3"), + ] + } + + with 
mock.patch("deerflow.agents.middlewares.dynamic_context_middleware.datetime") as mock_dt: + mock_dt.now.return_value.strftime.return_value = "2026-05-09, Saturday" + result = mw.before_agent(state, _fake_runtime()) + + assert result is None # same day as last injected date → no update diff --git a/backend/tests/test_token_usage_middleware.py b/backend/tests/test_token_usage_middleware.py index c3b1ffc4e..b24ff7b16 100644 --- a/backend/tests/test_token_usage_middleware.py +++ b/backend/tests/test_token_usage_middleware.py @@ -1,5 +1,6 @@ """Tests for TokenUsageMiddleware attribution annotations.""" +import logging from unittest.mock import MagicMock from langchain_core.messages import AIMessage @@ -17,6 +18,82 @@ def _make_runtime(): class TestTokenUsageMiddleware: + def test_logs_cache_token_details(self, caplog): + middleware = TokenUsageMiddleware() + message = AIMessage( + content="Here is the final answer.", + usage_metadata={ + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, + "input_token_details": { + "audio": 10, + "cache_creation": 200, + "cache_read": 100, + }, + "output_token_details": { + "audio": 10, + "reasoning": 200, + }, + }, + ) + + with caplog.at_level( + logging.INFO, + logger="deerflow.agents.middlewares.token_usage_middleware", + ): + result = middleware.after_model({"messages": [message]}, _make_runtime()) + + assert result is not None + assert "LLM token usage: input=350 output=240 total=590" in caplog.text + assert "input_token_details={'audio': 10, 'cache_creation': 200, 'cache_read': 100}" in caplog.text + assert "output_token_details={'audio': 10, 'reasoning': 200}" in caplog.text + + def test_logs_basic_tokens_when_no_detail_fields_in_usage_metadata(self, caplog): + """When usage_metadata has only totals (no input_token_details), log just the counts.""" + middleware = TokenUsageMiddleware() + message = AIMessage( + content="Here is the final answer.", + usage_metadata={ + "input_tokens": 350, + "output_tokens": 240, 
+ "total_tokens": 590, + }, + ) + + with caplog.at_level( + logging.INFO, + logger="deerflow.agents.middlewares.token_usage_middleware", + ): + result = middleware.after_model({"messages": [message]}, _make_runtime()) + + assert result is not None + assert "LLM token usage: input=350 output=240 total=590" in caplog.text + assert "input_token_details" not in caplog.text + + def test_no_log_when_usage_metadata_is_missing(self, caplog): + """When usage_metadata is absent, no token usage line is logged.""" + middleware = TokenUsageMiddleware() + message = AIMessage( + content="Here is the final answer.", + response_metadata={ + "usage": { + "input_tokens": 350, + "output_tokens": 240, + "total_tokens": 590, + } + }, + ) + + with caplog.at_level( + logging.INFO, + logger="deerflow.agents.middlewares.token_usage_middleware", + ): + result = middleware.after_model({"messages": [message]}, _make_runtime()) + + assert result is not None + assert "LLM token usage" not in caplog.text + def test_annotates_todo_updates_with_structured_actions(self): middleware = TokenUsageMiddleware() message = AIMessage(