mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-10 02:38:26 +00:00
* feat(middleware): inject dynamic context via DynamicContextMiddleware
Move memory and current date out of the system prompt and into a
dedicated <system-reminder> HumanMessage injected once per session
(frozen-snapshot pattern) via a new DynamicContextMiddleware.
This keeps the system prompt byte-exact across all users and sessions,
enabling maximum Anthropic/Bedrock prefix-cache reuse.
Key design decisions:
- ID-swap technique: reminder takes the first HumanMessage's ID
(replacing it in-place via add_messages), original content gets a
derived `{id}__user` ID (appended after). Preserves correct ordering.
- hide_from_ui: True on reminder messages so frontend filters them out.
- Midnight crossing: date-update reminder injected before the current
turn's HumanMessage when the conversation spans midnight.
- INFO-level logging for production diagnostics.
Also adds prompt-caching breakpoint budget enforcement tests and
updates ClaudeChatModel docs to reference the new pattern.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* feat(token-usage): log input/output token detail breakdown in middleware
Extend the LLM token usage log line to include input_token_details and
output_token_details (cache_creation, cache_read, reasoning, audio, etc.)
when present. Adds tests covering Anthropic cache detail logging from
both usage_metadata and response_metadata.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* fix: fix nginx
* fix(middleware): always inject date; gate memory on injection_enabled
Date injection is now unconditional — it is part of the static system
prompt replacement and should always be present. Memory injection
remains gated by `memory.injection_enabled` in the app config.
Previously the entire DynamicContextMiddleware was skipped when
injection_enabled was False, which also suppressed the date.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* fix(lint): format files and correct test assertions for token usage middleware
- ruff format dynamic_context_middleware.py and test_claude_provider_prompt_caching.py
- Remove unused pytest import from test_dynamic_context_middleware.py
- Fix two tests that asserted response_metadata fallback logic that
doesn't exist: replace with tests that match actual middleware behavior
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
* fix(middleware): address Copilot review comments on DynamicContextMiddleware
- Use additional_kwargs flag for reminder detection instead of content
substring matching, so user messages containing '<system-reminder>'
are not mistakenly treated as injected reminders
- Generate stable UUID when original HumanMessage.id is None to prevent
ambiguous 'None__user' derived IDs and message collisions
- Downgrade per-turn no-op log to DEBUG; keep actual injection events at INFO
- Add two new tests: missing-id UUID fallback and user-text false-positive
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---------
Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
235 lines
8.1 KiB
Python
235 lines
8.1 KiB
Python
"""Tests for TokenUsageMiddleware attribution annotations."""
|
|
|
|
import logging
|
|
from unittest.mock import MagicMock
|
|
|
|
from langchain_core.messages import AIMessage
|
|
|
|
from deerflow.agents.middlewares.token_usage_middleware import (
|
|
TOKEN_USAGE_ATTRIBUTION_KEY,
|
|
TokenUsageMiddleware,
|
|
)
|
|
|
|
|
|
def _make_runtime():
|
|
runtime = MagicMock()
|
|
runtime.context = {"thread_id": "test-thread"}
|
|
return runtime
|
|
|
|
|
|
class TestTokenUsageMiddleware:
    """Tests for TokenUsageMiddleware.after_model: token-usage logging and
    per-message attribution annotations."""

    # Logger the middleware writes its token-usage lines to.
    _LOGGER = "deerflow.agents.middlewares.token_usage_middleware"

    def test_logs_cache_token_details(self, caplog):
        # usage_metadata carries both input/output token detail breakdowns;
        # the middleware should surface all of them in a single INFO line.
        mw = TokenUsageMiddleware()
        msg = AIMessage(
            content="Here is the final answer.",
            usage_metadata={
                "input_tokens": 350,
                "output_tokens": 240,
                "total_tokens": 590,
                "input_token_details": {"audio": 10, "cache_creation": 200, "cache_read": 100},
                "output_token_details": {"audio": 10, "reasoning": 200},
            },
        )

        with caplog.at_level(logging.INFO, logger=self._LOGGER):
            result = mw.after_model({"messages": [msg]}, _make_runtime())

        assert result is not None
        assert "LLM token usage: input=350 output=240 total=590" in caplog.text
        assert "input_token_details={'audio': 10, 'cache_creation': 200, 'cache_read': 100}" in caplog.text
        assert "output_token_details={'audio': 10, 'reasoning': 200}" in caplog.text

    def test_logs_basic_tokens_when_no_detail_fields_in_usage_metadata(self, caplog):
        """When usage_metadata has only totals (no input_token_details), log just the counts."""
        mw = TokenUsageMiddleware()
        msg = AIMessage(
            content="Here is the final answer.",
            usage_metadata={"input_tokens": 350, "output_tokens": 240, "total_tokens": 590},
        )

        with caplog.at_level(logging.INFO, logger=self._LOGGER):
            result = mw.after_model({"messages": [msg]}, _make_runtime())

        assert result is not None
        assert "LLM token usage: input=350 output=240 total=590" in caplog.text
        assert "input_token_details" not in caplog.text

    def test_no_log_when_usage_metadata_is_missing(self, caplog):
        """When usage_metadata is absent, no token usage line is logged."""
        mw = TokenUsageMiddleware()
        # Counts live only in response_metadata here — the middleware does not
        # fall back to it, so nothing should be logged.
        msg = AIMessage(
            content="Here is the final answer.",
            response_metadata={
                "usage": {"input_tokens": 350, "output_tokens": 240, "total_tokens": 590}
            },
        )

        with caplog.at_level(logging.INFO, logger=self._LOGGER):
            result = mw.after_model({"messages": [msg]}, _make_runtime())

        assert result is not None
        assert "LLM token usage" not in caplog.text

    def test_annotates_todo_updates_with_structured_actions(self):
        # A write_todos call that completes one todo and starts another should
        # be annotated as a shared tool_batch with one action per transition.
        mw = TokenUsageMiddleware()
        todo_call = {
            "id": "write_todos:1",
            "name": "write_todos",
            "args": {
                "todos": [
                    {"content": "Inspect streaming path", "status": "completed"},
                    {"content": "Design token attribution schema", "status": "in_progress"},
                ]
            },
        }
        msg = AIMessage(
            content="",
            tool_calls=[todo_call],
            usage_metadata={"input_tokens": 100, "output_tokens": 20, "total_tokens": 120},
        )
        state = {
            "messages": [msg],
            "todos": [
                {"content": "Inspect streaming path", "status": "in_progress"},
                {"content": "Design token attribution schema", "status": "pending"},
            ],
        }

        result = mw.after_model(state, _make_runtime())

        assert result is not None
        annotated = result["messages"][0]
        attribution = annotated.additional_kwargs[TOKEN_USAGE_ATTRIBUTION_KEY]
        assert attribution["kind"] == "tool_batch"
        assert attribution["shared_attribution"] is True
        assert attribution["tool_call_ids"] == ["write_todos:1"]
        assert attribution["actions"] == [
            {
                "kind": "todo_complete",
                "content": "Inspect streaming path",
                "tool_call_id": "write_todos:1",
            },
            {
                "kind": "todo_start",
                "content": "Design token attribution schema",
                "tool_call_id": "write_todos:1",
            },
        ]

    def test_annotates_subagent_and_search_steps(self):
        # Mixed batch: a subagent `task` call plus a `web_search` call.
        mw = TokenUsageMiddleware()
        task_call = {
            "id": "task:1",
            "name": "task",
            "args": {
                "description": "spec-coder patch message grouping",
                "subagent_type": "general-purpose",
            },
        }
        search_call = {
            "id": "web_search:1",
            "name": "web_search",
            "args": {"query": "LangGraph useStream messages tuple"},
        }
        msg = AIMessage(content="", tool_calls=[task_call, search_call])

        result = mw.after_model({"messages": [msg]}, _make_runtime())

        assert result is not None
        attribution = result["messages"][0].additional_kwargs[TOKEN_USAGE_ATTRIBUTION_KEY]
        assert attribution["kind"] == "tool_batch"
        assert attribution["shared_attribution"] is True
        assert attribution["actions"] == [
            {
                "kind": "subagent",
                "description": "spec-coder patch message grouping",
                "subagent_type": "general-purpose",
                "tool_call_id": "task:1",
            },
            {
                "kind": "search",
                "tool_name": "web_search",
                "query": "LangGraph useStream messages tuple",
                "tool_call_id": "web_search:1",
            },
        ]

    def test_marks_final_answer_when_no_tools(self):
        # No tool calls at all: plain-text replies are tagged final_answer.
        mw = TokenUsageMiddleware()
        msg = AIMessage(content="Here is the final answer.")

        result = mw.after_model({"messages": [msg]}, _make_runtime())

        assert result is not None
        attribution = result["messages"][0].additional_kwargs[TOKEN_USAGE_ATTRIBUTION_KEY]
        assert attribution["kind"] == "final_answer"
        assert attribution["shared_attribution"] is False
        assert attribution["actions"] == []

    def test_annotates_removed_todos(self):
        # Emptying the todo list while state still holds a pending todo should
        # produce a todo_remove action for the dropped entry.
        mw = TokenUsageMiddleware()
        msg = AIMessage(
            content="",
            tool_calls=[
                {"id": "write_todos:remove", "name": "write_todos", "args": {"todos": []}}
            ],
        )
        state = {
            "messages": [msg],
            "todos": [{"content": "Archive obsolete plan", "status": "pending"}],
        }

        result = mw.after_model(state, _make_runtime())

        assert result is not None
        attribution = result["messages"][0].additional_kwargs[TOKEN_USAGE_ATTRIBUTION_KEY]
        assert attribution["kind"] == "todo_update"
        assert attribution["shared_attribution"] is False
        assert attribution["actions"] == [
            {
                "kind": "todo_remove",
                "content": "Archive obsolete plan",
                "tool_call_id": "write_todos:remove",
            }
        ]
|