From 866d1ca4098cd5fbd81ff83ebe02954d9f54967b Mon Sep 17 00:00:00 2001 From: KiteEater <145987840+Kiteeater@users.noreply.github.com> Date: Sat, 2 May 2026 11:16:03 +0800 Subject: [PATCH] Populate Codex usage metadata for token accounting (#2585) --- .../deerflow/models/openai_codex_provider.py | 3 ++ backend/tests/test_codex_provider.py | 30 +++++++++++++++++ backend/tests/test_token_usage_middleware.py | 32 +++++++++++++++++++ 3 files changed, 65 insertions(+) create mode 100644 backend/tests/test_token_usage_middleware.py diff --git a/backend/packages/harness/deerflow/models/openai_codex_provider.py b/backend/packages/harness/deerflow/models/openai_codex_provider.py index 86dee0fc6..d8e46c2ae 100644 --- a/backend/packages/harness/deerflow/models/openai_codex_provider.py +++ b/backend/packages/harness/deerflow/models/openai_codex_provider.py @@ -21,6 +21,7 @@ from langchain_core.callbacks import CallbackManagerForLLMRun from langchain_core.language_models.chat_models import BaseChatModel from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage from langchain_core.outputs import ChatGeneration, ChatResult +from langchain_openai.chat_models.base import _create_usage_metadata_responses from deerflow.models.credential_loader import CodexCliCredential, load_codex_cli_credential @@ -346,6 +347,7 @@ class CodexChatModel(BaseChatModel): ) usage = response.get("usage", {}) + usage_metadata = _create_usage_metadata_responses(usage) if usage else None additional_kwargs = {} if reasoning_content: additional_kwargs["reasoning_content"] = reasoning_content @@ -355,6 +357,7 @@ class CodexChatModel(BaseChatModel): tool_calls=tool_calls if tool_calls else [], invalid_tool_calls=invalid_tool_calls, additional_kwargs=additional_kwargs, + usage_metadata=usage_metadata, response_metadata={ "model": response.get("model", self.model), "usage": usage, diff --git a/backend/tests/test_codex_provider.py b/backend/tests/test_codex_provider.py 
index 65e53a21d..512154564 100644
--- a/backend/tests/test_codex_provider.py
+++ b/backend/tests/test_codex_provider.py
@@ -82,6 +82,36 @@ def test_parse_response_text_content():
     assert result.generations[0].message.content == "Hello world"
 
 
+def test_parse_response_populates_usage_metadata():
+    model = _make_model()
+    response = {
+        "output": [
+            {
+                "type": "message",
+                "content": [{"type": "output_text", "text": "Hello world"}],
+            }
+        ],
+        "usage": {
+            "input_tokens": 10,
+            "output_tokens": 5,
+            "total_tokens": 15,
+            "input_tokens_details": {"cached_tokens": 3},
+            "output_tokens_details": {"reasoning_tokens": 2},
+        },
+        "model": "gpt-5.4",
+    }
+
+    result = model._parse_response(response)
+
+    assert result.generations[0].message.usage_metadata == {
+        "input_tokens": 10,
+        "output_tokens": 5,
+        "total_tokens": 15,
+        "input_token_details": {"cache_read": 3},
+        "output_token_details": {"reasoning": 2},
+    }
+
+
 def test_parse_response_reasoning_content():
     model = _make_model()
     response = {
diff --git a/backend/tests/test_token_usage_middleware.py b/backend/tests/test_token_usage_middleware.py
new file mode 100644
index 000000000..66a1f2229
--- /dev/null
+++ b/backend/tests/test_token_usage_middleware.py
@@ -0,0 +1,32 @@
+from unittest.mock import MagicMock, patch
+
+from langchain_core.messages import AIMessage
+
+from deerflow.agents.middlewares.token_usage_middleware import TokenUsageMiddleware
+
+
+def test_after_model_logs_usage_metadata_counts():
+    middleware = TokenUsageMiddleware()
+    state = {
+        "messages": [
+            AIMessage(
+                content="done",
+                usage_metadata={
+                    "input_tokens": 10,
+                    "output_tokens": 5,
+                    "total_tokens": 15,
+                },
+            )
+        ]
+    }
+
+    with patch("deerflow.agents.middlewares.token_usage_middleware.logger.info") as info_mock:
+        result = middleware.after_model(state=state, runtime=MagicMock())
+
+    assert result is None
+    info_mock.assert_called_once_with(
+        "LLM token usage: input=%s output=%s total=%s",
+        10,
+        5,
+        15,
+    )