Populate Codex usage metadata for token accounting (#2585)

KiteEater 2026-05-02 11:16:03 +08:00 committed by GitHub
parent 8ba01dfd83
commit 866d1ca409
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 65 additions and 0 deletions


@@ -21,6 +21,7 @@ from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage
from langchain_core.outputs import ChatGeneration, ChatResult
from langchain_openai.chat_models.base import _create_usage_metadata_responses
from deerflow.models.credential_loader import CodexCliCredential, load_codex_cli_credential
@@ -346,6 +347,7 @@ class CodexChatModel(BaseChatModel):
        )
        usage = response.get("usage", {})
        usage_metadata = _create_usage_metadata_responses(usage) if usage else None
        additional_kwargs = {}
        if reasoning_content:
            additional_kwargs["reasoning_content"] = reasoning_content
@@ -355,6 +357,7 @@ class CodexChatModel(BaseChatModel):
            tool_calls=tool_calls if tool_calls else [],
            invalid_tool_calls=invalid_tool_calls,
            additional_kwargs=additional_kwargs,
            usage_metadata=usage_metadata,
            response_metadata={
                "model": response.get("model", self.model),
                "usage": usage,


@@ -82,6 +82,36 @@ def test_parse_response_text_content():
    assert result.generations[0].message.content == "Hello world"


def test_parse_response_populates_usage_metadata():
    model = _make_model()
    response = {
        "output": [
            {
                "type": "message",
                "content": [{"type": "output_text", "text": "Hello world"}],
            }
        ],
        "usage": {
            "input_tokens": 10,
            "output_tokens": 5,
            "total_tokens": 15,
            "input_tokens_details": {"cached_tokens": 3},
            "output_tokens_details": {"reasoning_tokens": 2},
        },
        "model": "gpt-5.4",
    }
    result = model._parse_response(response)
    assert result.generations[0].message.usage_metadata == {
        "input_tokens": 10,
        "output_tokens": 5,
        "total_tokens": 15,
        "input_token_details": {"cache_read": 3},
        "output_token_details": {"reasoning": 2},
    }


def test_parse_response_reasoning_content():
    model = _make_model()
    response = {
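
Because usage_metadata now follows LangChain's standard shape, downstream token accounting can fold it across turns with langchain_core's add_usage helper, which also merges the nested detail dicts. A brief usage sketch (the messages list here is hypothetical, not part of this commit):

from langchain_core.messages import AIMessage
from langchain_core.messages.ai import add_usage


def total_usage(messages):
    # Accumulate UsageMetadata over every AI turn in a conversation;
    # add_usage tolerates None and sums nested token details.
    total = None
    for message in messages:
        if isinstance(message, AIMessage) and message.usage_metadata:
            total = add_usage(total, message.usage_metadata)
    return total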


@@ -0,0 +1,32 @@
from unittest.mock import MagicMock, patch

from langchain_core.messages import AIMessage

from deerflow.agents.middlewares.token_usage_middleware import TokenUsageMiddleware


def test_after_model_logs_usage_metadata_counts():
    middleware = TokenUsageMiddleware()
    state = {
        "messages": [
            AIMessage(
                content="done",
                usage_metadata={
                    "input_tokens": 10,
                    "output_tokens": 5,
                    "total_tokens": 15,
                },
            )
        ]
    }

    with patch("deerflow.agents.middlewares.token_usage_middleware.logger.info") as info_mock:
        result = middleware.after_model(state=state, runtime=MagicMock())

    assert result is None
    info_mock.assert_called_once_with(
        "LLM token usage: input=%s output=%s total=%s",
        10,
        5,
        15,
    )
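
The middleware itself is not part of this diff; the following is a minimal sketch consistent with the test's assertions (the real class lives in deerflow.agents.middlewares.token_usage_middleware, and its actual implementation may differ):

import logging

from langchain_core.messages import AIMessage

logger = logging.getLogger(__name__)


class TokenUsageMiddleware:
    def after_model(self, state, runtime):
        # Walk back from the newest message to the latest AI turn that
        # carries usage_metadata, log its counts, and return None so the
        # agent state is left untouched.
        for message in reversed(state.get("messages", [])):
            if isinstance(message, AIMessage) and message.usage_metadata:
                usage = message.usage_metadata
                logger.info(
                    "LLM token usage: input=%s output=%s total=%s",
                    usage["input_tokens"],
                    usage["output_tokens"],
                    usage["total_tokens"],
                )
                break
        return None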