Mirror of https://github.com/bytedance/deer-flow.git
Populate Codex usage metadata for token accounting (#2585)
commit 866d1ca409
parent 8ba01dfd83
@@ -21,6 +21,7 @@ from langchain_core.callbacks import CallbackManagerForLLMRun
 from langchain_core.language_models.chat_models import BaseChatModel
 from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage
 from langchain_core.outputs import ChatGeneration, ChatResult
+from langchain_openai.chat_models.base import _create_usage_metadata_responses
 
 from deerflow.models.credential_loader import CodexCliCredential, load_codex_cli_credential
 
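The helper imported here, _create_usage_metadata_responses, converts the raw Responses-API usage payload into LangChain's UsageMetadata shape. As a rough guide to what it produces, here is a minimal reimplementation of the mapping, inferred from the test expectations further down; it is an illustrative sketch, not the langchain_openai source:

# Illustrative sketch only: the real helper is a private API in
# langchain_openai.chat_models.base. Field names below are taken from the
# test expectations in this commit.
from langchain_core.messages.ai import UsageMetadata


def usage_metadata_from_responses_sketch(usage: dict) -> UsageMetadata:
    metadata = UsageMetadata(
        input_tokens=usage.get("input_tokens", 0),
        output_tokens=usage.get("output_tokens", 0),
        total_tokens=usage.get("total_tokens", 0),
    )
    input_details = usage.get("input_tokens_details") or {}
    output_details = usage.get("output_tokens_details") or {}
    if "cached_tokens" in input_details:
        # Responses API "cached_tokens" maps to LangChain's "cache_read" bucket.
        metadata["input_token_details"] = {"cache_read": input_details["cached_tokens"]}
    if "reasoning_tokens" in output_details:
        # "reasoning_tokens" maps to LangChain's "reasoning" bucket.
        metadata["output_token_details"] = {"reasoning": output_details["reasoning_tokens"]}
    return metadata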
@@ -346,6 +347,7 @@ class CodexChatModel(BaseChatModel):
         )
 
         usage = response.get("usage", {})
+        usage_metadata = _create_usage_metadata_responses(usage) if usage else None
         additional_kwargs = {}
        if reasoning_content:
             additional_kwargs["reasoning_content"] = reasoning_content
@@ -355,6 +357,7 @@ class CodexChatModel(BaseChatModel):
             tool_calls=tool_calls if tool_calls else [],
             invalid_tool_calls=invalid_tool_calls,
             additional_kwargs=additional_kwargs,
+            usage_metadata=usage_metadata,
             response_metadata={
                 "model": response.get("model", self.model),
                 "usage": usage,
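With usage_metadata populated, token counts can be read straight off the returned AIMessage. A hypothetical caller-side sketch (the CodexChatModel constructor arguments are assumptions; this diff does not show its __init__):

# Hypothetical usage; constructor arguments are assumed, not from this diff.
model = CodexChatModel(model="gpt-5.4")
message = model.invoke("Summarize the release notes.")
if message.usage_metadata is not None:
    print(message.usage_metadata["input_tokens"])   # e.g. 10
    print(message.usage_metadata["output_tokens"])  # e.g. 5
    print(message.usage_metadata["total_tokens"])   # e.g. 15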
@@ -82,6 +82,36 @@ def test_parse_response_text_content():
     assert result.generations[0].message.content == "Hello world"
 
 
+def test_parse_response_populates_usage_metadata():
+    model = _make_model()
+    response = {
+        "output": [
+            {
+                "type": "message",
+                "content": [{"type": "output_text", "text": "Hello world"}],
+            }
+        ],
+        "usage": {
+            "input_tokens": 10,
+            "output_tokens": 5,
+            "total_tokens": 15,
+            "input_tokens_details": {"cached_tokens": 3},
+            "output_tokens_details": {"reasoning_tokens": 2},
+        },
+        "model": "gpt-5.4",
+    }
+
+    result = model._parse_response(response)
+
+    assert result.generations[0].message.usage_metadata == {
+        "input_tokens": 10,
+        "output_tokens": 5,
+        "total_tokens": 15,
+        "input_token_details": {"cache_read": 3},
+        "output_token_details": {"reasoning": 2},
+    }
+
+
 def test_parse_response_reasoning_content():
     model = _make_model()
     response = {
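Since each AIMessage now carries UsageMetadata, per-message counts can be rolled up for run-level accounting. A sketch of one way to do that, assuming the add_usage merge helper is available in the installed langchain_core version:

from langchain_core.messages import AIMessage
from langchain_core.messages.ai import UsageMetadata, add_usage


def total_usage(messages: list) -> UsageMetadata:
    # Merge UsageMetadata across every AI message in a conversation.
    total = UsageMetadata(input_tokens=0, output_tokens=0, total_tokens=0)
    for message in messages:
        if isinstance(message, AIMessage) and message.usage_metadata:
            total = add_usage(total, message.usage_metadata)
    return total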
backend/tests/test_token_usage_middleware.py (new file, 32 lines)
@@ -0,0 +1,32 @@
+from unittest.mock import MagicMock, patch
+
+from langchain_core.messages import AIMessage
+
+from deerflow.agents.middlewares.token_usage_middleware import TokenUsageMiddleware
+
+
+def test_after_model_logs_usage_metadata_counts():
+    middleware = TokenUsageMiddleware()
+    state = {
+        "messages": [
+            AIMessage(
+                content="done",
+                usage_metadata={
+                    "input_tokens": 10,
+                    "output_tokens": 5,
+                    "total_tokens": 15,
+                },
+            )
+        ]
+    }
+
+    with patch("deerflow.agents.middlewares.token_usage_middleware.logger.info") as info_mock:
+        result = middleware.after_model(state=state, runtime=MagicMock())
+
+    assert result is None
+    info_mock.assert_called_once_with(
+        "LLM token usage: input=%s output=%s total=%s",
+        10,
+        5,
+        15,
+    )
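The test above pins down the observable behavior of TokenUsageMiddleware.after_model: read usage_metadata from the latest AIMessage, log the three counts through the module-level logger, and return None. A minimal sketch consistent with the test (the middleware's actual source is not in this diff, so the base class and hook signature are assumptions):

import logging

from langchain_core.messages import AIMessage

logger = logging.getLogger(__name__)


class TokenUsageMiddleware:
    # after_model hook signature assumed from how the test calls it.
    def after_model(self, state, runtime):
        messages = state.get("messages", [])
        last = messages[-1] if messages else None
        if isinstance(last, AIMessage) and last.usage_metadata:
            usage = last.usage_metadata
            logger.info(
                "LLM token usage: input=%s output=%s total=%s",
                usage.get("input_tokens"),
                usage.get("output_tokens"),
                usage.get("total_tokens"),
            )
        return None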