From 105db0098784ed3c44158420938052c51b1691f3 Mon Sep 17 00:00:00 2001 From: YuJitang Date: Thu, 16 Apr 2026 08:56:49 +0800 Subject: [PATCH] feat: show token usage per assistant response (#2270) * feat: show token usage per assistant response * fix: align client models response with token usage * fix: address token usage review feedback * docs: clarify token usage config example --------- Co-authored-by: Willem Jiang --- backend/app/gateway/routers/models.py | 27 +++++- backend/packages/harness/deerflow/client.py | 7 +- backend/tests/test_client.py | 5 + config.example.yaml | 7 +- .../[agent_name]/chats/[thread_id]/page.tsx | 8 +- .../app/workspace/chats/[thread_id]/page.tsx | 8 +- .../workspace/messages/message-list-item.tsx | 16 ++++ .../workspace/messages/message-list.tsx | 54 ++++++++--- .../messages/message-token-usage.tsx | 91 +++++++++++++++++++ .../workspace/token-usage-indicator.tsx | 58 +++++++----- frontend/src/core/i18n/locales/en-US.ts | 4 + frontend/src/core/i18n/locales/types.ts | 3 + frontend/src/core/i18n/locales/zh-CN.ts | 4 + frontend/src/core/messages/usage.ts | 2 +- frontend/src/core/models/api.ts | 11 ++- frontend/src/core/models/hooks.ts | 7 +- frontend/src/core/models/types.ts | 9 ++ 17 files changed, 271 insertions(+), 50 deletions(-) create mode 100644 frontend/src/components/workspace/messages/message-token-usage.tsx diff --git a/backend/app/gateway/routers/models.py b/backend/app/gateway/routers/models.py index 6579230f6..11a87a872 100644 --- a/backend/app/gateway/routers/models.py +++ b/backend/app/gateway/routers/models.py @@ -17,10 +17,17 @@ class ModelResponse(BaseModel): supports_reasoning_effort: bool = Field(default=False, description="Whether model supports reasoning effort") +class TokenUsageResponse(BaseModel): + """Token usage display configuration.""" + + enabled: bool = Field(default=False, description="Whether token usage display is enabled") + + class ModelsListResponse(BaseModel): """Response model for listing all models.""" models: list[ModelResponse] + token_usage: TokenUsageResponse @router.get( @@ -36,7 +43,7 @@ async def list_models() -> ModelsListResponse: excluding sensitive fields like API keys and internal configuration. Returns: - A list of all configured models with their metadata. + A list of all configured models with their metadata and token usage display settings. Example Response: ```json @@ -44,17 +51,24 @@ async def list_models() -> ModelsListResponse: "models": [ { "name": "gpt-4", + "model": "gpt-4", "display_name": "GPT-4", "description": "OpenAI GPT-4 model", - "supports_thinking": false + "supports_thinking": false, + "supports_reasoning_effort": false }, { "name": "claude-3-opus", + "model": "claude-3-opus", "display_name": "Claude 3 Opus", "description": "Anthropic Claude 3 Opus model", - "supports_thinking": true + "supports_thinking": true, + "supports_reasoning_effort": false } - ] + ], + "token_usage": { + "enabled": true + } } ``` """ @@ -70,7 +84,10 @@ async def list_models() -> ModelsListResponse: ) for model in config.models ] - return ModelsListResponse(models=models) + return ModelsListResponse( + models=models, + token_usage=TokenUsageResponse(enabled=config.token_usage.enabled), + ) @router.get( diff --git a/backend/packages/harness/deerflow/client.py b/backend/packages/harness/deerflow/client.py index 1c64ba52a..a26d838af 100644 --- a/backend/packages/harness/deerflow/client.py +++ b/backend/packages/harness/deerflow/client.py @@ -722,6 +722,10 @@ class DeerFlowClient: Dict with "models" key containing list of model info dicts, matching the Gateway API ``ModelsListResponse`` schema. """ + token_usage_enabled = getattr(getattr(self._app_config, "token_usage", None), "enabled", False) + if not isinstance(token_usage_enabled, bool): + token_usage_enabled = False + return { "models": [ { @@ -733,7 +737,8 @@ class DeerFlowClient: "supports_reasoning_effort": getattr(model, "supports_reasoning_effort", False), } for model in self._app_config.models - ] + ], + "token_usage": {"enabled": token_usage_enabled}, } def list_skills(self, enabled_only: bool = False) -> dict: diff --git a/backend/tests/test_client.py b/backend/tests/test_client.py index a6d2ebfb3..14b52d077 100644 --- a/backend/tests/test_client.py +++ b/backend/tests/test_client.py @@ -38,6 +38,7 @@ def mock_app_config(): config = MagicMock() config.models = [model] + config.token_usage.enabled = False return config @@ -107,6 +108,7 @@ class TestConfigQueries: def test_list_models(self, client): result = client.list_models() assert "models" in result + assert result["token_usage"] == {"enabled": False} assert len(result["models"]) == 1 assert result["models"][0]["name"] == "test-model" # Verify Gateway-aligned fields are present @@ -2196,7 +2198,9 @@ class TestGatewayConformance: model.display_name = "Test Model" model.description = "A test model" model.supports_thinking = False + model.supports_reasoning_effort = False mock_app_config.models = [model] + mock_app_config.token_usage.enabled = True with patch("deerflow.client.get_app_config", return_value=mock_app_config): client = DeerFlowClient() @@ -2206,6 +2210,7 @@ class TestGatewayConformance: assert len(parsed.models) == 1 assert parsed.models[0].name == "test-model" assert parsed.models[0].model == "gpt-test" + assert parsed.token_usage.enabled is True def test_get_model(self, mock_app_config): model = MagicMock() diff --git a/config.example.yaml b/config.example.yaml index 9d2328530..dbaa0e160 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -21,10 +21,11 @@ config_version: 7 log_level: info # ============================================================================ -# Token Usage Tracking +# Token Usage # ============================================================================ -# Track LLM token usage per model call (input/output/total tokens) -# Logs at info level via TokenUsageMiddleware +# Enable token usage collection and display. +# When enabled, DeerFlow records input/output/total tokens per model call +# and shows usage metadata in the workspace UI when providers return it. token_usage: enabled: false diff --git a/frontend/src/app/workspace/agents/[agent_name]/chats/[thread_id]/page.tsx b/frontend/src/app/workspace/agents/[agent_name]/chats/[thread_id]/page.tsx index 7b288a40d..9f049e768 100644 --- a/frontend/src/app/workspace/agents/[agent_name]/chats/[thread_id]/page.tsx +++ b/frontend/src/app/workspace/agents/[agent_name]/chats/[thread_id]/page.tsx @@ -23,6 +23,7 @@ import { TokenUsageIndicator } from "@/components/workspace/token-usage-indicato import { Tooltip } from "@/components/workspace/tooltip"; import { useAgent } from "@/core/agents"; import { useI18n } from "@/core/i18n/hooks"; +import { useModels } from "@/core/models/hooks"; import { useNotification } from "@/core/notification/hooks"; import { useThreadSettings } from "@/core/settings"; import { useThreadStream } from "@/core/threads/hooks"; @@ -44,6 +45,7 @@ export default function AgentChatPage() { const { threadId, setThreadId, isNewThread, setIsNewThread } = useThreadChat(); const [settings, setSettings] = useThreadSettings(threadId); + const { tokenUsageEnabled } = useModels(); const { showNotification } = useNotification(); const [thread, sendMessage] = useThreadStream({ @@ -128,7 +130,10 @@ export default function AgentChatPage() { {t.agents.newChat} - + @@ -141,6 +146,7 @@ export default function AgentChatPage() { threadId={threadId} thread={thread} paddingBottom={messageListPaddingBottom} + tokenUsageEnabled={tokenUsageEnabled} /> diff --git a/frontend/src/app/workspace/chats/[thread_id]/page.tsx b/frontend/src/app/workspace/chats/[thread_id]/page.tsx index c5ff83dec..9b90ab720 100644 --- a/frontend/src/app/workspace/chats/[thread_id]/page.tsx +++ b/frontend/src/app/workspace/chats/[thread_id]/page.tsx @@ -22,6 +22,7 @@ import { TodoList } from "@/components/workspace/todo-list"; import { TokenUsageIndicator } from "@/components/workspace/token-usage-indicator"; import { Welcome } from "@/components/workspace/welcome"; import { useI18n } from "@/core/i18n/hooks"; +import { useModels } from "@/core/models/hooks"; import { useNotification } from "@/core/notification/hooks"; import { useThreadSettings } from "@/core/settings"; import { useThreadStream } from "@/core/threads/hooks"; @@ -36,6 +37,7 @@ export default function ChatPage() { useThreadChat(); const [settings, setSettings] = useThreadSettings(threadId); const [mounted, setMounted] = useState(false); + const { tokenUsageEnabled } = useModels(); useSpecificChatMode(); useEffect(() => { @@ -103,7 +105,10 @@ export default function ChatPage() {
- +
@@ -115,6 +120,7 @@ export default function ChatPage() { threadId={threadId} thread={thread} paddingBottom={messageListPaddingBottom} + tokenUsageEnabled={tokenUsageEnabled} />
diff --git a/frontend/src/components/workspace/messages/message-list-item.tsx b/frontend/src/components/workspace/messages/message-list-item.tsx index 3312e0f82..ca96eac4a 100644 --- a/frontend/src/components/workspace/messages/message-list-item.tsx +++ b/frontend/src/components/workspace/messages/message-list-item.tsx @@ -38,17 +38,20 @@ import { cn } from "@/lib/utils"; import { CopyButton } from "../copy-button"; import { MarkdownContent } from "./markdown-content"; +import { MessageTokenUsage } from "./message-token-usage"; export function MessageListItem({ className, message, isLoading, threadId, + tokenUsageEnabled = false, }: { className?: string; message: Message; isLoading?: boolean; threadId: string; + tokenUsageEnabled?: boolean; }) { const isHuman = message.type === "human"; return ( @@ -61,6 +64,7 @@ export function MessageListItem({ message={message} isLoading={isLoading} threadId={threadId} + tokenUsageEnabled={tokenUsageEnabled} /> {!isLoading && ( {reasoningContent} + ); } @@ -238,6 +249,11 @@ function MessageContent_({ className="my-3" components={components} /> + ); } diff --git a/frontend/src/components/workspace/messages/message-list.tsx b/frontend/src/components/workspace/messages/message-list.tsx index b7089bb72..d1d02c6d0 100644 --- a/frontend/src/components/workspace/messages/message-list.tsx +++ b/frontend/src/components/workspace/messages/message-list.tsx @@ -13,6 +13,7 @@ import { hasContent, hasPresentFiles, hasReasoning, + hasToolCalls, } from "@/core/messages/utils"; import { useRehypeSplitWordsIntoSpans } from "@/core/rehype"; import type { Subtask } from "@/core/tasks"; @@ -26,6 +27,7 @@ import { StreamingIndicator } from "../streaming-indicator"; import { MarkdownContent } from "./markdown-content"; import { MessageGroup } from "./message-group"; import { MessageListItem } from "./message-list-item"; +import { MessageTokenUsageList } from "./message-token-usage"; import { MessageListSkeleton } from "./skeleton"; import { SubtaskCard } from "./subtask-card"; @@ -37,11 +39,13 @@ export function MessageList({ threadId, thread, paddingBottom = MESSAGE_LIST_DEFAULT_PADDING_BOTTOM, + tokenUsageEnabled = false, }: { className?: string; threadId: string; thread: BaseStream; paddingBottom?: number; + tokenUsageEnabled?: boolean; }) { const { t } = useI18n(); const rehypePlugins = useRehypeSplitWordsIntoSpans(thread.isLoading); @@ -64,6 +68,7 @@ export function MessageList({ message={msg} isLoading={thread.isLoading} threadId={threadId} + tokenUsageEnabled={tokenUsageEnabled} /> ); }); @@ -71,12 +76,18 @@ export function MessageList({ const message = group.messages[0]; if (message && hasContent(message)) { return ( - +
+ + +
); } return null; @@ -99,6 +110,11 @@ export function MessageList({ /> )} +
); } else if (group.type === "assistant:subagent") { @@ -191,15 +207,31 @@ export function MessageList({ className="relative z-1 flex flex-col gap-2" > {results} + ); } + const tokenUsageMessages = group.messages.filter( + (message) => + message.type === "ai" && + (hasToolCalls(message) ? true : !hasContent(message)), + ); return ( - +
+ + +
); })} {thread.isLoading && } diff --git a/frontend/src/components/workspace/messages/message-token-usage.tsx b/frontend/src/components/workspace/messages/message-token-usage.tsx new file mode 100644 index 000000000..7b2c4fde6 --- /dev/null +++ b/frontend/src/components/workspace/messages/message-token-usage.tsx @@ -0,0 +1,91 @@ +import type { Message } from "@langchain/langgraph-sdk"; +import { CoinsIcon } from "lucide-react"; + +import { useI18n } from "@/core/i18n/hooks"; +import { formatTokenCount, getUsageMetadata } from "@/core/messages/usage"; +import { cn } from "@/lib/utils"; + +export function MessageTokenUsage({ + className, + enabled = false, + isLoading = false, + message, +}: { + className?: string; + enabled?: boolean; + isLoading?: boolean; + message: Message; +}) { + const { t } = useI18n(); + + if (!enabled || isLoading || message.type !== "ai") { + return null; + } + + const usage = getUsageMetadata(message); + + return ( +
+ + + {t.tokenUsage.label} + + {usage ? ( + <> + + {t.tokenUsage.input}: {formatTokenCount(usage.inputTokens)} + + + {t.tokenUsage.output}: {formatTokenCount(usage.outputTokens)} + + + {t.tokenUsage.total}: {formatTokenCount(usage.totalTokens)} + + + ) : ( + {t.tokenUsage.unavailableShort} + )} +
+ ); +} + +export function MessageTokenUsageList({ + className, + enabled = false, + isLoading = false, + messages, +}: { + className?: string; + enabled?: boolean; + isLoading?: boolean; + messages: Message[]; +}) { + if (!enabled || isLoading) { + return null; + } + + const aiMessages = messages.filter((message) => message.type === "ai"); + + if (aiMessages.length === 0) { + return null; + } + + return ( + <> + {aiMessages.map((message, index) => ( + + ))} + + ); +} diff --git a/frontend/src/components/workspace/token-usage-indicator.tsx b/frontend/src/components/workspace/token-usage-indicator.tsx index 9f0b02f73..00ab166d3 100644 --- a/frontend/src/components/workspace/token-usage-indicator.tsx +++ b/frontend/src/components/workspace/token-usage-indicator.tsx @@ -15,18 +15,20 @@ import { cn } from "@/lib/utils"; interface TokenUsageIndicatorProps { messages: Message[]; + enabled?: boolean; className?: string; } export function TokenUsageIndicator({ messages, + enabled = false, className, }: TokenUsageIndicatorProps) { const { t } = useI18n(); const usage = useMemo(() => accumulateUsage(messages), [messages]); - if (!usage) { + if (!enabled) { return null; } @@ -36,37 +38,49 @@ export function TokenUsageIndicator({
{t.tokenUsage.title}
-
- {t.tokenUsage.input} - - {formatTokenCount(usage.inputTokens)} - -
-
- {t.tokenUsage.output} - - {formatTokenCount(usage.outputTokens)} - -
-
-
- {t.tokenUsage.total} - - {formatTokenCount(usage.totalTokens)} - + {usage ? ( + <> +
+ {t.tokenUsage.input} + + {formatTokenCount(usage.inputTokens)} + +
+
+ {t.tokenUsage.output} + + {formatTokenCount(usage.outputTokens)} + +
+
+
+ {t.tokenUsage.total} + + {formatTokenCount(usage.totalTokens)} + +
+
+ + ) : ( +
+ {t.tokenUsage.unavailable}
-
+ )}
diff --git a/frontend/src/core/i18n/locales/en-US.ts b/frontend/src/core/i18n/locales/en-US.ts index f031098fe..de94e0c98 100644 --- a/frontend/src/core/i18n/locales/en-US.ts +++ b/frontend/src/core/i18n/locales/en-US.ts @@ -298,9 +298,13 @@ export const enUS: Translations = { // Token Usage tokenUsage: { title: "Token Usage", + label: "Tokens", input: "Input", output: "Output", total: "Total", + unavailable: + "No token usage yet. Usage appears only after a successful model response when the provider returns usage_metadata.", + unavailableShort: "No usage returned", }, // Shortcuts diff --git a/frontend/src/core/i18n/locales/types.ts b/frontend/src/core/i18n/locales/types.ts index 37a08c9f9..a8d99e4c7 100644 --- a/frontend/src/core/i18n/locales/types.ts +++ b/frontend/src/core/i18n/locales/types.ts @@ -229,9 +229,12 @@ export interface Translations { // Token Usage tokenUsage: { title: string; + label: string; input: string; output: string; total: string; + unavailable: string; + unavailableShort: string; }; // Shortcuts diff --git a/frontend/src/core/i18n/locales/zh-CN.ts b/frontend/src/core/i18n/locales/zh-CN.ts index a0db84381..600cb8f07 100644 --- a/frontend/src/core/i18n/locales/zh-CN.ts +++ b/frontend/src/core/i18n/locales/zh-CN.ts @@ -284,9 +284,13 @@ export const zhCN: Translations = { // Token Usage tokenUsage: { title: "Token 用量", + label: "Tokens", input: "输入", output: "输出", total: "总计", + unavailable: + "暂无 Token 用量。只有模型成功返回且供应商提供 usage_metadata 时才会显示。", + unavailableShort: "未返回用量", }, // Shortcuts diff --git a/frontend/src/core/messages/usage.ts b/frontend/src/core/messages/usage.ts index 9bf585c98..a61b78dad 100644 --- a/frontend/src/core/messages/usage.ts +++ b/frontend/src/core/messages/usage.ts @@ -10,7 +10,7 @@ export interface TokenUsage { * Extract usage_metadata from an AI message if present. * The field is added by the backend (PR #1218) but not typed in the SDK. */ -function getUsageMetadata(message: Message): TokenUsage | null { +export function getUsageMetadata(message: Message): TokenUsage | null { if (message.type !== "ai") { return null; } diff --git a/frontend/src/core/models/api.ts b/frontend/src/core/models/api.ts index 362bb0d79..46675bf6d 100644 --- a/frontend/src/core/models/api.ts +++ b/frontend/src/core/models/api.ts @@ -1,9 +1,12 @@ import { getBackendBaseURL } from "../config"; -import type { Model } from "./types"; +import type { ModelsResponse } from "./types"; -export async function loadModels() { +export async function loadModels(): Promise { const res = await fetch(`${getBackendBaseURL()}/api/models`); - const { models } = (await res.json()) as { models: Model[] }; - return models; + const data = (await res.json()) as Partial; + return { + models: data.models ?? [], + token_usage: data.token_usage ?? { enabled: false }, + }; } diff --git a/frontend/src/core/models/hooks.ts b/frontend/src/core/models/hooks.ts index 2becbbbc9..48abc26df 100644 --- a/frontend/src/core/models/hooks.ts +++ b/frontend/src/core/models/hooks.ts @@ -9,5 +9,10 @@ export function useModels({ enabled = true }: { enabled?: boolean } = {}) { enabled, refetchOnWindowFocus: false, }); - return { models: data ?? [], isLoading, error }; + return { + models: data?.models ?? [], + tokenUsageEnabled: data?.token_usage.enabled ?? false, + isLoading, + error, + }; } diff --git a/frontend/src/core/models/types.ts b/frontend/src/core/models/types.ts index 0b9ea5a15..2c888ed8d 100644 --- a/frontend/src/core/models/types.ts +++ b/frontend/src/core/models/types.ts @@ -7,3 +7,12 @@ export interface Model { supports_thinking?: boolean; supports_reasoning_effort?: boolean; } + +export interface TokenUsageSettings { + enabled: boolean; +} + +export interface ModelsResponse { + models: Model[]; + token_usage: TokenUsageSettings; +}