From e82b2fb4d0a8feb333c4b533292e5ea16a136f08 Mon Sep 17 00:00:00 2001 From: YuJitang Date: Mon, 11 May 2026 07:17:49 +0800 Subject: [PATCH] docs: clarify token usage accounting semantics (#2845) --- .../en/application/workspace-usage.mdx | 20 +++++++++++++++++++ .../zh/application/workspace-usage.mdx | 11 ++++++++++ frontend/src/core/i18n/locales/en-US.ts | 2 +- frontend/src/core/i18n/locales/zh-CN.ts | 2 +- 4 files changed, 33 insertions(+), 2 deletions(-) diff --git a/frontend/src/content/en/application/workspace-usage.mdx b/frontend/src/content/en/application/workspace-usage.mdx index 686614aa7..253519af8 100644 --- a/frontend/src/content/en/application/workspace-usage.mdx +++ b/frontend/src/content/en/application/workspace-usage.mdx @@ -67,6 +67,26 @@ Each agent response in the conversation may contain: Tool calls and thinking steps are collapsed by default. Click to expand them. +## Understanding token usage + +If token usage display is enabled, DeerFlow shows one conversation-level total in +the header and optional per-turn or debug summaries in the message list. + +- **Header total**: the persisted thread-level total from the backend. While the + current run is still streaming, the header may also include the visible + in-flight usage for that unfinished response. +- **Per-turn / debug usage**: usage derived from the assistant messages that are + currently visible in the conversation view. + +This means the visible per-turn totals do **not** need to add up exactly to the +header total. The header is a thread ledger; the per-turn view is a rendering +of the messages you can currently see. + +These totals may also differ from your provider's billing page. Common reasons +include retries, failed requests, cached input tokens, reasoning tokens, +provider-specific billing rules, and internal calls that do not appear as normal +chat messages. 
+ ## Switching agents If you have created custom agents, use the **Agent** selector in the input bar to switch to a different agent. The selected agent persists for the duration of the thread. diff --git a/frontend/src/content/zh/application/workspace-usage.mdx b/frontend/src/content/zh/application/workspace-usage.mdx index e4e3fb541..35cafcc84 100644 --- a/frontend/src/content/zh/application/workspace-usage.mdx +++ b/frontend/src/content/zh/application/workspace-usage.mdx @@ -70,6 +70,17 @@ DeerFlow 工作区是一个基于浏览器的对话界面,你可以在其中 点击消息旁边的展开箭头查看完整的推理链。 +## 理解 Token 用量 + +如果启用了 Token 用量显示,DeerFlow 会在顶部显示一个对话级总量,并在消息列表中按配置显示每轮或调试级别的用量摘要。 + +- **顶部总量**:后端持久化的线程级总账。当当前回复仍在流式返回时,顶部还可能临时叠加这条未完成回复的可见进行中用量。 +- **每轮 / 调试用量**:根据当前界面里可见的 assistant 消息计算出来的用量。 + +因此,顶部总量和当前可见的每轮总和**不要求完全相等**。顶部展示的是整个线程的总账;每轮展示的是你当前能看到的消息视图。 + +这些数字也可能与模型供应商的账单页不同。常见原因包括重试请求、失败请求、缓存输入 token、推理 token、供应商自己的计费口径,以及不会以普通聊天消息形式显示的内部调用。 + ## 查看产出物 当 Agent 生成文件(报告、图表、代码文件、演示文稿)时,它们会以**产出物**的形式出现在对话中。 diff --git a/frontend/src/core/i18n/locales/en-US.ts b/frontend/src/core/i18n/locales/en-US.ts index 1daaa21b0..b6ce0c76a 100644 --- a/frontend/src/core/i18n/locales/en-US.ts +++ b/frontend/src/core/i18n/locales/en-US.ts @@ -310,7 +310,7 @@ export const enUS: Translations = { unavailable: "No token usage yet. Usage appears only after a successful model response when the provider returns usage_metadata.", unavailableShort: "No usage returned", - note: "Header totals use persisted thread usage when available. Per-turn and debug usage come from visible messages. Totals may differ from provider billing pages.", + note: "Header totals use persisted thread usage, plus visible in-flight usage while a run is still streaming. Per-turn and debug usage come from currently visible messages only. 
Totals may differ from provider billing pages.", presets: { off: "Off", summary: "Summary", diff --git a/frontend/src/core/i18n/locales/zh-CN.ts b/frontend/src/core/i18n/locales/zh-CN.ts index aadedad65..105aca551 100644 --- a/frontend/src/core/i18n/locales/zh-CN.ts +++ b/frontend/src/core/i18n/locales/zh-CN.ts @@ -296,7 +296,7 @@ export const zhCN: Translations = { unavailable: "暂无 Token 用量。只有模型成功返回且供应商提供 usage_metadata 时才会显示。", unavailableShort: "未返回用量", - note: "顶部总量优先使用后端持久化的线程用量。每轮和调试用量来自当前可见消息,可能与平台账单页不完全一致。", + note: "顶部总量优先使用后端持久化的线程用量;当当前回复仍在流式返回时,还会叠加可见的进行中用量。每轮和调试用量只来自当前可见消息,可能与平台账单页不完全一致。", presets: { off: "关闭", summary: "总览",