docs: clarify token usage accounting semantics (#2845)

2026-07-04 21:35:11 +00:00 · 2026-05-11 07:17:49 +08:00 · 2026-05-11 07:17:49 +08:00 · e82b2fb4d0
commit e82b2fb4d0
parent 30a5846219
4 changed files with 33 additions and 2 deletions
--- a/frontend/src/content/en/application/workspace-usage.mdx
+++ b/frontend/src/content/en/application/workspace-usage.mdx
@@ -67,6 +67,26 @@ Each agent response in the conversation may contain:

 Tool calls and thinking steps are collapsed by default. Click to expand them.

+## Understanding token usage
+
+If token usage display is enabled, DeerFlow shows one conversation-level total in
+the header and optional per-turn or debug summaries in the message list.
+
+- **Header total**: the persisted thread-level total from the backend. While the
+  current run is still streaming, the header may also include the visible
+  in-flight usage for that unfinished response.
+- **Per-turn / debug usage**: usage derived from the assistant messages that are
+  currently visible in the conversation view.
+
+This means the visible per-turn totals do **not** need to add up exactly to the
+header total. The header is a thread ledger; the per-turn view is a rendering
+of the messages you can currently see.
+
+These totals may also differ from your provider's billing page. Common reasons
+include retries, failed requests, cached input tokens, reasoning tokens,
+provider-specific billing rules, and internal calls that do not appear as normal
+chat messages.
+
 ## Switching agents

 If you have created custom agents, use the **Agent** selector in the input bar to switch to a different agent. The selected agent persists for the duration of the thread.
--- a/frontend/src/content/zh/application/workspace-usage.mdx
+++ b/frontend/src/content/zh/application/workspace-usage.mdx
@@ -70,6 +70,17 @@ DeerFlow 工作区是一个基于浏览器的对话界面，你可以在其中

 点击消息旁边的展开箭头查看完整的推理链。

+## 理解 Token 用量
+
+如果启用了 Token 用量显示，DeerFlow 会在顶部显示一个对话级总量，并在消息列表中按配置显示每轮或调试级别的用量摘要。
+
+- **顶部总量**：后端持久化的线程级总账。在当前回复仍处于流式返回期间，顶部还可能临时叠加这条未完成回复的可见进行中用量。
+- **每轮 / 调试用量**：根据当前界面里可见的 assistant 消息计算出来的用量。
+
+因此，顶部总量和当前可见的每轮总和**不要求完全相等**。顶部展示的是整个线程的总账；每轮展示的是你当前能看到的消息视图。
+
+这些数字也可能与模型供应商的账单页不同。常见原因包括重试请求、失败请求、缓存输入 token、推理 token、供应商自己的计费口径，以及不会以普通聊天消息形式显示的内部调用。
+
 ## 查看产出物

 当 Agent 生成文件（报告、图表、代码文件、演示文稿）时，它们会以**产出物**的形式出现在对话中。
--- a/frontend/src/core/i18n/locales/en-US.ts
+++ b/frontend/src/core/i18n/locales/en-US.ts
@@ -310,7 +310,7 @@ export const enUS: Translations = {
    unavailable:
      "No token usage yet. Usage appears only after a successful model response when the provider returns usage_metadata.",
    unavailableShort: "No usage returned",
-    note: "Header totals use persisted thread usage when available. Per-turn and debug usage come from visible messages. Totals may differ from provider billing pages.",
+    note: "Header totals use persisted thread usage, plus visible in-flight usage while a run is still streaming. Per-turn and debug usage come from currently visible messages only. Totals may differ from provider billing pages.",
    presets: {
      off: "Off",
      summary: "Summary",
--- a/frontend/src/core/i18n/locales/zh-CN.ts
+++ b/frontend/src/core/i18n/locales/zh-CN.ts
@@ -296,7 +296,7 @@ export const zhCN: Translations = {
    unavailable:
      "暂无 Token 用量。只有模型成功返回且供应商提供 usage_metadata 时才会显示。",
    unavailableShort: "未返回用量",
-    note: "顶部总量优先使用后端持久化的线程用量。每轮和调试用量来自当前可见消息，可能与平台账单页不完全一致。",
+    note: "顶部总量优先使用后端持久化的线程用量；当当前回复仍在流式返回时，还会叠加可见的进行中用量。每轮和调试用量只来自当前可见消息，可能与平台账单页不完全一致。",
    presets: {
      off: "关闭",
      summary: "总览",