From e82b2fb4d0a8feb333c4b533292e5ea16a136f08 Mon Sep 17 00:00:00 2001
From: YuJitang <yujitang_2006@qq.com>
Date: Mon, 11 May 2026 07:17:49 +0800
Subject: [PATCH] docs: clarify token usage accounting semantics (#2845)

---
 .../en/application/workspace-usage.mdx        | 20 +++++++++++++++++++
 .../zh/application/workspace-usage.mdx        | 11 ++++++++++
 frontend/src/core/i18n/locales/en-US.ts       |  2 +-
 frontend/src/core/i18n/locales/zh-CN.ts       |  2 +-
 4 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/frontend/src/content/en/application/workspace-usage.mdx b/frontend/src/content/en/application/workspace-usage.mdx
index 686614aa7..253519af8 100644
--- a/frontend/src/content/en/application/workspace-usage.mdx
+++ b/frontend/src/content/en/application/workspace-usage.mdx
@@ -67,6 +67,26 @@ Each agent response in the conversation may contain:
 
 Tool calls and thinking steps are collapsed by default. Click to expand them.
 
+## Understanding token usage
+
+If token usage display is enabled, DeerFlow shows one conversation-level total in
+the header and optional per-turn or debug summaries in the message list.
+
+- **Header total**: the persisted thread-level total from the backend. While the
+  current run is still streaming, the header may also include the visible
+  in-flight usage for that unfinished response.
+- **Per-turn / debug usage**: usage derived from the assistant messages that are
+  currently visible in the conversation view.
+
+This means the visible per-turn totals do **not** have to add up exactly to the
+header total. The header is a thread ledger; the per-turn view is a rendering
+of the messages you can currently see.
+
+These totals may also differ from your provider's billing page. Common reasons
+include retries, failed requests, cached input tokens, reasoning tokens,
+provider-specific billing rules, and internal calls that do not appear as normal
+chat messages.
+
 ## Switching agents
 
 If you have created custom agents, use the **Agent** selector in the input bar to switch to a different agent. The selected agent persists for the duration of the thread.
diff --git a/frontend/src/content/zh/application/workspace-usage.mdx b/frontend/src/content/zh/application/workspace-usage.mdx
index e4e3fb541..35cafcc84 100644
--- a/frontend/src/content/zh/application/workspace-usage.mdx
+++ b/frontend/src/content/zh/application/workspace-usage.mdx
@@ -70,6 +70,17 @@ DeerFlow 工作区是一个基于浏览器的对话界面，你可以在其中
 
 点击消息旁边的展开箭头查看完整的推理链。
 
+## 理解 Token 用量
+
+如果启用了 Token 用量显示，DeerFlow 会在顶部显示一个对话级总量，并在消息列表中按配置显示每轮或调试级别的用量摘要。
+
+- **顶部总量**：后端持久化的线程级总账。当前回复仍在流式返回时，顶部还可能临时叠加这条未完成回复的可见进行中用量。
+- **每轮 / 调试用量**：根据当前界面里可见的 assistant 消息计算出来的用量。
+
+因此，顶部总量和当前可见的每轮总和**不要求完全相等**。顶部展示的是整个线程的总账；每轮展示的是你当前能看到的消息视图。
+
+这些数字也可能与模型供应商的账单页不同。常见原因包括重试请求、失败请求、缓存输入 token、推理 token、供应商自己的计费口径，以及不会以普通聊天消息形式显示的内部调用。
+
 ## 查看产出物
 
 当 Agent 生成文件（报告、图表、代码文件、演示文稿）时，它们会以**产出物**的形式出现在对话中。
diff --git a/frontend/src/core/i18n/locales/en-US.ts b/frontend/src/core/i18n/locales/en-US.ts
index 1daaa21b0..b6ce0c76a 100644
--- a/frontend/src/core/i18n/locales/en-US.ts
+++ b/frontend/src/core/i18n/locales/en-US.ts
@@ -310,7 +310,7 @@ export const enUS: Translations = {
     unavailable:
       "No token usage yet. Usage appears only after a successful model response when the provider returns usage_metadata.",
     unavailableShort: "No usage returned",
-    note: "Header totals use persisted thread usage when available. Per-turn and debug usage come from visible messages. Totals may differ from provider billing pages.",
+    note: "Header totals use persisted thread usage, plus visible in-flight usage while a run is still streaming. Per-turn and debug usage come from currently visible messages only. Totals may differ from provider billing pages.",
     presets: {
       off: "Off",
       summary: "Summary",
diff --git a/frontend/src/core/i18n/locales/zh-CN.ts b/frontend/src/core/i18n/locales/zh-CN.ts
index aadedad65..105aca551 100644
--- a/frontend/src/core/i18n/locales/zh-CN.ts
+++ b/frontend/src/core/i18n/locales/zh-CN.ts
@@ -296,7 +296,7 @@ export const zhCN: Translations = {
     unavailable:
       "暂无 Token 用量。只有模型成功返回且供应商提供 usage_metadata 时才会显示。",
     unavailableShort: "未返回用量",
-    note: "顶部总量优先使用后端持久化的线程用量。每轮和调试用量来自当前可见消息，可能与平台账单页不完全一致。",
+    note: "顶部总量优先使用后端持久化的线程用量；当当前回复仍在流式返回时，还会叠加可见的进行中用量。每轮和调试用量只来自当前可见消息，可能与平台账单页不完全一致。",
     presets: {
       off: "关闭",
       summary: "总览",