From c91785dd68e907eda621ae42b83a4c4b00ff15ee Mon Sep 17 00:00:00 2001
From: Octopus <liyuan851277048@icloud.com>
Date: Tue, 14 Apr 2026 09:51:39 +0800
Subject: [PATCH] fix(title): strip <think> tags from title model responses and
 assistant context (#1927)

* fix(title): strip <think> tags from title model responses and assistant context

Reasoning models (e.g. MiniMax M2.7, DeepSeek-R1) emit <think>...</think>
blocks before their actual output. When such a model was used as the title
model (or as the main agent), the raw thinking content leaked into the thread
title stored in state, so the chat list showed the internal monologue instead
of a meaningful title.

Fixes #1884

- Add `_strip_think_tags()` helper using a regex to remove all <think>...</think> blocks
- Apply it in `_parse_title()` so the title model response is always clean
- Apply it to the assistant message in `_build_title_prompt()` so thinking
  content from the first AI turn is not fed back to the title model
- Add four unit tests covering: stripping in `_parse_title()`, a think-only
  response (which yields an empty title), stripping of the assistant message
  in the title prompt, and the end-to-end async flow with think tags

* Fix the lint error

---------

Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
---
 .../agents/middlewares/title_middleware.py    |  8 +++-
 .../tests/test_title_middleware_core_logic.py | 47 +++++++++++++++++++
 2 files changed, 54 insertions(+), 1 deletion(-)
diff --git a/backend/packages/harness/deerflow/agents/middlewares/title_middleware.py b/backend/packages/harness/deerflow/agents/middlewares/title_middleware.py
index 42f465f01..dd131ac28 100644
--- a/backend/packages/harness/deerflow/agents/middlewares/title_middleware.py
+++ b/backend/packages/harness/deerflow/agents/middlewares/title_middleware.py
@@ -1,6 +1,7 @@
 """Middleware for automatic thread title generation."""
 
 import logging
+import re
 from typing import NotRequired, override
 
 from langchain.agents import AgentState
@@ -77,7 +78,7 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
         assistant_msg_content = next((m.content for m in messages if m.type == "ai"), "")
 
         user_msg = self._normalize_content(user_msg_content)
-        assistant_msg = self._normalize_content(assistant_msg_content)
+        assistant_msg = self._strip_think_tags(self._normalize_content(assistant_msg_content))
 
         prompt = config.prompt_template.format(
             max_words=config.max_words,
@@ -86,10 +87,15 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
         )
         return prompt, user_msg
 
+    def _strip_think_tags(self, text: str) -> str:
+        """Remove <think>...</think> blocks emitted by reasoning models (e.g. minimax, DeepSeek-R1)."""
+        return re.sub(r"<think>[\s\S]*?</think>", "", text, flags=re.IGNORECASE).strip()
+
     def _parse_title(self, content: object) -> str:
         """Normalize model output into a clean title string."""
         config = get_title_config()
         title_content = self._normalize_content(content)
+        title_content = self._strip_think_tags(title_content)
         title = title_content.strip().strip('"').strip("'")
         return title[: config.max_chars] if len(title) > config.max_chars else title
 
diff --git a/backend/tests/test_title_middleware_core_logic.py b/backend/tests/test_title_middleware_core_logic.py
index 3b2b5926f..ce7376e2e 100644
--- a/backend/tests/test_title_middleware_core_logic.py
+++ b/backend/tests/test_title_middleware_core_logic.py
@@ -181,3 +181,50 @@ class TestTitleMiddlewareCoreLogic:
         result = middleware._generate_title_result(state)
         assert result["title"].endswith("...")
         assert result["title"].startswith("这是一个非常长的问题描述")
+
+    def test_parse_title_strips_think_tags(self):
+        """Title model responses with <think>...</think> blocks are stripped before use."""
+        middleware = TitleMiddleware()
+        raw = "<think>用户想要研究贵阳发展情况。我需要使用 deep-research skill。</think>贵阳近5年发展报告研究"
+        result = middleware._parse_title(raw)
+        assert "<think>" not in result
+        assert result == "贵阳近5年发展报告研究"
+
+    def test_parse_title_strips_think_tags_only_response(self):
+        """If model only outputs a think block and nothing else, title is empty string."""
+        middleware = TitleMiddleware()
+        raw = "<think>just thinking, no real title</think>"
+        result = middleware._parse_title(raw)
+        assert result == ""
+
+    def test_build_title_prompt_strips_assistant_think_tags(self):
+        """<think> blocks in assistant messages are stripped before being included in the title prompt."""
+        _set_test_title_config(enabled=True)
+        middleware = TitleMiddleware()
+        state = {
+            "messages": [
+                HumanMessage(content="贵阳发展报告研究"),
+                AIMessage(content="<think>分析用户需求</think>我将为您研究贵阳的发展情况。"),
+            ]
+        }
+        prompt, _ = middleware._build_title_prompt(state)
+        assert "<think>" not in prompt
+
+    def test_generate_title_async_strips_think_tags_in_response(self, monkeypatch):
+        """Async title generation strips <think> blocks from the model response."""
+        _set_test_title_config(max_chars=50)
+        middleware = TitleMiddleware()
+        model = MagicMock()
+        model.ainvoke = AsyncMock(return_value=AIMessage(content="<think>用户想研究贵阳。</think>贵阳发展研究"))
+        monkeypatch.setattr(title_middleware_module, "create_chat_model", MagicMock(return_value=model))
+
+        state = {
+            "messages": [
+                HumanMessage(content="请帮我研究贵阳近5年发展情况"),
+                AIMessage(content="好的"),
+            ]
+        }
+        result = asyncio.run(middleware._agenerate_title_result(state))
+        assert result is not None
+        assert "<think>" not in result["title"]
+        assert result["title"] == "贵阳发展研究"