diff --git a/backend/packages/harness/deerflow/agents/middlewares/title_middleware.py b/backend/packages/harness/deerflow/agents/middlewares/title_middleware.py
index 42f465f01..dd131ac28 100644
--- a/backend/packages/harness/deerflow/agents/middlewares/title_middleware.py
+++ b/backend/packages/harness/deerflow/agents/middlewares/title_middleware.py
@@ -1,6 +1,7 @@
 """Middleware for automatic thread title generation."""
 
 import logging
+import re
 from typing import NotRequired, override
 
 from langchain.agents import AgentState
@@ -77,7 +78,7 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
         assistant_msg_content = next((m.content for m in messages if m.type == "ai"), "")
 
         user_msg = self._normalize_content(user_msg_content)
-        assistant_msg = self._normalize_content(assistant_msg_content)
+        assistant_msg = self._strip_think_tags(self._normalize_content(assistant_msg_content))
 
         prompt = config.prompt_template.format(
             max_words=config.max_words,
@@ -86,10 +87,15 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
         )
         return prompt, user_msg
 
+    def _strip_think_tags(self, text: str) -> str:
+        """Remove <think>...</think> blocks emitted by reasoning models (e.g. minimax, DeepSeek-R1)."""
+        return re.sub(r"<think>[\s\S]*?</think>", "", text, flags=re.IGNORECASE).strip()
+
     def _parse_title(self, content: object) -> str:
         """Normalize model output into a clean title string."""
         config = get_title_config()
         title_content = self._normalize_content(content)
+        title_content = self._strip_think_tags(title_content)
         title = title_content.strip().strip('"').strip("'")
         return title[: config.max_chars] if len(title) > config.max_chars else title
 
diff --git a/backend/tests/test_title_middleware_core_logic.py b/backend/tests/test_title_middleware_core_logic.py
index 3b2b5926f..ce7376e2e 100644
--- a/backend/tests/test_title_middleware_core_logic.py
+++ b/backend/tests/test_title_middleware_core_logic.py
@@ -181,3 +181,50 @@ class TestTitleMiddlewareCoreLogic:
         result = middleware._generate_title_result(state)
         assert result["title"].endswith("...")
         assert result["title"].startswith("这是一个非常长的问题描述")
+
+    def test_parse_title_strips_think_tags(self):
+        """Title model responses with <think>...</think> blocks are stripped before use."""
+        middleware = TitleMiddleware()
+        raw = "<think>用户想要研究贵阳发展情况。我需要使用 deep-research skill。</think>贵阳近5年发展报告研究"
+        result = middleware._parse_title(raw)
+        assert "<think>" not in result
+        assert result == "贵阳近5年发展报告研究"
+
+    def test_parse_title_strips_think_tags_only_response(self):
+        """If model only outputs a think block and nothing else, title is empty string."""
+        middleware = TitleMiddleware()
+        raw = "<think>just thinking, no real title</think>"
+        result = middleware._parse_title(raw)
+        assert result == ""
+
+    def test_build_title_prompt_strips_assistant_think_tags(self):
+        """<think> blocks in assistant messages are stripped before being included in the title prompt."""
+        _set_test_title_config(enabled=True)
+        middleware = TitleMiddleware()
+        state = {
+            "messages": [
+                HumanMessage(content="贵阳发展报告研究"),
+                AIMessage(content="<think>分析用户需求</think>我将为您研究贵阳的发展情况。"),
+            ]
+        }
+        prompt, _ = middleware._build_title_prompt(state)
+        assert "<think>" not in prompt
+
+    def test_generate_title_async_strips_think_tags_in_response(self, monkeypatch):
+        """Async title generation strips <think> blocks from the model response."""
+        _set_test_title_config(max_chars=50)
+        middleware = TitleMiddleware()
+        model = MagicMock()
+        model.ainvoke = AsyncMock(return_value=AIMessage(content="<think>用户想研究贵阳。</think>贵阳发展研究"))
+        monkeypatch.setattr(title_middleware_module, "create_chat_model", MagicMock(return_value=model))
+
+        state = {
+            "messages": [
+                HumanMessage(content="请帮我研究贵阳近5年发展情况"),
+                AIMessage(content="好的"),
+            ]
+        }
+        result = asyncio.run(middleware._agenerate_title_result(state))
+        assert result is not None
+        assert "<think>" not in result["title"]
+        assert result["title"] == "贵阳发展研究"