From 44677c5eb4a8d9ecd81a6f3d115fcf0a62cd4578 Mon Sep 17 00:00:00 2001 From: AochenShen99 Date: Thu, 28 May 2026 18:24:32 +0800 Subject: [PATCH] feat(provider) Add patched MiMo reasoning content support (#3298) * Add patched MiMo reasoning content support * Clarify MiMo patched model coverage * Remove unused MiMo payload index * Address MiMo review nits --- backend/docs/CONFIGURATION.md | 33 ++++ .../harness/deerflow/models/patched_mimo.py | 150 ++++++++++++++++ backend/tests/test_model_factory.py | 35 ++++ backend/tests/test_patched_mimo.py | 169 ++++++++++++++++++ config.example.yaml | 32 ++++ 5 files changed, 419 insertions(+) create mode 100644 backend/packages/harness/deerflow/models/patched_mimo.py create mode 100644 backend/tests/test_patched_mimo.py diff --git a/backend/docs/CONFIGURATION.md b/backend/docs/CONFIGURATION.md index 26137951f..2f26c8128 100644 --- a/backend/docs/CONFIGURATION.md +++ b/backend/docs/CONFIGURATION.md @@ -36,6 +36,7 @@ models: - OpenAI (`langchain_openai:ChatOpenAI`) - Anthropic (`langchain_anthropic:ChatAnthropic`) - DeepSeek (`langchain_deepseek:ChatDeepSeek`) +- Xiaomi MiMo (`deerflow.models.patched_mimo:PatchedChatMiMo`) - Claude Code OAuth (`deerflow.models.claude_provider:ClaudeChatModel`) - Codex CLI (`deerflow.models.openai_codex_provider:CodexChatModel`) - Any LangChain-compatible provider @@ -166,6 +167,37 @@ models: For Gemini accessed **without** thinking (e.g. via OpenRouter where thinking is not activated), the plain `langchain_openai:ChatOpenAI` with `supports_thinking: false` is sufficient and no patch is needed. +**MiMo with thinking via OpenAI-compatible API**: + +MiMo returns `reasoning_content` on assistant messages in thinking mode. In multi-turn agent conversations with tool calls, subsequent requests must preserve that historical `reasoning_content` on assistant messages or the MiMo API can return HTTP 400. 
Standard `langchain_openai:ChatOpenAI` drops this provider-specific field, so use `deerflow.models.patched_mimo:PatchedChatMiMo` instead (see the example configuration below). + +For pay-as-you-go API keys (`sk-...`), use `https://api.xiaomimimo.com/v1`. For Token Plan keys (`tp-...`), use the regional Token Plan Base URL shown in the MiMo console, such as `https://token-plan-cn.xiaomimimo.com/v1`. MiMo documents these key types as separate and non-interchangeable. + +`PatchedChatMiMo` is model-id agnostic. Use it for every MiMo thinking model entry you configure, including model entries referenced by `subagents.*.model` overrides (for example `mimo-v2.5-pro`, `mimo-v2.5`, `mimo-v2-pro`, `mimo-v2-omni`, or `mimo-v2-flash`). + +```yaml +models: + - name: mimo-v2.5-pro + display_name: MiMo V2.5 Pro + use: deerflow.models.patched_mimo:PatchedChatMiMo + model: mimo-v2.5-pro + api_key: $MIMO_API_KEY + base_url: https://api.xiaomimimo.com/v1 + max_tokens: 8192 + supports_thinking: true + supports_vision: false + when_thinking_enabled: + extra_body: + thinking: + type: enabled + when_thinking_disabled: + extra_body: + thinking: + type: disabled +``` + +`PatchedChatMiMo` preserves MiMo's `choices[].message.reasoning_content`, streaming `delta.reasoning_content`, and request-history assistant `reasoning_content` fields. It does not reuse the DeepSeek provider. 
+ ### Tool Groups Organize tools into logical groups: @@ -319,6 +351,7 @@ models: - `OPENAI_API_KEY` - OpenAI API key - `ANTHROPIC_API_KEY` - Anthropic API key - `DEEPSEEK_API_KEY` - DeepSeek API key +- `MIMO_API_KEY` - Xiaomi MiMo API key - `NOVITA_API_KEY` - Novita API key (OpenAI-compatible endpoint) - `TAVILY_API_KEY` - Tavily search API key - `DEER_FLOW_PROJECT_ROOT` - Project root for relative runtime paths diff --git a/backend/packages/harness/deerflow/models/patched_mimo.py b/backend/packages/harness/deerflow/models/patched_mimo.py new file mode 100644 index 000000000..7589af78f --- /dev/null +++ b/backend/packages/harness/deerflow/models/patched_mimo.py @@ -0,0 +1,150 @@ +"""Patched ChatOpenAI adapter for Xiaomi MiMo reasoning_content replay. + +MiMo's OpenAI-compatible API returns ``reasoning_content`` in thinking mode and +requires that value to be replayed on historical assistant messages in +multi-turn agent conversations. Standard ``langchain_openai.ChatOpenAI`` drops +that provider-specific field, which can cause HTTP 400 errors once tool calls +enter the conversation history. 
+""" + +from __future__ import annotations + +from collections.abc import Mapping +from typing import Any + +from langchain_core.language_models import LanguageModelInput +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult +from langchain_openai import ChatOpenAI + +_MISSING = object() + + +def _extract_reasoning_content(value: Any) -> str | object: + """Return reasoning_content from a dict/Pydantic object, preserving empty strings.""" + if isinstance(value, Mapping): + if "reasoning_content" in value and value["reasoning_content"] is not None: + return value["reasoning_content"] + return _MISSING + + reasoning = getattr(value, "reasoning_content", _MISSING) + if reasoning is not _MISSING and reasoning is not None: + return reasoning + + model_extra = getattr(value, "model_extra", None) + if isinstance(model_extra, Mapping) and "reasoning_content" in model_extra and model_extra["reasoning_content"] is not None: + return model_extra["reasoning_content"] + + return _MISSING + + +def _with_reasoning_content(message: AIMessage | AIMessageChunk, reasoning: str) -> AIMessage | AIMessageChunk: + additional_kwargs = dict(message.additional_kwargs) + if additional_kwargs.get("reasoning_content") != reasoning: + additional_kwargs["reasoning_content"] = reasoning + return message.model_copy(update={"additional_kwargs": additional_kwargs}) + + +def _restore_reasoning_content(payload_msg: dict, orig_msg: AIMessage) -> None: + reasoning = orig_msg.additional_kwargs.get("reasoning_content") + if reasoning is not None: + payload_msg["reasoning_content"] = reasoning + + +def _get_typed_choice_message(response: Any, index: int) -> Any: + choices = getattr(response, "choices", None) + if choices is None: + return None + try: + return choices[index].message + except (AttributeError, IndexError, TypeError): + return None + + +class PatchedChatMiMo(ChatOpenAI): + """ChatOpenAI with 
``reasoning_content`` preservation for MiMo thinking mode.""" + + @classmethod + def is_lc_serializable(cls) -> bool: + return True + + @property + def lc_secrets(self) -> dict[str, str]: + return {"api_key": "MIMO_API_KEY", "openai_api_key": "MIMO_API_KEY"} + + def _get_request_payload( + self, + input_: LanguageModelInput, + *, + stop: list[str] | None = None, + **kwargs: Any, + ) -> dict: + original_messages = self._convert_input(input_).to_messages() + payload = super()._get_request_payload(input_, stop=stop, **kwargs) + payload_messages = payload.get("messages", []) + + if len(payload_messages) == len(original_messages): + for payload_msg, orig_msg in zip(payload_messages, original_messages): + if payload_msg.get("role") == "assistant" and isinstance(orig_msg, AIMessage): + _restore_reasoning_content(payload_msg, orig_msg) + else: + ai_messages = [m for m in original_messages if isinstance(m, AIMessage)] + assistant_payloads = [m for m in payload_messages if m.get("role") == "assistant"] + for payload_msg, ai_msg in zip(assistant_payloads, ai_messages): + _restore_reasoning_content(payload_msg, ai_msg) + + return payload + + def _convert_chunk_to_generation_chunk( + self, + chunk: dict, + default_chunk_class: type, + base_generation_info: dict | None, + ) -> ChatGenerationChunk | None: + generation_chunk = super()._convert_chunk_to_generation_chunk( + chunk, + default_chunk_class, + base_generation_info, + ) + if generation_chunk is None: + return None + + choices = chunk.get("choices", []) + if choices: + delta = choices[0].get("delta") or {} + reasoning = _extract_reasoning_content(delta) + if reasoning is not _MISSING and isinstance(generation_chunk.message, AIMessageChunk): + generation_chunk = ChatGenerationChunk( + message=_with_reasoning_content(generation_chunk.message, reasoning), + generation_info=generation_chunk.generation_info, + ) + + return generation_chunk + + def _create_chat_result( + self, + response: dict | Any, + generation_info: dict | 
None = None, + ) -> ChatResult: + result = super()._create_chat_result(response, generation_info) + response_dict = response if isinstance(response, dict) else response.model_dump() + choices = response_dict.get("choices", []) + + patched_generations: list[ChatGeneration] | None = None + for index, generation in enumerate(result.generations): + choice = choices[index] if index < len(choices) else {} + choice_message = choice.get("message", {}) if isinstance(choice, Mapping) else {} + reasoning = _extract_reasoning_content(choice_message) + if reasoning is _MISSING and not isinstance(response, dict): + reasoning = _extract_reasoning_content(_get_typed_choice_message(response, index)) + + message = generation.message + if reasoning is not _MISSING and isinstance(message, AIMessage): + if patched_generations is None: + patched_generations = list(result.generations) + patched_generations[index] = ChatGeneration( + message=_with_reasoning_content(message, reasoning), + generation_info=generation.generation_info, + ) + + return ChatResult(generations=patched_generations or result.generations, llm_output=result.llm_output) diff --git a/backend/tests/test_model_factory.py b/backend/tests/test_model_factory.py index c8dbe0791..554cbc47b 100644 --- a/backend/tests/test_model_factory.py +++ b/backend/tests/test_model_factory.py @@ -995,6 +995,41 @@ def test_openai_responses_api_settings_are_passed_to_chatopenai(monkeypatch): assert captured.get("output_version") == "responses/v1" +# --------------------------------------------------------------------------- +# Provider class path resolution +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize("model_id", ["mimo-v2.5-pro", "mimo-v2.5", "mimo-v2-flash"]) +def test_create_chat_model_resolves_patched_mimo_provider(model_id): + from deerflow.models.patched_mimo import PatchedChatMiMo + + model = ModelConfig( + name=f"{model_id}-thinking", + display_name=f"{model_id} 
Thinking", + description=None, + use="deerflow.models.patched_mimo:PatchedChatMiMo", + model=model_id, + api_key="test-key", + base_url="https://api.xiaomimimo.com/v1", + supports_thinking=True, + when_thinking_enabled={"extra_body": {"thinking": {"type": "enabled"}}}, + supports_vision=False, + ) + cfg = _make_app_config([model]) + + chat_model = factory_module.create_chat_model( + name=f"{model_id}-thinking", + thinking_enabled=True, + app_config=cfg, + attach_tracing=False, + ) + + assert isinstance(chat_model, PatchedChatMiMo) + assert chat_model.model_name == model_id + assert chat_model.extra_body["thinking"]["type"] == "enabled" + + # --------------------------------------------------------------------------- # Duplicate keyword argument collision (issue #1977) # --------------------------------------------------------------------------- diff --git a/backend/tests/test_patched_mimo.py b/backend/tests/test_patched_mimo.py new file mode 100644 index 000000000..d83f1c520 --- /dev/null +++ b/backend/tests/test_patched_mimo.py @@ -0,0 +1,169 @@ +"""Tests for deerflow.models.patched_mimo.PatchedChatMiMo.""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +from langchain_core.messages import AIMessage, AIMessageChunk, HumanMessage + + +def _make_model(**kwargs): + from deerflow.models.patched_mimo import PatchedChatMiMo + + return PatchedChatMiMo( + model="mimo-v2.5-pro", + api_key="test-key", + base_url="https://api.xiaomimimo.com/v1", + **kwargs, + ) + + +def test_is_lc_serializable_returns_true(): + from deerflow.models.patched_mimo import PatchedChatMiMo + + assert PatchedChatMiMo.is_lc_serializable() is True + + +def test_lc_secrets_contains_mimo_api_key_mapping(): + model = _make_model() + + assert model.lc_secrets["api_key"] == "MIMO_API_KEY" + assert model.lc_secrets["openai_api_key"] == "MIMO_API_KEY" + + +def test_reasoning_content_injected_into_assistant_tool_call_message(): + model = _make_model() + + human = 
HumanMessage(content="Check Beijing weather.") + ai = AIMessage( + content="", + additional_kwargs={"reasoning_content": "I need to call the weather tool."}, + ) + payload_message = { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_weather", + "type": "function", + "function": {"name": "get_weather", "arguments": '{"location":"Beijing"}'}, + } + ], + } + base_payload = { + "messages": [ + {"role": "user", "content": "Check Beijing weather."}, + payload_message, + ] + } + + with patch.object(type(model).__bases__[0], "_get_request_payload", return_value=base_payload): + with patch.object(model, "_convert_input") as mock_convert: + mock_convert.return_value = MagicMock(to_messages=lambda: [human, ai]) + payload = model._get_request_payload([human, ai]) + + assert payload["messages"][1]["reasoning_content"] == "I need to call the weather tool." + + +def test_reasoning_content_is_noop_when_missing(): + model = _make_model() + + human = HumanMessage(content="hello") + ai = AIMessage(content="hi", additional_kwargs={}) + base_payload = { + "messages": [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi"}, + ] + } + + with patch.object(type(model).__bases__[0], "_get_request_payload", return_value=base_payload): + with patch.object(model, "_convert_input") as mock_convert: + mock_convert.return_value = MagicMock(to_messages=lambda: [human, ai]) + payload = model._get_request_payload([human, ai]) + + assert "reasoning_content" not in payload["messages"][1] + + +def test_create_chat_result_maps_message_reasoning_content(): + model = _make_model() + response = { + "choices": [ + { + "message": { + "role": "assistant", + "content": "The weather is sunny.", + "reasoning_content": "The tool returned sunny weather, so answer directly.", + "tool_calls": None, + }, + "finish_reason": "stop", + } + ], + "model": "mimo-v2.5-pro", + } + + result = model._create_chat_result(response) + message = result.generations[0].message + + 
assert message.content == "The weather is sunny." + assert message.additional_kwargs["reasoning_content"] == "The tool returned sunny weather, so answer directly." + + +def test_create_chat_result_reads_reasoning_content_from_message_attribute(): + model = _make_model() + + class FakeMessage: + reasoning_content = "Reasoning stored on the SDK message object." + + class FakeChoice: + message = FakeMessage() + + class FakeResponse: + choices = [FakeChoice()] + + def model_dump(self, **kwargs): + return { + "choices": [ + { + "message": { + "role": "assistant", + "content": "Answer.", + }, + "finish_reason": "stop", + } + ], + "model": "mimo-v2.5-pro", + } + + result = model._create_chat_result(FakeResponse()) + + assert result.generations[0].message.additional_kwargs["reasoning_content"] == "Reasoning stored on the SDK message object." + + +def test_convert_chunk_to_generation_chunk_preserves_reasoning_deltas(): + model = _make_model() + + first = model._convert_chunk_to_generation_chunk( + {"choices": [{"delta": {"role": "assistant", "reasoning_content": "I need "}}]}, + AIMessageChunk, + {}, + ) + second = model._convert_chunk_to_generation_chunk( + {"choices": [{"delta": {"reasoning_content": "a tool."}}]}, + AIMessageChunk, + {}, + ) + answer = model._convert_chunk_to_generation_chunk( + {"choices": [{"delta": {"content": "Done."}, "finish_reason": "stop"}], "model": "mimo-v2.5-pro"}, + AIMessageChunk, + {}, + ) + + assert first is not None + assert second is not None + assert answer is not None + + combined = first.message + second.message + answer.message + + assert combined.additional_kwargs["reasoning_content"] == "I need a tool." + assert combined.content == "Done." 
diff --git a/config.example.yaml b/config.example.yaml index 4e5a1abce..118b1be4d 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -177,6 +177,38 @@ models: # thinking: # type: disabled + # Example: Xiaomi MiMo model (with thinking support) + # MiMo thinking mode returns reasoning_content and requires that field to be + # replayed on historical assistant messages in multi-turn agent/tool-call + # conversations. Use PatchedChatMiMo instead of plain ChatOpenAI. + # Use https://api.xiaomimimo.com/v1 with pay-as-you-go `sk-...` keys. + # Use your Token Plan regional URL (for example + # https://token-plan-cn.xiaomimimo.com/v1) with Token Plan `tp-...` keys. + # PatchedChatMiMo is model-id agnostic; use it for every MiMo thinking model + # entry you configure (for example mimo-v2.5-pro, mimo-v2.5, mimo-v2-pro, + # mimo-v2-omni, or mimo-v2-flash), including models referenced by subagent + # model overrides. + # See: https://platform.xiaomimimo.com/docs/en-US/usage-guide/passing-back-reasoning_content + # - name: mimo-v2.5-pro + # display_name: MiMo V2.5 Pro + # use: deerflow.models.patched_mimo:PatchedChatMiMo + # model: mimo-v2.5-pro + # api_key: $MIMO_API_KEY + # base_url: https://api.xiaomimimo.com/v1 + # request_timeout: 600.0 + # max_retries: 2 + # max_tokens: 8192 + # supports_thinking: true + # supports_vision: false + # when_thinking_enabled: + # extra_body: + # thinking: + # type: enabled + # when_thinking_disabled: + # extra_body: + # thinking: + # type: disabled + # Example: DeepSeek model (with thinking support) # - name: deepseek-v3 # display_name: DeepSeek V3 (Thinking)