mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-30 20:38:09 +00:00
feat(provider) Add patched MiMo reasoning content support (#3298)
* Add patched MiMo reasoning content support * Clarify MiMo patched model coverage * Remove unused MiMo payload index * Address MiMo review nits
This commit is contained in:
parent
2fdfff0db3
commit
44677c5eb4
@ -36,6 +36,7 @@ models:
|
||||
- OpenAI (`langchain_openai:ChatOpenAI`)
|
||||
- Anthropic (`langchain_anthropic:ChatAnthropic`)
|
||||
- DeepSeek (`langchain_deepseek:ChatDeepSeek`)
|
||||
- Xiaomi MiMo (`deerflow.models.patched_mimo:PatchedChatMiMo`)
|
||||
- Claude Code OAuth (`deerflow.models.claude_provider:ClaudeChatModel`)
|
||||
- Codex CLI (`deerflow.models.openai_codex_provider:CodexChatModel`)
|
||||
- Any LangChain-compatible provider
|
||||
@ -166,6 +167,37 @@ models:
|
||||
|
||||
For Gemini accessed **without** thinking (e.g. via OpenRouter where thinking is not activated), the plain `langchain_openai:ChatOpenAI` with `supports_thinking: false` is sufficient and no patch is needed.
|
||||
|
||||
**MiMo with thinking via OpenAI-compatible API**:
|
||||
|
||||
MiMo returns `reasoning_content` on assistant messages in thinking mode. In multi-turn agent conversations with tool calls, subsequent requests must preserve that historical `reasoning_content` on assistant messages or the MiMo API can return HTTP 400. Standard `langchain_openai:ChatOpenAI` drops this provider-specific field, so use `deerflow.models.patched_mimo:PatchedChatMiMo`:
|
||||
|
||||
For pay-as-you-go API keys (`sk-...`), use `https://api.xiaomimimo.com/v1`. For Token Plan keys (`tp-...`), use the regional Token Plan Base URL shown in the MiMo console, such as `https://token-plan-cn.xiaomimimo.com/v1`. MiMo documents these key types as separate and non-interchangeable.
|
||||
|
||||
`PatchedChatMiMo` is model-id agnostic. Use it for every MiMo thinking model entry you configure, including model entries referenced by `subagents.*.model` overrides (for example `mimo-v2.5-pro`, `mimo-v2.5`, `mimo-v2-pro`, `mimo-v2-omni`, or `mimo-v2-flash`).
|
||||
|
||||
```yaml
|
||||
models:
|
||||
- name: mimo-v2.5-pro
|
||||
display_name: MiMo V2.5 Pro
|
||||
use: deerflow.models.patched_mimo:PatchedChatMiMo
|
||||
model: mimo-v2.5-pro
|
||||
api_key: $MIMO_API_KEY
|
||||
base_url: https://api.xiaomimimo.com/v1
|
||||
max_tokens: 8192
|
||||
supports_thinking: true
|
||||
supports_vision: false
|
||||
when_thinking_enabled:
|
||||
extra_body:
|
||||
thinking:
|
||||
type: enabled
|
||||
when_thinking_disabled:
|
||||
extra_body:
|
||||
thinking:
|
||||
type: disabled
|
||||
```
|
||||
|
||||
`PatchedChatMiMo` preserves MiMo's `choices[].message.reasoning_content`, streaming `delta.reasoning_content`, and request-history assistant `reasoning_content` fields. It does not reuse the DeepSeek provider.
|
||||
|
||||
### Tool Groups
|
||||
|
||||
Organize tools into logical groups:
|
||||
@ -319,6 +351,7 @@ models:
|
||||
- `OPENAI_API_KEY` - OpenAI API key
|
||||
- `ANTHROPIC_API_KEY` - Anthropic API key
|
||||
- `DEEPSEEK_API_KEY` - DeepSeek API key
|
||||
- `MIMO_API_KEY` - Xiaomi MiMo API key
|
||||
- `NOVITA_API_KEY` - Novita API key (OpenAI-compatible endpoint)
|
||||
- `TAVILY_API_KEY` - Tavily search API key
|
||||
- `DEER_FLOW_PROJECT_ROOT` - Project root for relative runtime paths
|
||||
|
||||
150
backend/packages/harness/deerflow/models/patched_mimo.py
Normal file
150
backend/packages/harness/deerflow/models/patched_mimo.py
Normal file
@ -0,0 +1,150 @@
|
||||
"""Patched ChatOpenAI adapter for Xiaomi MiMo reasoning_content replay.
|
||||
|
||||
MiMo's OpenAI-compatible API returns ``reasoning_content`` in thinking mode and
|
||||
requires that value to be replayed on historical assistant messages in
|
||||
multi-turn agent conversations. Standard ``langchain_openai.ChatOpenAI`` drops
|
||||
that provider-specific field, which can cause HTTP 400 errors once tool calls
|
||||
enter the conversation history.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from collections.abc import Mapping
|
||||
from typing import Any
|
||||
|
||||
from langchain_core.language_models import LanguageModelInput
|
||||
from langchain_core.messages import AIMessage, AIMessageChunk
|
||||
from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
_MISSING = object()
|
||||
|
||||
|
||||
def _extract_reasoning_content(value: Any) -> str | object:
|
||||
"""Return reasoning_content from a dict/Pydantic object, preserving empty strings."""
|
||||
if isinstance(value, Mapping):
|
||||
if "reasoning_content" in value and value["reasoning_content"] is not None:
|
||||
return value["reasoning_content"]
|
||||
return _MISSING
|
||||
|
||||
reasoning = getattr(value, "reasoning_content", _MISSING)
|
||||
if reasoning is not _MISSING and reasoning is not None:
|
||||
return reasoning
|
||||
|
||||
model_extra = getattr(value, "model_extra", None)
|
||||
if isinstance(model_extra, Mapping) and "reasoning_content" in model_extra and model_extra["reasoning_content"] is not None:
|
||||
return model_extra["reasoning_content"]
|
||||
|
||||
return _MISSING
|
||||
|
||||
|
||||
def _with_reasoning_content(message: AIMessage | AIMessageChunk, reasoning: str) -> AIMessage | AIMessageChunk:
    """Return a copy of *message* whose ``additional_kwargs`` carry *reasoning*.

    The input message's ``additional_kwargs`` dict is copied, not mutated; the
    result is produced through ``message.model_copy``.
    """
    merged_kwargs = {**message.additional_kwargs}
    already_set = merged_kwargs.get("reasoning_content") == reasoning
    if not already_set:
        merged_kwargs.update(reasoning_content=reasoning)
    return message.model_copy(update={"additional_kwargs": merged_kwargs})
|
||||
|
||||
|
||||
def _restore_reasoning_content(payload_msg: dict, orig_msg: AIMessage) -> None:
    """Copy ``reasoning_content`` from *orig_msg* onto *payload_msg* in place.

    The value is read from ``orig_msg.additional_kwargs``. When it is absent
    or ``None`` the payload message is left untouched.
    """
    preserved = orig_msg.additional_kwargs.get("reasoning_content")
    if preserved is None:
        return
    payload_msg["reasoning_content"] = preserved
|
||||
|
||||
|
||||
def _get_typed_choice_message(response: Any, index: int) -> Any:
    """Return ``response.choices[index].message``, or ``None`` if unreachable.

    ``None`` is returned when the response has no ``choices`` attribute (or it
    is ``None``), when *index* is out of range, when ``choices`` cannot be
    indexed, or when the selected choice has no ``message`` attribute.
    """
    candidates = getattr(response, "choices", None)
    if candidates is None:
        return None

    try:
        selected = candidates[index]
        message = selected.message
    except (AttributeError, IndexError, TypeError):
        return None
    return message
|
||||
|
||||
|
||||
class PatchedChatMiMo(ChatOpenAI):
    """ChatOpenAI with ``reasoning_content`` preservation for MiMo thinking mode.

    Three hooks of the parent class are wrapped:

    * ``_get_request_payload`` copies ``reasoning_content`` stored in
      ``AIMessage.additional_kwargs`` back onto outgoing assistant messages.
    * ``_convert_chunk_to_generation_chunk`` copies a streaming
      ``delta.reasoning_content`` into the chunk message's
      ``additional_kwargs``.
    * ``_create_chat_result`` copies ``choices[].message.reasoning_content``
      into the resulting message's ``additional_kwargs``.
    """

    @classmethod
    def is_lc_serializable(cls) -> bool:
        """Report that instances of this class are LangChain-serializable."""
        return True

    @property
    def lc_secrets(self) -> dict[str, str]:
        """Map the API-key constructor arguments to the ``MIMO_API_KEY`` name."""
        return {"api_key": "MIMO_API_KEY", "openai_api_key": "MIMO_API_KEY"}

    def _get_request_payload(
        self,
        input_: LanguageModelInput,
        *,
        stop: list[str] | None = None,
        **kwargs: Any,
    ) -> dict:
        """Build the request payload and re-attach historical ``reasoning_content``.

        Args:
            input_: Model input, converted to messages via ``_convert_input``.
            stop: Optional stop sequences, forwarded to the parent class.
            **kwargs: Extra options, forwarded to the parent class.

        Returns:
            The parent's payload dict; assistant entries in
            ``payload["messages"]`` are mutated in place to carry the
            ``reasoning_content`` of their source ``AIMessage`` when present.
        """
        original_messages = self._convert_input(input_).to_messages()
        payload = super()._get_request_payload(input_, stop=stop, **kwargs)
        payload_messages = payload.get("messages", [])

        if len(payload_messages) == len(original_messages):
            # Same count on both sides: pair payload and source by position.
            for payload_msg, orig_msg in zip(payload_messages, original_messages):
                if payload_msg.get("role") == "assistant" and isinstance(orig_msg, AIMessage):
                    _restore_reasoning_content(payload_msg, orig_msg)
        else:
            # Counts differ, so positions no longer line up. Best effort:
            # pair the n-th assistant payload with the n-th AIMessage.
            ai_messages = [m for m in original_messages if isinstance(m, AIMessage)]
            assistant_payloads = [m for m in payload_messages if m.get("role") == "assistant"]
            for payload_msg, ai_msg in zip(assistant_payloads, ai_messages):
                _restore_reasoning_content(payload_msg, ai_msg)

        return payload

    def _convert_chunk_to_generation_chunk(
        self,
        chunk: dict,
        default_chunk_class: type,
        base_generation_info: dict | None,
    ) -> ChatGenerationChunk | None:
        """Convert a raw stream chunk, keeping ``delta.reasoning_content``.

        Args:
            chunk: Raw streaming chunk as a dict.
            default_chunk_class: Message chunk class, forwarded to the parent.
            base_generation_info: Generation info, forwarded to the parent.

        Returns:
            ``None`` when the parent yields nothing for this chunk; otherwise
            the parent's generation chunk, rebuilt with ``reasoning_content``
            in ``additional_kwargs`` when the first choice's delta has one.
        """
        generation_chunk = super()._convert_chunk_to_generation_chunk(
            chunk,
            default_chunk_class,
            base_generation_info,
        )
        if generation_chunk is None:
            return None

        choices = chunk.get("choices") or []
        if not choices:
            # Events from the SDK's ``beta.chat.completions.stream`` helper
            # nest the raw chunk under a "chunk" key; the upstream ChatOpenAI
            # conversion reads that location too, so mirror it here to avoid
            # dropping reasoning deltas on that code path.
            nested_chunk = chunk.get("chunk")
            if isinstance(nested_chunk, Mapping):
                choices = nested_chunk.get("choices") or []
        if not choices:
            return generation_chunk

        delta = choices[0].get("delta") or {}
        reasoning = _extract_reasoning_content(delta)
        if reasoning is not _MISSING and isinstance(generation_chunk.message, AIMessageChunk):
            generation_chunk = ChatGenerationChunk(
                message=_with_reasoning_content(generation_chunk.message, reasoning),
                generation_info=generation_chunk.generation_info,
            )

        return generation_chunk

    def _create_chat_result(
        self,
        response: dict | Any,
        generation_info: dict | None = None,
    ) -> ChatResult:
        """Build the chat result and attach each choice's ``reasoning_content``.

        Args:
            response: Completion response, either a dict or an object that
                exposes ``model_dump()``.
            generation_info: Optional generation info, forwarded to the parent.

        Returns:
            A ``ChatResult`` with the parent's ``llm_output``. Generations
            whose matching choice carries ``reasoning_content`` are replaced
            by copies holding it in ``additional_kwargs``.
        """
        result = super()._create_chat_result(response, generation_info)
        response_dict = response if isinstance(response, dict) else response.model_dump()
        choices = response_dict.get("choices", [])

        # Copy the generations list lazily, only once something needs patching.
        patched_generations: list[ChatGeneration] | None = None
        for index, generation in enumerate(result.generations):
            choice = choices[index] if index < len(choices) else {}
            choice_message = choice.get("message", {}) if isinstance(choice, Mapping) else {}
            reasoning = _extract_reasoning_content(choice_message)
            if reasoning is _MISSING and not isinstance(response, dict):
                # The dumped dict had no value; fall back to the attribute on
                # the typed response object itself.
                reasoning = _extract_reasoning_content(_get_typed_choice_message(response, index))

            message = generation.message
            if reasoning is not _MISSING and isinstance(message, AIMessage):
                if patched_generations is None:
                    patched_generations = list(result.generations)
                patched_generations[index] = ChatGeneration(
                    message=_with_reasoning_content(message, reasoning),
                    generation_info=generation.generation_info,
                )

        return ChatResult(generations=patched_generations or result.generations, llm_output=result.llm_output)
|
||||
@ -995,6 +995,41 @@ def test_openai_responses_api_settings_are_passed_to_chatopenai(monkeypatch):
|
||||
assert captured.get("output_version") == "responses/v1"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Provider class path resolution
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize("model_id", ["mimo-v2.5-pro", "mimo-v2.5", "mimo-v2-flash"])
def test_create_chat_model_resolves_patched_mimo_provider(model_id):
    """The factory builds a ``PatchedChatMiMo`` with thinking enabled for each id."""
    from deerflow.models.patched_mimo import PatchedChatMiMo

    entry_name = f"{model_id}-thinking"
    thinking_on = {"extra_body": {"thinking": {"type": "enabled"}}}
    mimo_entry = ModelConfig(
        name=entry_name,
        display_name=f"{model_id} Thinking",
        description=None,
        use="deerflow.models.patched_mimo:PatchedChatMiMo",
        model=model_id,
        api_key="test-key",
        base_url="https://api.xiaomimimo.com/v1",
        supports_thinking=True,
        when_thinking_enabled=thinking_on,
        supports_vision=False,
    )
    app_cfg = _make_app_config([mimo_entry])

    built = factory_module.create_chat_model(
        name=entry_name,
        thinking_enabled=True,
        app_config=app_cfg,
        attach_tracing=False,
    )

    assert isinstance(built, PatchedChatMiMo)
    assert built.model_name == model_id
    assert built.extra_body["thinking"]["type"] == "enabled"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Duplicate keyword argument collision (issue #1977)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
169
backend/tests/test_patched_mimo.py
Normal file
169
backend/tests/test_patched_mimo.py
Normal file
@ -0,0 +1,169 @@
|
||||
"""Tests for deerflow.models.patched_mimo.PatchedChatMiMo."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
from langchain_core.messages import AIMessage, AIMessageChunk, HumanMessage
|
||||
|
||||
|
||||
def _make_model(**kwargs):
    """Build a ``PatchedChatMiMo`` with fixed test credentials plus *kwargs*."""
    from deerflow.models import patched_mimo

    model_cls = patched_mimo.PatchedChatMiMo
    return model_cls(
        model="mimo-v2.5-pro",
        api_key="test-key",
        base_url="https://api.xiaomimimo.com/v1",
        **kwargs,
    )
|
||||
|
||||
|
||||
def test_is_lc_serializable_returns_true():
    """The class-level serializability flag is exactly ``True``."""
    from deerflow.models.patched_mimo import PatchedChatMiMo

    flag = PatchedChatMiMo.is_lc_serializable()

    assert flag is True
|
||||
|
||||
|
||||
def test_lc_secrets_contains_mimo_api_key_mapping():
    """Both API-key aliases in ``lc_secrets`` point at ``MIMO_API_KEY``."""
    secrets_map = _make_model().lc_secrets

    for alias in ("api_key", "openai_api_key"):
        assert secrets_map[alias] == "MIMO_API_KEY"
|
||||
|
||||
|
||||
def test_reasoning_content_injected_into_assistant_tool_call_message():
    """Stored reasoning is copied onto the assistant entry of the payload."""
    model = _make_model()

    user_turn = HumanMessage(content="Check Beijing weather.")
    assistant_turn = AIMessage(
        content="",
        additional_kwargs={"reasoning_content": "I need to call the weather tool."},
    )
    weather_call = {
        "id": "call_weather",
        "type": "function",
        "function": {"name": "get_weather", "arguments": '{"location":"Beijing"}'},
    }
    assistant_payload = {"role": "assistant", "content": "", "tool_calls": [weather_call]}
    parent_payload = {
        "messages": [
            {"role": "user", "content": "Check Beijing weather."},
            assistant_payload,
        ]
    }

    # Stub the parent serializer and the input conversion so that only the
    # reasoning_content restoration logic runs.
    parent_cls = type(model).__bases__[0]
    history = [user_turn, assistant_turn]
    with (
        patch.object(parent_cls, "_get_request_payload", return_value=parent_payload),
        patch.object(model, "_convert_input") as convert_stub,
    ):
        convert_stub.return_value = MagicMock(to_messages=lambda: [user_turn, assistant_turn])
        payload = model._get_request_payload(history)

    assert payload["messages"][1]["reasoning_content"] == "I need to call the weather tool."
|
||||
|
||||
|
||||
def test_reasoning_content_is_noop_when_missing():
    """No ``reasoning_content`` key is added when the AIMessage has none."""
    model = _make_model()

    user_turn = HumanMessage(content="hello")
    assistant_turn = AIMessage(content="hi", additional_kwargs={})
    parent_payload = {
        "messages": [
            {"role": "user", "content": "hello"},
            {"role": "assistant", "content": "hi"},
        ]
    }

    parent_cls = type(model).__bases__[0]
    history = [user_turn, assistant_turn]
    with (
        patch.object(parent_cls, "_get_request_payload", return_value=parent_payload),
        patch.object(model, "_convert_input") as convert_stub,
    ):
        convert_stub.return_value = MagicMock(to_messages=lambda: [user_turn, assistant_turn])
        payload = model._get_request_payload(history)

    assistant_entry = payload["messages"][1]
    assert "reasoning_content" not in assistant_entry
|
||||
|
||||
|
||||
def test_create_chat_result_maps_message_reasoning_content():
    """A dict response's ``reasoning_content`` lands in ``additional_kwargs``."""
    model = _make_model()
    assistant_message = {
        "role": "assistant",
        "content": "The weather is sunny.",
        "reasoning_content": "The tool returned sunny weather, so answer directly.",
        "tool_calls": None,
    }
    response = {
        "choices": [{"message": assistant_message, "finish_reason": "stop"}],
        "model": "mimo-v2.5-pro",
    }

    first_generation = model._create_chat_result(response).generations[0]
    message = first_generation.message

    assert message.content == "The weather is sunny."
    assert message.additional_kwargs["reasoning_content"] == "The tool returned sunny weather, so answer directly."
|
||||
|
||||
|
||||
def test_create_chat_result_reads_reasoning_content_from_message_attribute():
    """Reasoning absent from ``model_dump()`` is read from the message attribute."""
    model = _make_model()

    class FakeMessage:
        reasoning_content = "Reasoning stored on the SDK message object."

    class FakeChoice:
        message = FakeMessage()

    class FakeResponse:
        choices = [FakeChoice()]

        def model_dump(self, **kwargs):
            # Deliberately omits reasoning_content so that only the typed
            # attribute on FakeMessage can supply it.
            dumped_message = {"role": "assistant", "content": "Answer."}
            dumped_choice = {"message": dumped_message, "finish_reason": "stop"}
            return {"choices": [dumped_choice], "model": "mimo-v2.5-pro"}

    result = model._create_chat_result(FakeResponse())
    extras = result.generations[0].message.additional_kwargs

    assert extras["reasoning_content"] == "Reasoning stored on the SDK message object."
|
||||
|
||||
|
||||
def test_convert_chunk_to_generation_chunk_preserves_reasoning_deltas():
    """Reasoning deltas survive conversion and concatenate when chunks merge."""
    model = _make_model()
    raw_chunks = [
        {"choices": [{"delta": {"role": "assistant", "reasoning_content": "I need "}}]},
        {"choices": [{"delta": {"reasoning_content": "a tool."}}]},
        {"choices": [{"delta": {"content": "Done."}, "finish_reason": "stop"}], "model": "mimo-v2.5-pro"},
    ]

    converted = [model._convert_chunk_to_generation_chunk(raw, AIMessageChunk, {}) for raw in raw_chunks]

    for piece in converted:
        assert piece is not None

    first, second, answer = converted
    merged = first.message + second.message + answer.message

    assert merged.additional_kwargs["reasoning_content"] == "I need a tool."
    assert merged.content == "Done."
|
||||
@ -177,6 +177,38 @@ models:
|
||||
# thinking:
|
||||
# type: disabled
|
||||
|
||||
# Example: Xiaomi MiMo model (with thinking support)
|
||||
# MiMo thinking mode returns reasoning_content and requires that field to be
|
||||
# replayed on historical assistant messages in multi-turn agent/tool-call
|
||||
# conversations. Use PatchedChatMiMo instead of plain ChatOpenAI.
|
||||
# Use https://api.xiaomimimo.com/v1 with pay-as-you-go `sk-...` keys.
|
||||
# Use your Token Plan regional URL (for example
|
||||
# https://token-plan-cn.xiaomimimo.com/v1) with Token Plan `tp-...` keys.
|
||||
# PatchedChatMiMo is model-id agnostic; use it for every MiMo thinking model
|
||||
# entry you configure (for example mimo-v2.5-pro, mimo-v2.5, mimo-v2-pro,
|
||||
# mimo-v2-omni, or mimo-v2-flash), including models referenced by subagent
|
||||
# model overrides.
|
||||
# See: https://platform.xiaomimimo.com/docs/en-US/usage-guide/passing-back-reasoning_content
|
||||
# - name: mimo-v2.5-pro
|
||||
# display_name: MiMo V2.5 Pro
|
||||
# use: deerflow.models.patched_mimo:PatchedChatMiMo
|
||||
# model: mimo-v2.5-pro
|
||||
# api_key: $MIMO_API_KEY
|
||||
# base_url: https://api.xiaomimimo.com/v1
|
||||
# request_timeout: 600.0
|
||||
# max_retries: 2
|
||||
# max_tokens: 8192
|
||||
# supports_thinking: true
|
||||
# supports_vision: false
|
||||
# when_thinking_enabled:
|
||||
# extra_body:
|
||||
# thinking:
|
||||
# type: enabled
|
||||
# when_thinking_disabled:
|
||||
# extra_body:
|
||||
# thinking:
|
||||
# type: disabled
|
||||
|
||||
# Example: DeepSeek model (with thinking support)
|
||||
# - name: deepseek-v3
|
||||
# display_name: DeepSeek V3 (Thinking)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user