From 37337b77f91b829ec23b9055cecc54aae38b243b Mon Sep 17 00:00:00 2001 From: hataa <79907651+hata33@users.noreply.github.com> Date: Tue, 9 Jun 2026 18:01:43 +0800 Subject: [PATCH] feat(models): add StepFun reasoning model adapter (#3461) Add PatchedChatStepFun adapter for StepFun reasoning models (step-3.7-flash, step-3.5-flash). Captures reasoning from both streaming and non-streaming responses and replays it on historical assistant messages for multi-turn tool-call conversations. - New: PatchedChatStepFun adapter with streaming/non-streaming reasoning capture - Support both reasoning and reasoning_content field names - 17 unit tests covering all response paths - Updated: config.example.yaml with StepFun configuration example --- .env.example | 1 + .../deerflow/models/patched_stepfun.py | 175 ++++++++++ backend/tests/test_patched_stepfun.py | 305 ++++++++++++++++++ config.example.yaml | 26 ++ 4 files changed, 507 insertions(+) create mode 100644 backend/packages/harness/deerflow/models/patched_stepfun.py create mode 100644 backend/tests/test_patched_stepfun.py diff --git a/.env.example b/.env.example index c4dbe326e..aec43adcf 100644 --- a/.env.example +++ b/.env.example @@ -21,6 +21,7 @@ INFOQUEST_API_KEY=your-infoquest-api-key # DEEPSEEK_API_KEY=your-deepseek-api-key # NOVITA_API_KEY=your-novita-api-key # OpenAI-compatible, see https://novita.ai # MINIMAX_API_KEY=your-minimax-api-key # OpenAI-compatible, see https://platform.minimax.io +# STEPFUN_API_KEY=your-stepfun-api-key # OpenAI-compatible, see https://platform.stepfun.com # VLLM_API_KEY=your-vllm-api-key # OpenAI-compatible # FEISHU_APP_ID=your-feishu-app-id # FEISHU_APP_SECRET=your-feishu-app-secret diff --git a/backend/packages/harness/deerflow/models/patched_stepfun.py b/backend/packages/harness/deerflow/models/patched_stepfun.py new file mode 100644 index 000000000..1a30332a2 --- /dev/null +++ b/backend/packages/harness/deerflow/models/patched_stepfun.py @@ -0,0 +1,175 @@ +"""Patched ChatOpenAI adapter 
for StepFun reasoning models. + +StepFun returns ``reasoning`` (or ``reasoning_content`` with deepseek-style) in +both streaming deltas and non-streaming responses. Standard ``ChatOpenAI`` +ignores these non-standard fields, so reasoning content is silently dropped. +This adapter captures reasoning from all response paths and replays it on +historical assistant messages for multi-turn tool-call conversations. +""" + +from __future__ import annotations + +from collections.abc import Mapping +from typing import Any + +from langchain_core.language_models import LanguageModelInput +from langchain_core.messages import AIMessage, AIMessageChunk +from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult +from langchain_openai import ChatOpenAI + +from deerflow.models.assistant_payload_replay import ( + restore_assistant_payloads, + restore_reasoning_content, +) + +_MISSING = object() + + +def _extract_reasoning(value: Any) -> str | object: + """Return reasoning content from a dict/Pydantic object. + + StepFun may return reasoning via ``reasoning`` (default) or + ``reasoning_content`` (deepseek-style). Check both fields. 
+ """ + if isinstance(value, Mapping): + # Check reasoning_content first (deepseek-style), then reasoning (default) + for field in ("reasoning_content", "reasoning"): + if field in value and value[field] is not None: + return value[field] + return _MISSING + + # Pydantic / SDK object attributes + for field in ("reasoning_content", "reasoning"): + attr = getattr(value, field, _MISSING) + if attr is not _MISSING and attr is not None: + return attr + + # Some SDK versions store extra fields in model_extra + model_extra = getattr(value, "model_extra", None) + if isinstance(model_extra, Mapping): + for field in ("reasoning_content", "reasoning"): + if field in model_extra and model_extra[field] is not None: + return model_extra[field] + + return _MISSING + + +def _with_reasoning_content(message: AIMessage | AIMessageChunk, reasoning: str) -> AIMessage | AIMessageChunk: + """Return a copy of *message* with reasoning_content stored in additional_kwargs.""" + additional_kwargs = dict(message.additional_kwargs) + if additional_kwargs.get("reasoning_content") != reasoning: + additional_kwargs["reasoning_content"] = reasoning + return message.model_copy(update={"additional_kwargs": additional_kwargs}) + + +def _get_typed_choice_message(response: Any, index: int) -> Any: + """Extract the SDK-typed choice message at *index*, if available.""" + choices = getattr(response, "choices", None) + if choices is None: + return None + try: + return choices[index].message + except (AttributeError, IndexError, TypeError): + return None + + +class PatchedChatStepFun(ChatOpenAI): + """ChatOpenAI with full reasoning support for StepFun models. + + Captures ``reasoning`` / ``reasoning_content`` from both streaming and + non-streaming responses and replays it on historical assistant messages in + multi-turn tool-call conversations. 
+ """ + + @classmethod + def is_lc_serializable(cls) -> bool: + return True + + @property + def lc_secrets(self) -> dict[str, str]: + return {"api_key": "STEPFUN_API_KEY", "openai_api_key": "STEPFUN_API_KEY"} + + # --- Request payload replay --- + + def _get_request_payload( + self, + input_: LanguageModelInput, + *, + stop: list[str] | None = None, + **kwargs: Any, + ) -> dict: + """Restore ``reasoning_content`` on historical assistant messages.""" + original_messages = self._convert_input(input_).to_messages() + payload = super()._get_request_payload(input_, stop=stop, **kwargs) + + restore_assistant_payloads( + payload.get("messages", []), + original_messages, + restore_reasoning_content, + ) + + return payload + + # --- Streaming reasoning capture --- + + def _convert_chunk_to_generation_chunk( + self, + chunk: dict, + default_chunk_class: type, + base_generation_info: dict | None, + ) -> ChatGenerationChunk | None: + """Capture ``reasoning`` / ``reasoning_content`` from streaming deltas.""" + generation_chunk = super()._convert_chunk_to_generation_chunk( + chunk, + default_chunk_class, + base_generation_info, + ) + if generation_chunk is None: + return None + + choices = chunk.get("choices", []) + if choices: + delta = choices[0].get("delta") or {} + reasoning = _extract_reasoning(delta) + if reasoning is not _MISSING and isinstance(generation_chunk.message, AIMessageChunk): + generation_chunk = ChatGenerationChunk( + message=_with_reasoning_content(generation_chunk.message, reasoning), + generation_info=generation_chunk.generation_info, + ) + + return generation_chunk + + # --- Non-streaming reasoning capture --- + + def _create_chat_result( + self, + response: dict | Any, + generation_info: dict | None = None, + ) -> ChatResult: + """Extract ``reasoning`` / ``reasoning_content`` from non-streaming responses.""" + result = super()._create_chat_result(response, generation_info) + response_dict = response if isinstance(response, dict) else response.model_dump() 
+ choices = response_dict.get("choices", []) + + patched_generations: list[ChatGeneration] | None = None + for index, generation in enumerate(result.generations): + choice = choices[index] if index < len(choices) else {} + choice_message = choice.get("message", {}) if isinstance(choice, Mapping) else {} + reasoning = _extract_reasoning(choice_message) + + if reasoning is _MISSING and not isinstance(response, dict): + reasoning = _extract_reasoning(_get_typed_choice_message(response, index)) + + message = generation.message + if reasoning is not _MISSING and isinstance(message, AIMessage): + if patched_generations is None: + patched_generations = list(result.generations) + patched_generations[index] = ChatGeneration( + message=_with_reasoning_content(message, reasoning), + generation_info=generation.generation_info, + ) + + return ChatResult( + generations=patched_generations or result.generations, + llm_output=result.llm_output, + ) diff --git a/backend/tests/test_patched_stepfun.py b/backend/tests/test_patched_stepfun.py new file mode 100644 index 000000000..cc6221695 --- /dev/null +++ b/backend/tests/test_patched_stepfun.py @@ -0,0 +1,305 @@ +"""Tests for deerflow.models.patched_stepfun.PatchedChatStepFun.""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +from langchain_core.messages import AIMessage, AIMessageChunk, HumanMessage + + +def _make_model(**kwargs): + from deerflow.models.patched_stepfun import PatchedChatStepFun + + return PatchedChatStepFun( + model="step-3.7-flash", + api_key="test-key", + base_url="https://api.stepfun.com/v1", + **kwargs, + ) + + +# --------------------------------------------------------------------------- +# Basic properties +# --------------------------------------------------------------------------- + + +def test_is_lc_serializable_returns_true(): + from deerflow.models.patched_stepfun import PatchedChatStepFun + + assert PatchedChatStepFun.is_lc_serializable() is True + + +def 
test_lc_secrets_contains_stepfun_api_key_mapping(): + model = _make_model() + assert model.lc_secrets["api_key"] == "STEPFUN_API_KEY" + assert model.lc_secrets["openai_api_key"] == "STEPFUN_API_KEY" + + +# --------------------------------------------------------------------------- +# _extract_reasoning helper +# --------------------------------------------------------------------------- + + +def test_extract_reasoning_from_dict_with_reasoning(): + from deerflow.models.patched_stepfun import _extract_reasoning + + assert _extract_reasoning({"reasoning": "thinking..."}) == "thinking..." + + +def test_extract_reasoning_from_dict_with_reasoning_content(): + from deerflow.models.patched_stepfun import _extract_reasoning + + assert _extract_reasoning({"reasoning_content": "thinking..."}) == "thinking..." + + +def test_extract_reasoning_prefers_reasoning_content_over_reasoning(): + from deerflow.models.patched_stepfun import _extract_reasoning + + result = _extract_reasoning({"reasoning_content": "deepseek", "reasoning": "native"}) + assert result == "deepseek" + + +def test_extract_reasoning_missing_returns_sentinel(): + from deerflow.models.patched_stepfun import _MISSING, _extract_reasoning + + assert _extract_reasoning({}) is _MISSING + assert _extract_reasoning({"reasoning": None}) is _MISSING + + +# --------------------------------------------------------------------------- +# Request payload replay (_get_request_payload) +# --------------------------------------------------------------------------- + + +def test_reasoning_content_injected_into_assistant_tool_call_message(): + model = _make_model() + + human = HumanMessage(content="Check Beijing weather.") + ai = AIMessage( + content="", + additional_kwargs={"reasoning_content": "I need to call the weather tool."}, + ) + payload_message = { + "role": "assistant", + "content": "", + "tool_calls": [ + { + "id": "call_weather", + "type": "function", + "function": {"name": "get_weather", "arguments": 
'{"location":"Beijing"}'}, + } + ], + } + base_payload = { + "messages": [ + {"role": "user", "content": "Check Beijing weather."}, + payload_message, + ] + } + + with patch.object(type(model).__bases__[0], "_get_request_payload", return_value=base_payload): + with patch.object(model, "_convert_input") as mock_convert: + mock_convert.return_value = MagicMock(to_messages=lambda: [human, ai]) + payload = model._get_request_payload([human, ai]) + + assert payload["messages"][1]["reasoning_content"] == "I need to call the weather tool." + + +def test_reasoning_content_is_noop_when_missing(): + model = _make_model() + + human = HumanMessage(content="hello") + ai = AIMessage(content="hi", additional_kwargs={}) + base_payload = { + "messages": [ + {"role": "user", "content": "hello"}, + {"role": "assistant", "content": "hi"}, + ] + } + + with patch.object(type(model).__bases__[0], "_get_request_payload", return_value=base_payload): + with patch.object(model, "_convert_input") as mock_convert: + mock_convert.return_value = MagicMock(to_messages=lambda: [human, ai]) + payload = model._get_request_payload([human, ai]) + + assert "reasoning_content" not in payload["messages"][1] + + +# --------------------------------------------------------------------------- +# Streaming reasoning capture (_convert_chunk_to_generation_chunk) +# --------------------------------------------------------------------------- + + +def test_convert_chunk_captures_reasoning_field(): + """StepFun default format: delta.reasoning.""" + model = _make_model() + + chunk = model._convert_chunk_to_generation_chunk( + {"choices": [{"delta": {"role": "assistant", "reasoning": "I need "}}]}, + AIMessageChunk, + {}, + ) + + assert chunk is not None + assert chunk.message.additional_kwargs["reasoning_content"] == "I need " + + +def test_convert_chunk_captures_reasoning_content_field(): + """StepFun deepseek-style format: delta.reasoning_content.""" + model = _make_model() + + chunk = 
model._convert_chunk_to_generation_chunk( + {"choices": [{"delta": {"role": "assistant", "reasoning_content": "I need "}}]}, + AIMessageChunk, + {}, + ) + + assert chunk is not None + assert chunk.message.additional_kwargs["reasoning_content"] == "I need " + + +def test_convert_chunk_streams_reasoning_then_content(): + """Full streaming flow: reasoning deltas followed by content.""" + model = _make_model() + + first = model._convert_chunk_to_generation_chunk( + {"choices": [{"delta": {"role": "assistant", "reasoning": "I need "}}]}, + AIMessageChunk, + {}, + ) + second = model._convert_chunk_to_generation_chunk( + {"choices": [{"delta": {"reasoning": "a tool."}}]}, + AIMessageChunk, + {}, + ) + answer = model._convert_chunk_to_generation_chunk( + {"choices": [{"delta": {"content": "Done."}, "finish_reason": "stop"}], "model": "step-3.7-flash"}, + AIMessageChunk, + {}, + ) + + assert first is not None + assert second is not None + assert answer is not None + + combined = first.message + second.message + answer.message + assert combined.additional_kwargs["reasoning_content"] == "I need a tool." + assert combined.content == "Done." 


def test_convert_chunk_noop_when_no_reasoning():
    """A delta without reasoning leaves ``additional_kwargs`` untouched."""
    instance = _make_model()
    raw_chunk = {
        "choices": [{"delta": {"content": "Hello."}, "finish_reason": "stop"}],
        "model": "step-3.7-flash",
    }

    converted = instance._convert_chunk_to_generation_chunk(raw_chunk, AIMessageChunk, {})

    assert converted is not None
    assert "reasoning_content" not in converted.message.additional_kwargs


# ---------------------------------------------------------------------------
# Non-streaming reasoning capture (_create_chat_result)
# ---------------------------------------------------------------------------


def test_create_chat_result_extracts_reasoning_field():
    """``message.reasoning`` (StepFun default format) lands in additional_kwargs."""
    instance = _make_model()
    assistant_message = {
        "role": "assistant",
        "content": "The weather is sunny.",
        "reasoning": "The tool returned sunny weather.",
    }
    response = {
        "choices": [{"message": assistant_message, "finish_reason": "stop"}],
        "model": "step-3.7-flash",
    }

    produced = instance._create_chat_result(response).generations[0].message

    assert produced.content == "The weather is sunny."
    assert produced.additional_kwargs["reasoning_content"] == "The tool returned sunny weather."


def test_create_chat_result_extracts_reasoning_content_field():
    """``message.reasoning_content`` (deepseek-style format) lands in additional_kwargs."""
    instance = _make_model()
    assistant_message = {
        "role": "assistant",
        "content": "The weather is sunny.",
        "reasoning_content": "The tool returned sunny weather.",
    }
    response = {
        "choices": [{"message": assistant_message, "finish_reason": "stop"}],
        "model": "step-3.7-flash",
    }

    produced = instance._create_chat_result(response).generations[0].message

    assert produced.content == "The weather is sunny."
    assert produced.additional_kwargs["reasoning_content"] == "The tool returned sunny weather."


def test_create_chat_result_reads_reasoning_from_sdk_object():
    """Reasoning absent from the dumped dict is read from the typed message object."""
    instance = _make_model()

    # The dumped form deliberately carries no reasoning field, so the adapter
    # has to fall back to the attribute on the typed message.
    dumped = {
        "choices": [
            {
                "message": {
                    "role": "assistant",
                    "content": "Answer.",
                },
                "finish_reason": "stop",
            }
        ],
        "model": "step-3.7-flash",
    }

    class StubMessage:
        reasoning = "Reasoning stored on the SDK message object."
        reasoning_content = None
        model_extra = None

    class StubChoice:
        message = StubMessage()

    class StubResponse:
        choices = [StubChoice()]

        def model_dump(self, **kwargs):
            return dumped

    outcome = instance._create_chat_result(StubResponse())
    stored = outcome.generations[0].message.additional_kwargs["reasoning_content"]
    assert stored == "Reasoning stored on the SDK message object."


def test_create_chat_result_noop_when_no_reasoning():
    """A response without any reasoning field adds no ``reasoning_content`` key."""
    instance = _make_model()
    assistant_message = {
        "role": "assistant",
        "content": "Hello!",
    }
    response = {
        "choices": [{"message": assistant_message, "finish_reason": "stop"}],
        "model": "step-3.7-flash",
    }

    outcome = instance._create_chat_result(response)
    assert "reasoning_content" not in outcome.generations[0].message.additional_kwargs


# diff --git a/config.example.yaml b/config.example.yaml
# index 5de11e226..290ef3302 100644
# --- a/config.example.yaml
# +++ b/config.example.yaml
# @@ -274,6 +274,32 @@ models:
#    #     thinking:
#    #       type: disabled
#
# +  # Example: StepFun (阶跃星辰) reasoning models
# +  # StepFun provides OpenAI-compatible API with reasoning models.
# +  # With reasoning_format: deepseek-style, the API returns reasoning_content
# +  # (same field as DeepSeek), which must be replayed on historical assistant
# +  # messages in multi-turn tool-call conversations.
# +  # Use PatchedChatStepFun instead of plain ChatOpenAI.
+ # Docs: https://platform.stepfun.com/docs/api-reference/chat-completions + # - name: step-3.7-flash + # display_name: Step 3.7 Flash + # use: deerflow.models.patched_stepfun:PatchedChatStepFun + # model: step-3.7-flash + # api_key: $STEPFUN_API_KEY + # base_url: https://api.stepfun.com/v1 + # request_timeout: 600.0 + # max_retries: 2 + # max_tokens: 4096 + # supports_thinking: true + # supports_reasoning_effort: true + # supports_vision: true + # when_thinking_enabled: + # extra_body: + # reasoning_format: deepseek-style + # when_thinking_disabled: + # extra_body: + # reasoning_format: deepseek-style + # Example: MiniMax (OpenAI-compatible) - International Edition # MiniMax provides high-performance models with 512K context window and 128K max output # Docs: https://platform.minimax.io/docs/api-reference/text-openai-api