From 194bab469143f5dc370513144800705d3c4467ab Mon Sep 17 00:00:00 2001 From: shivam johri Date: Thu, 9 Apr 2026 16:19:00 +0530 Subject: [PATCH] feat(config): add when_thinking_disabled support for model configs (#1970) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(config): add when_thinking_disabled support for model configs Allow users to explicitly configure what parameters are sent to the model when thinking is disabled, via a new `when_thinking_disabled` field in model config. This mirrors the existing `when_thinking_enabled` pattern and takes full precedence over the hardcoded disable behavior when set. Backwards compatible — existing configs work unchanged. Closes #1675 * fix(config): address copilot review — gate when_thinking_disabled independently - Switch truthiness check to `is not None` so empty dict overrides work - Restructure disable path so when_thinking_disabled is gated independently of has_thinking_settings, allowing it to work without when_thinking_enabled - Update test to reflect new behavior --- .../harness/deerflow/config/model_config.py | 4 + .../harness/deerflow/models/factory.py | 12 +- backend/tests/test_model_factory.py | 132 ++++++++++++++++++ config.example.yaml | 25 +++- 4 files changed, 168 insertions(+), 5 deletions(-) diff --git a/backend/packages/harness/deerflow/config/model_config.py b/backend/packages/harness/deerflow/config/model_config.py index cb6a8b6a0..e9a3e1c16 100644 --- a/backend/packages/harness/deerflow/config/model_config.py +++ b/backend/packages/harness/deerflow/config/model_config.py @@ -27,6 +27,10 @@ class ModelConfig(BaseModel): default_factory=lambda: None, description="Extra settings to be passed to the model when thinking is enabled", ) + when_thinking_disabled: dict | None = Field( + default_factory=lambda: None, + description="Extra settings to be passed to the model when thinking is disabled", + ) supports_vision: bool = Field(default_factory=lambda: False, description="Whether the model supports vision/image inputs") thinking: dict | None = Field( default_factory=lambda: None, diff --git a/backend/packages/harness/deerflow/models/factory.py b/backend/packages/harness/deerflow/models/factory.py index 900bb71cc..a47f46d73 100644 --- a/backend/packages/harness/deerflow/models/factory.py +++ b/backend/packages/harness/deerflow/models/factory.py @@ -56,6 +56,7 @@ def create_chat_model(name: str | None = None, thinking_enabled: bool = False, * "supports_thinking", "supports_reasoning_effort", "when_thinking_enabled", + "when_thinking_disabled", "thinking", "supports_vision", }, @@ -72,21 +73,24 @@ def create_chat_model(name: str | None = None, thinking_enabled: bool = False, * raise ValueError(f"Model {name} does not support thinking. Set `supports_thinking` to true in the `config.yaml` to enable thinking.") from None if effective_wte: model_settings_from_config.update(effective_wte) - if not thinking_enabled and has_thinking_settings: - if effective_wte.get("extra_body", {}).get("thinking", {}).get("type"): + if not thinking_enabled: + if model_config.when_thinking_disabled is not None: + # User-provided disable settings take full precedence + model_settings_from_config.update(model_config.when_thinking_disabled) + elif has_thinking_settings and effective_wte.get("extra_body", {}).get("thinking", {}).get("type"): # OpenAI-compatible gateway: thinking is nested under extra_body model_settings_from_config["extra_body"] = _deep_merge_dicts( model_settings_from_config.get("extra_body"), {"thinking": {"type": "disabled"}}, ) model_settings_from_config["reasoning_effort"] = "minimal" - elif disable_chat_template_kwargs := _vllm_disable_chat_template_kwargs(effective_wte.get("extra_body", {}).get("chat_template_kwargs") or {}): + elif has_thinking_settings and (disable_chat_template_kwargs := _vllm_disable_chat_template_kwargs(effective_wte.get("extra_body", {}).get("chat_template_kwargs") or {})): # vLLM uses chat template kwargs to switch thinking on/off. model_settings_from_config["extra_body"] = _deep_merge_dicts( model_settings_from_config.get("extra_body"), {"chat_template_kwargs": disable_chat_template_kwargs}, ) - elif effective_wte.get("thinking", {}).get("type"): + elif has_thinking_settings and effective_wte.get("thinking", {}).get("type"): # Native langchain_anthropic: thinking is a direct constructor parameter model_settings_from_config["thinking"] = {"type": "disabled"} if not model_config.supports_reasoning_effort: diff --git a/backend/tests/test_model_factory.py b/backend/tests/test_model_factory.py index 573b2fc58..9bb6915b0 100644 --- a/backend/tests/test_model_factory.py +++ b/backend/tests/test_model_factory.py @@ -30,6 +30,7 @@ def _make_model( supports_thinking: bool = False, supports_reasoning_effort: bool = False, when_thinking_enabled: dict | None = None, + when_thinking_disabled: dict | None = None, thinking: dict | None = None, max_tokens: int | None = None, ) -> ModelConfig: @@ -43,6 +44,7 @@ def _make_model( supports_thinking=supports_thinking, supports_reasoning_effort=supports_reasoning_effort, when_thinking_enabled=when_thinking_enabled, + when_thinking_disabled=when_thinking_disabled, thinking=thinking, supports_vision=False, ) @@ -244,6 +246,136 @@ def test_thinking_disabled_no_when_thinking_enabled_does_nothing(monkeypatch): assert captured.get("reasoning_effort") is None +# --------------------------------------------------------------------------- +# when_thinking_disabled config +# --------------------------------------------------------------------------- + + +def test_when_thinking_disabled_takes_precedence_over_hardcoded_disable(monkeypatch): + """When when_thinking_disabled is set, it takes full precedence over the + hardcoded disable logic (extra_body.thinking.type=disabled etc.).""" + wte = {"extra_body": {"thinking": {"type": "enabled", "budget_tokens": 10000}}} + wtd = {"extra_body": {"thinking": {"type": "disabled"}}, "reasoning_effort": "low"} + cfg = _make_app_config( + [ + _make_model( + "custom-disable", + supports_thinking=True, + supports_reasoning_effort=True, + when_thinking_enabled=wte, + when_thinking_disabled=wtd, + ) + ] + ) + _patch_factory(monkeypatch, cfg) + + captured: dict = {} + + class CapturingModel(FakeChatModel): + def __init__(self, **kwargs): + captured.update(kwargs) + BaseChatModel.__init__(self, **kwargs) + + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel) + + factory_module.create_chat_model(name="custom-disable", thinking_enabled=False) + + assert captured.get("extra_body") == {"thinking": {"type": "disabled"}} + # User overrode the hardcoded "minimal" with "low" + assert captured.get("reasoning_effort") == "low" + + +def test_when_thinking_disabled_not_used_when_thinking_enabled(monkeypatch): + """when_thinking_disabled must have no effect when thinking_enabled=True.""" + wte = {"extra_body": {"thinking": {"type": "enabled"}}} + wtd = {"extra_body": {"thinking": {"type": "disabled"}}} + cfg = _make_app_config( + [ + _make_model( + "wtd-ignored", + supports_thinking=True, + when_thinking_enabled=wte, + when_thinking_disabled=wtd, + ) + ] + ) + _patch_factory(monkeypatch, cfg) + + captured: dict = {} + + class CapturingModel(FakeChatModel): + def __init__(self, **kwargs): + captured.update(kwargs) + BaseChatModel.__init__(self, **kwargs) + + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel) + + factory_module.create_chat_model(name="wtd-ignored", thinking_enabled=True) + + # when_thinking_enabled should apply, NOT when_thinking_disabled + assert captured.get("extra_body") == {"thinking": {"type": "enabled"}} + + +def test_when_thinking_disabled_without_when_thinking_enabled_still_applies(monkeypatch): + """when_thinking_disabled alone (no when_thinking_enabled) should still apply its settings.""" + cfg = _make_app_config( + [ + _make_model( + "wtd-only", + supports_thinking=True, + supports_reasoning_effort=True, + when_thinking_disabled={"reasoning_effort": "low"}, + ) + ] + ) + _patch_factory(monkeypatch, cfg) + + captured: dict = {} + + class CapturingModel(FakeChatModel): + def __init__(self, **kwargs): + captured.update(kwargs) + BaseChatModel.__init__(self, **kwargs) + + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel) + + factory_module.create_chat_model(name="wtd-only", thinking_enabled=False) + + # when_thinking_disabled is now gated independently of has_thinking_settings + assert captured.get("reasoning_effort") == "low" + + +def test_when_thinking_disabled_excluded_from_model_dump(monkeypatch): + """when_thinking_disabled must not leak into the model constructor kwargs.""" + wte = {"extra_body": {"thinking": {"type": "enabled"}}} + wtd = {"extra_body": {"thinking": {"type": "disabled"}}} + cfg = _make_app_config( + [ + _make_model( + "no-leak-wtd", + supports_thinking=True, + when_thinking_enabled=wte, + when_thinking_disabled=wtd, + ) + ] + ) + _patch_factory(monkeypatch, cfg) + + captured: dict = {} + + class CapturingModel(FakeChatModel): + def __init__(self, **kwargs): + captured.update(kwargs) + BaseChatModel.__init__(self, **kwargs) + + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel) + + factory_module.create_chat_model(name="no-leak-wtd", thinking_enabled=True) + + # when_thinking_disabled value must NOT appear as a raw key + assert "when_thinking_disabled" not in captured + + # --------------------------------------------------------------------------- # reasoning_effort stripping # --------------------------------------------------------------------------- diff --git a/config.example.yaml b/config.example.yaml index 7edfe60ae..933f20a4f 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -12,7 +12,7 @@ # ============================================================================ # Bump this number when the config schema changes. # Run `make config-upgrade` to merge new fields into your local config.yaml. -config_version: 5 +config_version: 6 # ============================================================================ # Logging @@ -50,6 +50,10 @@ models: # extra_body: # thinking: # type: enabled + # when_thinking_disabled: + # extra_body: + # thinking: + # type: disabled # Example: OpenAI model # - name: gpt-4 @@ -88,6 +92,9 @@ models: # when_thinking_enabled: # thinking: # type: enabled + # when_thinking_disabled: + # thinking: + # type: disabled # Example: Google Gemini model (native SDK, no thinking support) # - name: gemini-2.5-pro @@ -120,6 +127,10 @@ models: # extra_body: # thinking: # type: enabled + # when_thinking_disabled: + # extra_body: + # thinking: + # type: disabled # Example: DeepSeek model (with thinking support) # - name: deepseek-v3 @@ -136,6 +147,10 @@ models: # extra_body: # thinking: # type: enabled + # when_thinking_disabled: + # extra_body: + # thinking: + # type: disabled # Example: Kimi K2.5 model # - name: kimi-k2.5 @@ -153,6 +168,10 @@ models: # extra_body: # thinking: # type: enabled + # when_thinking_disabled: + # extra_body: + # thinking: + # type: disabled # Example: Novita AI (OpenAI-compatible) # Novita provides an OpenAI-compatible API with competitive pricing @@ -173,6 +192,10 @@ models: # extra_body: # thinking: # type: enabled + # when_thinking_disabled: + # extra_body: + # thinking: + # type: disabled # Example: MiniMax (OpenAI-compatible) - International Edition # MiniMax provides high-performance models with 204K context window