From 194bab469143f5dc370513144800705d3c4467ab Mon Sep 17 00:00:00 2001
From: shivam johri <shivamjohri.theking@gmail.com>
Date: Thu, 9 Apr 2026 16:19:00 +0530
Subject: [PATCH] feat(config): add when_thinking_disabled support for model
 configs (#1970)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat(config): add when_thinking_disabled support for model configs

Allow users to explicitly configure what parameters are sent to the
model when thinking is disabled, via a new `when_thinking_disabled`
field in model config. This mirrors the existing `when_thinking_enabled`
pattern and takes full precedence over the hardcoded disable behavior
when set. Backwards compatible — existing configs work unchanged.

Closes #1675

* fix(config): address copilot review — gate when_thinking_disabled independently

- Switch truthiness check to `is not None` so an empty dict (`{}`) is honored as an explicit override that skips the hardcoded disable behavior
- Restructure disable path so when_thinking_disabled is gated independently
  of has_thinking_settings, allowing it to work without when_thinking_enabled
- Update the when_thinking_disabled-only test to expect its settings to be applied
---
 .../harness/deerflow/config/model_config.py   |   4 +
 .../harness/deerflow/models/factory.py        |  12 +-
 backend/tests/test_model_factory.py           | 132 ++++++++++++++++++
 config.example.yaml                           |  25 +++-
 4 files changed, 168 insertions(+), 5 deletions(-)

diff --git a/backend/packages/harness/deerflow/config/model_config.py b/backend/packages/harness/deerflow/config/model_config.py
index cb6a8b6a0..e9a3e1c16 100644
--- a/backend/packages/harness/deerflow/config/model_config.py
+++ b/backend/packages/harness/deerflow/config/model_config.py
@@ -27,6 +27,10 @@ class ModelConfig(BaseModel):
         default_factory=lambda: None,
         description="Extra settings to be passed to the model when thinking is enabled",
     )
+    when_thinking_disabled: dict | None = Field(
+        default_factory=lambda: None,
+        description="Extra settings to be passed to the model when thinking is disabled",
+    )
     supports_vision: bool = Field(default_factory=lambda: False, description="Whether the model supports vision/image inputs")
     thinking: dict | None = Field(
         default_factory=lambda: None,
diff --git a/backend/packages/harness/deerflow/models/factory.py b/backend/packages/harness/deerflow/models/factory.py
index 900bb71cc..a47f46d73 100644
--- a/backend/packages/harness/deerflow/models/factory.py
+++ b/backend/packages/harness/deerflow/models/factory.py
@@ -56,6 +56,7 @@ def create_chat_model(name: str | None = None, thinking_enabled: bool = False, *
             "supports_thinking",
             "supports_reasoning_effort",
             "when_thinking_enabled",
+            "when_thinking_disabled",
             "thinking",
             "supports_vision",
         },
@@ -72,21 +73,24 @@ def create_chat_model(name: str | None = None, thinking_enabled: bool = False, *
             raise ValueError(f"Model {name} does not support thinking. Set `supports_thinking` to true in the `config.yaml` to enable thinking.") from None
         if effective_wte:
             model_settings_from_config.update(effective_wte)
-    if not thinking_enabled and has_thinking_settings:
-        if effective_wte.get("extra_body", {}).get("thinking", {}).get("type"):
+    if not thinking_enabled:
+        if model_config.when_thinking_disabled is not None:
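+            # User-provided disable settings replace the built-in disable logic below; applied with .update(), so top-level keys such as extra_body are overwritten rather than deep-merged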
+            model_settings_from_config.update(model_config.when_thinking_disabled)
+        elif has_thinking_settings and effective_wte.get("extra_body", {}).get("thinking", {}).get("type"):
             # OpenAI-compatible gateway: thinking is nested under extra_body
             model_settings_from_config["extra_body"] = _deep_merge_dicts(
                 model_settings_from_config.get("extra_body"),
                 {"thinking": {"type": "disabled"}},
             )
             model_settings_from_config["reasoning_effort"] = "minimal"
-        elif disable_chat_template_kwargs := _vllm_disable_chat_template_kwargs(effective_wte.get("extra_body", {}).get("chat_template_kwargs") or {}):
+        elif has_thinking_settings and (disable_chat_template_kwargs := _vllm_disable_chat_template_kwargs(effective_wte.get("extra_body", {}).get("chat_template_kwargs") or {})):
             # vLLM uses chat template kwargs to switch thinking on/off.
             model_settings_from_config["extra_body"] = _deep_merge_dicts(
                 model_settings_from_config.get("extra_body"),
                 {"chat_template_kwargs": disable_chat_template_kwargs},
             )
-        elif effective_wte.get("thinking", {}).get("type"):
+        elif has_thinking_settings and effective_wte.get("thinking", {}).get("type"):
             # Native langchain_anthropic: thinking is a direct constructor parameter
             model_settings_from_config["thinking"] = {"type": "disabled"}
     if not model_config.supports_reasoning_effort:
diff --git a/backend/tests/test_model_factory.py b/backend/tests/test_model_factory.py
index 573b2fc58..9bb6915b0 100644
--- a/backend/tests/test_model_factory.py
+++ b/backend/tests/test_model_factory.py
@@ -30,6 +30,7 @@ def _make_model(
     supports_thinking: bool = False,
     supports_reasoning_effort: bool = False,
     when_thinking_enabled: dict | None = None,
+    when_thinking_disabled: dict | None = None,
     thinking: dict | None = None,
     max_tokens: int | None = None,
 ) -> ModelConfig:
@@ -43,6 +44,7 @@ def _make_model(
         supports_thinking=supports_thinking,
         supports_reasoning_effort=supports_reasoning_effort,
         when_thinking_enabled=when_thinking_enabled,
+        when_thinking_disabled=when_thinking_disabled,
         thinking=thinking,
         supports_vision=False,
     )
@@ -244,6 +246,136 @@ def test_thinking_disabled_no_when_thinking_enabled_does_nothing(monkeypatch):
     assert captured.get("reasoning_effort") is None
 
 
+# ---------------------------------------------------------------------------
+# when_thinking_disabled config
+# ---------------------------------------------------------------------------
+
+
+def test_when_thinking_disabled_takes_precedence_over_hardcoded_disable(monkeypatch):
+    """When when_thinking_disabled is set, it takes full precedence over the
+    hardcoded disable logic (extra_body.thinking.type=disabled etc.)."""
+    wte = {"extra_body": {"thinking": {"type": "enabled", "budget_tokens": 10000}}}
+    wtd = {"extra_body": {"thinking": {"type": "disabled"}}, "reasoning_effort": "low"}
+    cfg = _make_app_config(
+        [
+            _make_model(
+                "custom-disable",
+                supports_thinking=True,
+                supports_reasoning_effort=True,
+                when_thinking_enabled=wte,
+                when_thinking_disabled=wtd,
+            )
+        ]
+    )
+    _patch_factory(monkeypatch, cfg)
+
+    captured: dict = {}
+
+    class CapturingModel(FakeChatModel):
+        def __init__(self, **kwargs):
+            captured.update(kwargs)
+            BaseChatModel.__init__(self, **kwargs)
+
+    monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
+
+    factory_module.create_chat_model(name="custom-disable", thinking_enabled=False)
+
+    assert captured.get("extra_body") == {"thinking": {"type": "disabled"}}
+    # User overrode the hardcoded "minimal" with "low"
+    assert captured.get("reasoning_effort") == "low"
+
+
+def test_when_thinking_disabled_not_used_when_thinking_enabled(monkeypatch):
+    """when_thinking_disabled must have no effect when thinking_enabled=True."""
+    wte = {"extra_body": {"thinking": {"type": "enabled"}}}
+    wtd = {"extra_body": {"thinking": {"type": "disabled"}}}
+    cfg = _make_app_config(
+        [
+            _make_model(
+                "wtd-ignored",
+                supports_thinking=True,
+                when_thinking_enabled=wte,
+                when_thinking_disabled=wtd,
+            )
+        ]
+    )
+    _patch_factory(monkeypatch, cfg)
+
+    captured: dict = {}
+
+    class CapturingModel(FakeChatModel):
+        def __init__(self, **kwargs):
+            captured.update(kwargs)
+            BaseChatModel.__init__(self, **kwargs)
+
+    monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
+
+    factory_module.create_chat_model(name="wtd-ignored", thinking_enabled=True)
+
+    # when_thinking_enabled should apply, NOT when_thinking_disabled
+    assert captured.get("extra_body") == {"thinking": {"type": "enabled"}}
+
+
+def test_when_thinking_disabled_without_when_thinking_enabled_still_applies(monkeypatch):
+    """when_thinking_disabled alone (no when_thinking_enabled) should still apply its settings."""
+    cfg = _make_app_config(
+        [
+            _make_model(
+                "wtd-only",
+                supports_thinking=True,
+                supports_reasoning_effort=True,
+                when_thinking_disabled={"reasoning_effort": "low"},
+            )
+        ]
+    )
+    _patch_factory(monkeypatch, cfg)
+
+    captured: dict = {}
+
+    class CapturingModel(FakeChatModel):
+        def __init__(self, **kwargs):
+            captured.update(kwargs)
+            BaseChatModel.__init__(self, **kwargs)
+
+    monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
+
+    factory_module.create_chat_model(name="wtd-only", thinking_enabled=False)
+
+    # when_thinking_disabled must apply even though no when_thinking_enabled is configured
+    assert captured.get("reasoning_effort") == "low"
+
+
+def test_when_thinking_disabled_excluded_from_model_dump(monkeypatch):
+    """when_thinking_disabled must not leak into the model constructor kwargs."""
+    wte = {"extra_body": {"thinking": {"type": "enabled"}}}
+    wtd = {"extra_body": {"thinking": {"type": "disabled"}}}
+    cfg = _make_app_config(
+        [
+            _make_model(
+                "no-leak-wtd",
+                supports_thinking=True,
+                when_thinking_enabled=wte,
+                when_thinking_disabled=wtd,
+            )
+        ]
+    )
+    _patch_factory(monkeypatch, cfg)
+
+    captured: dict = {}
+
+    class CapturingModel(FakeChatModel):
+        def __init__(self, **kwargs):
+            captured.update(kwargs)
+            BaseChatModel.__init__(self, **kwargs)
+
+    monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
+
+    factory_module.create_chat_model(name="no-leak-wtd", thinking_enabled=True)
+
+    # when_thinking_disabled must NOT be forwarded to the model constructor as a kwarg
+    assert "when_thinking_disabled" not in captured
+
+
 # ---------------------------------------------------------------------------
 # reasoning_effort stripping
 # ---------------------------------------------------------------------------
diff --git a/config.example.yaml b/config.example.yaml
index 7edfe60ae..933f20a4f 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -12,7 +12,7 @@
 # ============================================================================
 # Bump this number when the config schema changes.
 # Run `make config-upgrade` to merge new fields into your local config.yaml.
-config_version: 5
+config_version: 6
 
 # ============================================================================
 # Logging
@@ -50,6 +50,10 @@ models:
   #     extra_body:
   #       thinking:
   #         type: enabled
+  #   when_thinking_disabled:
+  #     extra_body:
+  #       thinking:
+  #         type: disabled
 
   # Example: OpenAI model
   # - name: gpt-4
@@ -88,6 +92,9 @@ models:
   #   when_thinking_enabled:
   #     thinking:
   #       type: enabled
+  #   when_thinking_disabled:
+  #     thinking:
+  #       type: disabled
 
   # Example: Google Gemini model (native SDK, no thinking support)
   # - name: gemini-2.5-pro
@@ -120,6 +127,10 @@ models:
   #     extra_body:
   #       thinking:
   #         type: enabled
+  #   when_thinking_disabled:
+  #     extra_body:
+  #       thinking:
+  #         type: disabled
 
   # Example: DeepSeek model (with thinking support)
   # - name: deepseek-v3
@@ -136,6 +147,10 @@ models:
   #     extra_body:
   #       thinking:
   #         type: enabled
+  #   when_thinking_disabled:
+  #     extra_body:
+  #       thinking:
+  #         type: disabled
 
   # Example: Kimi K2.5 model
   # - name: kimi-k2.5
@@ -153,6 +168,10 @@ models:
   #     extra_body:
   #       thinking:
   #         type: enabled
+  #   when_thinking_disabled:
+  #     extra_body:
+  #       thinking:
+  #         type: disabled
 
   # Example: Novita AI (OpenAI-compatible)
   # Novita provides an OpenAI-compatible API with competitive pricing
@@ -173,6 +192,10 @@ models:
   #     extra_body:
   #       thinking:
   #         type: enabled
+  #   when_thinking_disabled:
+  #     extra_body:
+  #       thinking:
+  #         type: disabled
 
   # Example: MiniMax (OpenAI-compatible) - International Edition
   # MiniMax provides high-performance models with 204K context window