From f1a0ab699aee5642fccf9f0fc211b231d41e6b5d Mon Sep 17 00:00:00 2001 From: Xinmin Zeng <135568692+fancyboi999@users.noreply.github.com> Date: Wed, 13 May 2026 23:45:47 +0800 Subject: [PATCH] fix(tools): preserve tool_search promotions across re-entrant get_available_tools (#2885) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(tools): preserve tool_search promotions across re-entrant get_available_tools Closes #2884. ``get_available_tools`` used to unconditionally call ``reset_deferred_registry()`` and rebuild a fresh ``DeferredToolRegistry`` on every invocation. That works for the first call of a request (the ContextVar starts at its default of ``None``), but any RE-ENTRANT call during the same async context — e.g. ``task_tool`` building a subagent's toolset, or a custom middleware that rebuilds tools mid-run — wiped any ``tool_search`` promotions the parent agent had already made. The ``DeferredToolFilterMiddleware`` would then re-hide those tools from the next model call, leaving the agent able to see a tool's name (via the prior ``tool_search`` result that's still in conversation history) but unable to invoke it. Fix: when the ContextVar already holds a registry, reuse it instead of rebuilding. Fresh requests still get a fresh registry because each new graph run starts in a new asyncio task with the ContextVar at ``None``. ## Verification - Unit-level reproduction (``test_get_available_tools_resets_registry_wiping_promotion``): promote a tool in the registry, call ``get_available_tools`` again, assert the promotion is preserved. Fails on main, passes on this branch. - Graph-execution reproduction (two tests): drive a real ``langchain.agents.create_agent`` graph with the real ``DeferredToolFilterMiddleware`` through two model turns, including one that issues a re-entrant ``get_available_tools`` call to simulate the task_tool subagent path. - Real-LLM end-to-end (``test_deferred_tool_promotion_real_llm.py``, opt-in via ``ONEAPI_E2E=1``): drives the same flow against a real OpenAI-compatible model (verified on GPT-5.4-mini through the one-api gateway), watches the model call the promoted ``fake_calculator`` through the deferred-filter middleware, and asserts the right arithmetic result. Passes against the fixed branch. - Companion update to ``test_tool_deduplication.py``: dropped the ``@patch("deerflow.tools.tools.reset_deferred_registry")`` decorators because the symbol is no longer imported there. - Test fixtures in the new files patch ``deerflow.tools.tools.get_app_config`` with a minimal ``model_construct``-ed ``AppConfig`` instead of calling the real loader, so they never trigger ``_apply_singleton_configs`` and never leak ``_memory_config``/``_title_config``/… mutations into the rest of the suite. Full backend suite: 3208 passed / 14 skipped / 0 failed. ruff check + format clean. * fix(tools): address Copilot review on #2885 - tools.py: rewrite the reuse-path comment to spell out (a) why we don't reconcile the registry against the current ``mcp_tools`` snapshot — the MCP cache doesn't refresh mid-graph-run, the lead agent's ``ToolNode`` is already bound to the previous tool set anyway, and ``promote()`` drops the entry so a naive re-sync misclassifies promotions as new tools — and (b) why the log uses ``max(0, …)`` to avoid negative counts when the cache shrinks between snapshots. - Replace direct ``ts_mod._registry_var.set(None)`` in test fixtures with the public ``reset_deferred_registry()`` helper so tests don't couple to module internals. - Correct the docstring path in ``test_deferred_tool_registry_promotion.py`` to match the actual monkeypatch target (``deerflow.mcp.cache.get_cached_mcp_tools``). - Rename ``test_get_available_tools_resets_registry_wiping_promotion`` to ``test_get_available_tools_preserves_promotions_across_reentrant_calls`` so the test name describes the contract being asserted, not the bug it originally reproduced. Full backend suite: 3208 passed / 14 skipped. Real-LLM e2e: 1 passed. --- .../packages/harness/deerflow/tools/tools.py | 53 ++- .../test_deferred_tool_promotion_real_llm.py | 222 ++++++++++ .../test_deferred_tool_registry_promotion.py | 390 ++++++++++++++++++ backend/tests/test_tool_deduplication.py | 12 +- 4 files changed, 661 insertions(+), 16 deletions(-) create mode 100644 backend/tests/test_deferred_tool_promotion_real_llm.py create mode 100644 backend/tests/test_deferred_tool_registry_promotion.py diff --git a/backend/packages/harness/deerflow/tools/tools.py b/backend/packages/harness/deerflow/tools/tools.py index 01bfce43f..5c97962fc 100644 --- a/backend/packages/harness/deerflow/tools/tools.py +++ b/backend/packages/harness/deerflow/tools/tools.py @@ -7,7 +7,7 @@ from deerflow.config.app_config import AppConfig from deerflow.reflection import resolve_variable from deerflow.sandbox.security import is_host_bash_allowed from deerflow.tools.builtins import ask_clarification_tool, present_file_tool, task_tool, view_image_tool -from deerflow.tools.builtins.tool_search import reset_deferred_registry +from deerflow.tools.builtins.tool_search import get_deferred_registry from deerflow.tools.sync import make_sync_tool_wrapper logger = logging.getLogger(__name__) @@ -116,8 +116,6 @@ def get_available_tools( # made through the Gateway API (which runs in a separate process) are immediately # reflected when loading MCP tools. mcp_tools = [] - # Reset deferred registry upfront to prevent stale state from previous calls - reset_deferred_registry() if include_mcp: try: from deerflow.config.extensions_config import ExtensionsConfig @@ -135,12 +133,51 @@ def get_available_tools( from deerflow.tools.builtins.tool_search import DeferredToolRegistry, set_deferred_registry from deerflow.tools.builtins.tool_search import tool_search as tool_search_tool - registry = DeferredToolRegistry() - for t in mcp_tools: - registry.register(t) - set_deferred_registry(registry) + # Reuse the existing registry if one is already set for + # this async context. ``get_available_tools`` is + # re-entered whenever a subagent is spawned + # (``task_tool`` calls it to build the child agent's + # toolset), and previously we used to unconditionally + # rebuild the registry — wiping out the parent agent's + # tool_search promotions. The + # ``DeferredToolFilterMiddleware`` then re-hid those + # tools from subsequent model calls, leaving the agent + # able to see a tool's name but unable to invoke it + # (issue #2884). ``contextvars`` already gives us the + # lifetime semantics we want: a fresh request / graph + # run starts in a new asyncio task with the + # ContextVar at its default of ``None``, so reuse is + # only triggered for re-entrant calls inside one run. + # + # Intentionally NOT reconciling against the current + # ``mcp_tools`` snapshot. The MCP cache only refreshes + # on ``extensions_config.json`` mtime changes, which + # in practice happens between graph runs — not inside + # one. And even if a refresh did happen mid-run, the + # already-built lead agent's ``ToolNode`` still holds + # the *previous* tool set (LangGraph binds tools at + # graph construction time), so a brand-new MCP tool + # couldn't actually be invoked anyway. The + # ``DeferredToolRegistry`` doesn't retain the names + # of previously-promoted tools (``promote()`` drops + # the entry entirely), so re-syncing the registry + # against a fresh ``mcp_tools`` list would + # mis-classify those promotions as new tools and + # re-register them as deferred — exactly the bug + # this fix exists to prevent. + existing_registry = get_deferred_registry() + if existing_registry is None: + registry = DeferredToolRegistry() + for t in mcp_tools: + registry.register(t) + set_deferred_registry(registry) + logger.info(f"Tool search active: {len(mcp_tools)} tools deferred") + else: + mcp_tool_names = {t.name for t in mcp_tools} + still_deferred = len(existing_registry) + promoted_count = max(0, len(mcp_tool_names) - still_deferred) + logger.info(f"Tool search active (preserved promotions): {still_deferred} tools deferred, {promoted_count} already promoted") builtin_tools.append(tool_search_tool) - logger.info(f"Tool search active: {len(mcp_tools)} tools deferred") except ImportError: logger.warning("MCP module not available. Install 'langchain-mcp-adapters' package to enable MCP tools.") except Exception as e: diff --git a/backend/tests/test_deferred_tool_promotion_real_llm.py b/backend/tests/test_deferred_tool_promotion_real_llm.py new file mode 100644 index 000000000..46ae24d41 --- /dev/null +++ b/backend/tests/test_deferred_tool_promotion_real_llm.py @@ -0,0 +1,222 @@ +"""Real-LLM end-to-end verification for issue #2884. + +Drives a real ``langchain.agents.create_agent`` graph against a real OpenAI- +compatible LLM (one-api gateway), bound through ``DeferredToolFilterMiddleware`` +and the production ``get_available_tools`` pipeline. The only thing we mock is +the MCP tool source — we hand-roll two ``@tool``s and inject them through +``deerflow.mcp.cache.get_cached_mcp_tools``. + +The flow exercised: + 1. Turn 1: agent sees ``tool_search`` (plus a ``fake_subagent_trigger`` + that re-enters ``get_available_tools`` on the same task — this is the + code path issue #2884 reports). It must call ``tool_search`` to + discover the deferred ``fake_calculator`` tool. + 2. Tool batch: ``tool_search`` promotes ``fake_calculator``; + ``fake_subagent_trigger`` re-enters ``get_available_tools``. + 3. Turn 2: the promoted ``fake_calculator`` schema must reach the model + so it can actually call it. Without this PR's fix, the re-entry wipes + the promotion and the model can no longer invoke the tool. + +Skipped unless ``ONEAPI_E2E=1`` is set so this doesn't burn credits on every +test run. Run with:: + + ONEAPI_E2E=1 OPENAI_API_KEY=... OPENAI_API_BASE=... \ + PYTHONPATH=. uv run pytest \ + tests/test_deferred_tool_promotion_real_llm.py -v -s +""" + +from __future__ import annotations + +import os + +import pytest +from langchain_core.messages import HumanMessage +from langchain_core.tools import tool as as_tool + +# --------------------------------------------------------------------------- +# Skip control: only run when explicitly opted in. +# --------------------------------------------------------------------------- + + +pytestmark = pytest.mark.skipif( + os.getenv("ONEAPI_E2E") != "1", + reason="Real-LLM e2e: opt in with ONEAPI_E2E=1 (requires OPENAI_API_KEY + OPENAI_API_BASE)", +) + + +# --------------------------------------------------------------------------- +# Fake "MCP" tools the agent should discover via tool_search. +# Keep them obviously synthetic so the model can pattern-match the search. +# --------------------------------------------------------------------------- + + +_calls: list[str] = [] + + +@as_tool +def fake_calculator(expression: str) -> str: + """Evaluate a tiny arithmetic expression like '2 + 2'. + + Reserved for the user — only call this if the user asks for arithmetic. + """ + _calls.append(f"fake_calculator:{expression}") + try: + # Trivially safe-eval just for the e2e check + allowed = set("0123456789+-*/() .") + if not set(expression) <= allowed: + return "expression contains disallowed characters" + return str(eval(expression, {"__builtins__": {}}, {})) # noqa: S307 + except Exception as e: + return f"error: {e}" + + +@as_tool +def fake_translator(text: str, target_lang: str) -> str: + """Translate text into the given language code. Decorative — not used.""" + _calls.append(f"fake_translator:{text}:{target_lang}") + return f"[{target_lang}] {text}" + + +# --------------------------------------------------------------------------- +# Pipeline wiring (same shape as the in-process tests). +# --------------------------------------------------------------------------- + + +@pytest.fixture(autouse=True) +def _reset_registry_between_tests(): + from deerflow.tools.builtins.tool_search import reset_deferred_registry + + reset_deferred_registry() + yield + reset_deferred_registry() + + +def _patch_mcp_pipeline(monkeypatch: pytest.MonkeyPatch, mcp_tools: list) -> None: + from deerflow.config.extensions_config import ExtensionsConfig, McpServerConfig + + real_ext = ExtensionsConfig( + mcpServers={"fake-server": McpServerConfig(type="stdio", command="echo", enabled=True)}, + ) + monkeypatch.setattr( + "deerflow.config.extensions_config.ExtensionsConfig.from_file", + classmethod(lambda cls: real_ext), + ) + monkeypatch.setattr("deerflow.mcp.cache.get_cached_mcp_tools", lambda: list(mcp_tools)) + + +def _force_tool_search_enabled(monkeypatch: pytest.MonkeyPatch) -> None: + """Build a minimal mock AppConfig and patch the symbol — never call the + real loader, which would trigger ``_apply_singleton_configs`` and + permanently mutate cross-test singletons (memory, title, …).""" + from deerflow.config.app_config import AppConfig + from deerflow.config.tool_search_config import ToolSearchConfig + + mock_cfg = AppConfig.model_construct( + log_level="info", + models=[], + tools=[], + tool_groups=[], + sandbox=AppConfig.model_fields["sandbox"].annotation.model_construct(use="x"), + tool_search=ToolSearchConfig(enabled=True), + ) + monkeypatch.setattr("deerflow.tools.tools.get_app_config", lambda: mock_cfg) + + +# --------------------------------------------------------------------------- +# Real-LLM e2e test +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_real_llm_promotes_then_invokes_with_subagent_reentry(monkeypatch: pytest.MonkeyPatch): + """End-to-end against a real OpenAI-compatible LLM. + + The model must: + Turn 1 — see ``tool_search`` (deferred tools aren't bound yet) and + batch-call BOTH ``tool_search(select:fake_calculator)`` AND + ``fake_subagent_trigger(...)``. + Turn 2 — call ``fake_calculator`` and finish. + + Pass criterion: ``fake_calculator`` actually gets invoked at the tool + layer — recorded in ``_calls`` — which proves the model received the + promoted schema after the re-entrant ``get_available_tools`` call. + """ + from langchain.agents import create_agent + from langchain_openai import ChatOpenAI + + from deerflow.agents.middlewares.deferred_tool_filter_middleware import DeferredToolFilterMiddleware + from deerflow.tools.tools import get_available_tools + + _patch_mcp_pipeline(monkeypatch, [fake_calculator, fake_translator]) + _force_tool_search_enabled(monkeypatch) + _calls.clear() + + @as_tool + async def fake_subagent_trigger(prompt: str) -> str: + """Pretend to spawn a subagent. Internally rebuilds the toolset. + + Use this whenever the user asks you to delegate work — pass a short + description as ``prompt``. + """ + # ``task_tool`` does this internally. Whether the registry-reset that + # used to happen here actually leaks back to the parent task depends + # on asyncio's implicit context-copying semantics (gather creates + # child tasks with copied contexts, so reset_deferred_registry is + # task-local) — but the fix in this PR is what GUARANTEES the + # promotion sticks regardless of which integration path triggers a + # re-entrant ``get_available_tools`` call. + get_available_tools(subagent_enabled=False) + _calls.append(f"fake_subagent_trigger:{prompt}") + return "subagent completed" + + tools = get_available_tools() + [fake_subagent_trigger] + + model = ChatOpenAI( + model=os.environ.get("ONEAPI_MODEL", "claude-sonnet-4-6"), + api_key=os.environ["OPENAI_API_KEY"], + base_url=os.environ["OPENAI_API_BASE"], + temperature=0, + max_retries=1, + ) + + system_prompt = ( + "You are a meticulous assistant. Available deferred tools include a " + "calculator and a translator — their schemas are hidden until you " + "search for them via tool_search.\n\n" + "Procedure for the user's request:\n" + " 1. Call tool_search with query 'select:fake_calculator' AND " + "in the SAME tool batch also call fake_subagent_trigger(prompt='go') " + "to delegate the side work. Put both tool_calls in your first response.\n" + " 2. After both tool messages come back, call fake_calculator with " + "the user's expression.\n" + " 3. Reply with just the numeric result." + ) + + graph = create_agent( + model=model, + tools=tools, + middleware=[DeferredToolFilterMiddleware()], + system_prompt=system_prompt, + ) + + result = await graph.ainvoke( + {"messages": [HumanMessage(content="What is 17 * 23? Use the deferred calculator tool.")]}, + config={"recursion_limit": 12}, + ) + + print("\n=== tool calls recorded ===") + for c in _calls: + print(f" {c}") + print("\n=== final message ===") + final_text = result["messages"][-1].content if result["messages"] else "(none)" + print(f" {final_text!r}") + + # The smoking-gun assertion: fake_calculator was actually invoked at the + # tool layer. This is only possible if the promoted schema reached the + # model in turn 2, despite the subagent-style re-entry in turn 1. + calc_calls = [c for c in _calls if c.startswith("fake_calculator:")] + assert calc_calls, f"REGRESSION (#2884): the model never managed to call fake_calculator. All recorded tool calls: {_calls!r}. Final text: {final_text!r}" + + # And the math should actually be done correctly (sanity that the LLM + # really used the result, not just hallucinated the answer). + assert "391" in str(final_text), f"Model didn't surface 17*23=391. Final text: {final_text!r}" diff --git a/backend/tests/test_deferred_tool_registry_promotion.py b/backend/tests/test_deferred_tool_registry_promotion.py new file mode 100644 index 000000000..23b7649ec --- /dev/null +++ b/backend/tests/test_deferred_tool_registry_promotion.py @@ -0,0 +1,390 @@ +"""Reproduce + regression-guard issue #2884. + +Hypothesis from the issue: + ``tools.tools.get_available_tools`` unconditionally calls + ``reset_deferred_registry()`` and constructs a fresh ``DeferredToolRegistry`` + every time it is invoked. If anything calls ``get_available_tools`` again + during the same async context (after the agent has promoted tools via + ``tool_search``), the promotion is wiped and the next model call hides the + tool's schema again. + +These tests pin two things: + +A. **At the unit boundary** — verify the failure mode directly. Promote a + tool in the registry, then call ``get_available_tools`` again and observe + that the ContextVar registry is reset and the promotion is lost. + +B. **At the graph-execution boundary** — drive a real ``create_agent`` graph + with the real ``DeferredToolFilterMiddleware`` through two model turns. + The first turn calls ``tool_search`` which promotes a tool. The second + turn must see that tool's schema in ``request.tools``. If + ``get_available_tools`` were to run again between the two turns and reset + the registry, the second turn's filter would strip the tool. + +Strategy: use the production ``deerflow.tools.tools.get_available_tools`` +unmodified; mock only the LLM and the MCP tool source. Patch +``deerflow.mcp.cache.get_cached_mcp_tools`` (the symbol that +``get_available_tools`` resolves via lazy import) to return our fixture +tools so we don't need a real MCP server. +""" + +from __future__ import annotations + +from typing import Any + +import pytest +from langchain_core.language_models.fake_chat_models import FakeMessagesListChatModel +from langchain_core.messages import AIMessage, HumanMessage +from langchain_core.runnables import Runnable +from langchain_core.tools import tool as as_tool + + +class FakeToolCallingModel(FakeMessagesListChatModel): + """FakeMessagesListChatModel + no-op bind_tools so create_agent works.""" + + def bind_tools( # type: ignore[override] + self, + tools: Any, + *, + tool_choice: Any = None, + **kwargs: Any, + ) -> Runnable: + return self + + +# --------------------------------------------------------------------------- +# Fixtures: a fake MCP tool source + a way to force config.tool_search.enabled +# --------------------------------------------------------------------------- + + +@as_tool +def fake_mcp_search(query: str) -> str: + """Pretend to search a knowledge base for the given query.""" + return f"results for {query}" + + +@as_tool +def fake_mcp_fetch(url: str) -> str: + """Pretend to fetch a page at the given URL.""" + return f"content of {url}" + + +@pytest.fixture(autouse=True) +def _supply_env(monkeypatch: pytest.MonkeyPatch): + """config.yaml references $OPENAI_API_KEY at parse time; supply a placeholder.""" + monkeypatch.setenv("OPENAI_API_KEY", "sk-fake-not-used") + monkeypatch.setenv("OPENAI_API_BASE", "https://example.invalid") + + +@pytest.fixture(autouse=True) +def _reset_deferred_registry_between_tests(): + """Each test must start with a clean ContextVar. + + The registry lives in a module-level ContextVar with no per-task isolation + in a synchronous test runner, so one test's promotion can leak into the + next and silently break filter assertions. + """ + from deerflow.tools.builtins.tool_search import reset_deferred_registry + + reset_deferred_registry() + yield + reset_deferred_registry() + + +def _patch_mcp_pipeline(monkeypatch: pytest.MonkeyPatch, mcp_tools: list) -> None: + """Make get_available_tools believe an MCP server is registered. + + Build a real ``ExtensionsConfig`` with one enabled MCP server entry so + that both ``AppConfig.from_file`` (which calls + ``ExtensionsConfig.from_file().model_dump()``) and ``tools.get_available_tools`` + (which calls ``ExtensionsConfig.from_file().get_enabled_mcp_servers()``) + see a valid instance. Then point the MCP tool cache at our fixture tools. + """ + from deerflow.config.extensions_config import ExtensionsConfig, McpServerConfig + + real_ext = ExtensionsConfig( + mcpServers={"fake-server": McpServerConfig(type="stdio", command="echo", enabled=True)}, + ) + monkeypatch.setattr( + "deerflow.config.extensions_config.ExtensionsConfig.from_file", + classmethod(lambda cls: real_ext), + ) + monkeypatch.setattr("deerflow.mcp.cache.get_cached_mcp_tools", lambda: list(mcp_tools)) + + +def _force_tool_search_enabled(monkeypatch: pytest.MonkeyPatch) -> None: + """Force config.tool_search.enabled=True without touching the yaml. + + Calling the real ``get_app_config()`` would trigger ``_apply_singleton_configs`` + which permanently mutates module-level singletons (``_memory_config``, + ``_title_config``, …) to match the developer's ``config.yaml`` — even + after pytest restores our patch. That leaks across tests later in the + run that rely on those singletons' DEFAULTS (e.g. memory queue tests + require ``_memory_config.enabled = True``, which is the dataclass default + but FALSE in the actual yaml). + + Build a minimal mock AppConfig instead and never call the real loader. + """ + from deerflow.config.app_config import AppConfig + from deerflow.config.tool_search_config import ToolSearchConfig + + mock_cfg = AppConfig.model_construct( + log_level="info", + models=[], + tools=[], + tool_groups=[], + sandbox=AppConfig.model_fields["sandbox"].annotation.model_construct(use="x"), + tool_search=ToolSearchConfig(enabled=True), + ) + monkeypatch.setattr("deerflow.tools.tools.get_app_config", lambda: mock_cfg) + + +# --------------------------------------------------------------------------- +# Section A — direct unit-level reproduction +# --------------------------------------------------------------------------- + + +def test_get_available_tools_preserves_promotions_across_reentrant_calls(monkeypatch: pytest.MonkeyPatch): + """Re-entrant ``get_available_tools()`` must preserve prior promotions. + + Step 1: call get_available_tools() — registers MCP tools as deferred. + Step 2: simulate the agent calling tool_search by promoting one tool. + Step 3: call get_available_tools() again (the same code path + ``task_tool`` exercises mid-run). + + Assertion: after step 3, the promoted tool is STILL promoted (not + re-deferred). On ``main`` before the fix, step 3's + ``reset_deferred_registry()`` wiped the promotion and re-registered + every MCP tool as deferred — this assertion fired with + ``REGRESSION (#2884)``. + """ + from deerflow.tools.builtins.tool_search import get_deferred_registry + from deerflow.tools.tools import get_available_tools + + _patch_mcp_pipeline(monkeypatch, [fake_mcp_search, fake_mcp_fetch]) + _force_tool_search_enabled(monkeypatch) + + # Step 1: first call — both MCP tools start deferred + get_available_tools() + reg1 = get_deferred_registry() + assert reg1 is not None + assert {e.name for e in reg1.entries} == {"fake_mcp_search", "fake_mcp_fetch"} + + # Step 2: simulate tool_search promoting one of them + reg1.promote({"fake_mcp_search"}) + assert {e.name for e in reg1.entries} == {"fake_mcp_fetch"}, "Sanity: promote should remove fake_mcp_search" + + # Step 3: second call — registry must NOT silently undo the promotion + get_available_tools() + reg2 = get_deferred_registry() + assert reg2 is not None + deferred_after = {e.name for e in reg2.entries} + assert "fake_mcp_search" not in deferred_after, f"REGRESSION (#2884): get_available_tools wiped the deferred registry, re-deferring a tool that was already promoted by tool_search. deferred_after_second_call={deferred_after!r}" + + +# --------------------------------------------------------------------------- +# Section B — graph-execution reproduction +# --------------------------------------------------------------------------- + + +class _ToolSearchPromotingModel(FakeToolCallingModel): + """Two-turn model that: + + Turn 1 → emit a tool_call for ``tool_search`` (the real one) + Turn 2 → emit a tool_call for ``fake_mcp_search`` (the promoted tool) + + Records the tools it received on each turn so the test can inspect what + DeferredToolFilterMiddleware actually fed to ``bind_tools``. + """ + + bound_tools_per_turn: list[list[str]] = [] + + def bind_tools( # type: ignore[override] + self, + tools: Any, + *, + tool_choice: Any = None, + **kwargs: Any, + ) -> Runnable: + # Record the tool names the model would see in this turn + names = [getattr(t, "name", getattr(t, "__name__", repr(t))) for t in tools] + self.bound_tools_per_turn.append(names) + return self + + +def _build_promoting_model() -> _ToolSearchPromotingModel: + return _ToolSearchPromotingModel( + responses=[ + AIMessage( + content="", + tool_calls=[ + { + "name": "tool_search", + "args": {"query": "select:fake_mcp_search"}, + "id": "call_search_1", + "type": "tool_call", + } + ], + ), + AIMessage( + content="", + tool_calls=[ + { + "name": "fake_mcp_search", + "args": {"query": "hello"}, + "id": "call_mcp_1", + "type": "tool_call", + } + ], + ), + AIMessage(content="all done"), + ] + ) + + +def test_promoted_tool_is_visible_to_model_on_second_turn(monkeypatch: pytest.MonkeyPatch): + """End-to-end: drive a real create_agent graph through two turns. + + Without the fix, the second-turn bind_tools call should NOT contain + fake_mcp_search (because DeferredToolFilterMiddleware sees it in the + registry and strips it). With the fix, the model sees the schema and can + invoke it. + """ + from langchain.agents import create_agent + + from deerflow.agents.middlewares.deferred_tool_filter_middleware import DeferredToolFilterMiddleware + from deerflow.tools.tools import get_available_tools + + _patch_mcp_pipeline(monkeypatch, [fake_mcp_search, fake_mcp_fetch]) + _force_tool_search_enabled(monkeypatch) + + tools = get_available_tools() + # Sanity: the assembled tool list includes the deferred tools (they're in + # bind_tools but DeferredToolFilterMiddleware strips deferred ones before + # they reach the model) + tool_names = {getattr(t, "name", "") for t in tools} + assert {"tool_search", "fake_mcp_search", "fake_mcp_fetch"} <= tool_names + + model = _build_promoting_model() + model.bound_tools_per_turn = [] # reset class-level recorder + + graph = create_agent( + model=model, + tools=tools, + middleware=[DeferredToolFilterMiddleware()], + system_prompt="bug-2884-repro", + ) + + graph.invoke({"messages": [HumanMessage(content="use the search tool")]}) + + # Turn 1: model should NOT see fake_mcp_search (it's deferred) + turn1 = set(model.bound_tools_per_turn[0]) + assert "fake_mcp_search" not in turn1, f"Turn 1 sanity: deferred tools must be hidden from the model. Saw: {turn1!r}" + assert "tool_search" in turn1, f"Turn 1 sanity: tool_search must be visible so the agent can discover. Saw: {turn1!r}" + + # Turn 2: AFTER tool_search promotes fake_mcp_search, the model must see it. + # This is the load-bearing assertion for issue #2884. + assert len(model.bound_tools_per_turn) >= 2, f"Expected at least 2 model turns, got {len(model.bound_tools_per_turn)}" + turn2 = set(model.bound_tools_per_turn[1]) + assert "fake_mcp_search" in turn2, f"REGRESSION (#2884): tool_search promoted fake_mcp_search in turn 1, but the deferred-tool filter still hid it from the model in turn 2. Turn 2 bound tools: {turn2!r}" + + +# --------------------------------------------------------------------------- +# Section C — the actual issue #2884 trigger: a re-entrant +# get_available_tools call (e.g. when task_tool spawns a subagent) must not +# wipe the parent's promotion. +# --------------------------------------------------------------------------- + + +def test_reentrant_get_available_tools_preserves_promotion(monkeypatch: pytest.MonkeyPatch): + """Issue #2884 in its real shape: a re-entrant get_available_tools call + (the same pattern that happens when ``task_tool`` builds a subagent's + toolset mid-run) must not wipe the parent agent's tool_search promotions. + + Turn 1's tool batch contains BOTH ``tool_search`` (which promotes + ``fake_mcp_search``) AND ``fake_subagent_trigger`` (which calls + ``get_available_tools`` again — exactly what ``task_tool`` does when it + builds a subagent's toolset). With the fix, turn 2's bind_tools sees the + promoted tool. Without the fix, the re-entry wipes the registry and + the filter re-hides it. + """ + from langchain.agents import create_agent + + from deerflow.agents.middlewares.deferred_tool_filter_middleware import DeferredToolFilterMiddleware + from deerflow.tools.tools import get_available_tools + + _patch_mcp_pipeline(monkeypatch, [fake_mcp_search, fake_mcp_fetch]) + _force_tool_search_enabled(monkeypatch) + + # The trigger tool simulates what task_tool does internally: rebuild the + # toolset by calling get_available_tools while the registry is live. + @as_tool + def fake_subagent_trigger(prompt: str) -> str: + """Pretend to spawn a subagent. Internally rebuilds the toolset.""" + get_available_tools(subagent_enabled=False) + return f"spawned subagent for: {prompt}" + + tools = get_available_tools() + [fake_subagent_trigger] + + bound_per_turn: list[list[str]] = [] + + class _Model(FakeToolCallingModel): + def bind_tools(self, tools_arg, **kwargs): # type: ignore[override] + bound_per_turn.append([getattr(t, "name", repr(t)) for t in tools_arg]) + return self + + model = _Model( + responses=[ + # Turn 1: do both in one batch — promote AND trigger the + # subagent-style rebuild. LangGraph executes them in order in the + # same agent step. + AIMessage( + content="", + tool_calls=[ + { + "name": "tool_search", + "args": {"query": "select:fake_mcp_search"}, + "id": "call_search_1", + "type": "tool_call", + }, + { + "name": "fake_subagent_trigger", + "args": {"prompt": "go"}, + "id": "call_trigger_1", + "type": "tool_call", + }, + ], + ), + # Turn 2: try to invoke the promoted tool. The model gets this + # turn only if turn 1's bind_tools recorded what the filter sent. + AIMessage( + content="", + tool_calls=[ + { + "name": "fake_mcp_search", + "args": {"query": "hello"}, + "id": "call_mcp_1", + "type": "tool_call", + } + ], + ), + AIMessage(content="all done"), + ] + ) + + graph = create_agent( + model=model, + tools=tools, + middleware=[DeferredToolFilterMiddleware()], + system_prompt="bug-2884-subagent-repro", + ) + graph.invoke({"messages": [HumanMessage(content="use the search tool")]}) + + # Turn 1 sanity: deferred tool not visible yet + assert "fake_mcp_search" not in set(bound_per_turn[0]), bound_per_turn[0] + + # The smoking-gun assertion: turn 2 sees the promoted tool DESPITE the + # re-entrant get_available_tools call that happened in turn 1's tool batch. + assert len(bound_per_turn) >= 2, f"Expected ≥2 turns, got {len(bound_per_turn)}" + turn2 = set(bound_per_turn[1]) + assert "fake_mcp_search" in turn2, f"REGRESSION (#2884): a re-entrant get_available_tools call (e.g. task_tool spawning a subagent) wiped the parent agent's promotion. Turn 2 bound tools: {turn2!r}" diff --git a/backend/tests/test_tool_deduplication.py b/backend/tests/test_tool_deduplication.py index ed9efffaf..f018fc57d 100644 --- a/backend/tests/test_tool_deduplication.py +++ b/backend/tests/test_tool_deduplication.py @@ -65,8 +65,7 @@ def _make_minimal_config(tools): @patch("deerflow.tools.tools.get_app_config") @patch("deerflow.tools.tools.is_host_bash_allowed", return_value=True) -@patch("deerflow.tools.tools.reset_deferred_registry") -def test_config_loaded_async_only_tool_gets_sync_wrapper(mock_reset, mock_bash, mock_cfg): +def test_config_loaded_async_only_tool_gets_sync_wrapper(mock_bash, mock_cfg): """Config-loaded async-only tools can still be invoked by sync clients.""" async def async_tool_impl(x: int) -> str: @@ -98,8 +97,7 @@ def test_config_loaded_async_only_tool_gets_sync_wrapper(mock_reset, mock_bash, @patch("deerflow.tools.tools.get_app_config") @patch("deerflow.tools.tools.is_host_bash_allowed", return_value=True) -@patch("deerflow.tools.tools.reset_deferred_registry") -def test_no_duplicates_returned(mock_reset, mock_bash, mock_cfg): +def test_no_duplicates_returned(mock_bash, mock_cfg): """get_available_tools() never returns two tools with the same name.""" mock_cfg.return_value = _make_minimal_config([]) @@ -113,8 +111,7 @@ def test_no_duplicates_returned(mock_reset, mock_bash, mock_cfg): @patch("deerflow.tools.tools.get_app_config") @patch("deerflow.tools.tools.is_host_bash_allowed", return_value=True) -@patch("deerflow.tools.tools.reset_deferred_registry") -def test_first_occurrence_wins(mock_reset, mock_bash, mock_cfg): +def test_first_occurrence_wins(mock_bash, mock_cfg): """When duplicates exist, the first occurrence is kept.""" mock_cfg.return_value = _make_minimal_config([]) @@ -132,8 +129,7 @@ def test_first_occurrence_wins(mock_reset, mock_bash, mock_cfg): @patch("deerflow.tools.tools.get_app_config") @patch("deerflow.tools.tools.is_host_bash_allowed", return_value=True) -@patch("deerflow.tools.tools.reset_deferred_registry") -def test_duplicate_triggers_warning(mock_reset, mock_bash, mock_cfg, caplog): +def test_duplicate_triggers_warning(mock_bash, mock_cfg, caplog): """A warning is logged for every skipped duplicate.""" import logging