mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-09 17:12:01 +00:00
* feat(subagents): extend deferred MCP tool loading to subagents (#3341) Subagents now reuse the lead agent's deferred-tool path: when tool_search.enabled, MCP tool schemas are withheld from the model and surfaced by name in <available-deferred-tools>, fetched on demand via the generated tool_search helper. DeferredToolFilterMiddleware deterministically rewrites request.tools to hide the deferred schemas (the prompt section is discovery only, not enforcement). Consolidates the assembly into deerflow.tools.builtins.tool_search, now the single home for both assemble_deferred_tools (centralized fail-closed guard, replacing the lead-only private _assemble_deferred) and the relocated get_deferred_tools_prompt_section. Shared by every build path: lead agent, embedded client, and subagent executor. tool_search is appended after the subagent's name-level tool policy and is treated as infrastructure: its catalog is built from the already policy-filtered list, so it can never surface a tool the policy denied. Follow-up to #3370. Fixes #3341. * test(subagents): assert the real middleware builder emits a working deferred filter (#3341) The existing recipe test hand-constructs DeferredToolFilterMiddleware, so it cannot catch a regression in how build_subagent_runtime_middlewares (the call executor._create_agent actually makes) wires the deferred setup into the filter. Add a test that sources the filter from the real builder given a real setup and runs it through a graph: a wrong catalog hash would silently stop promotion, a dropped filter would stop hiding — both now caught. Running the full real middleware stack is intentionally avoided (the other runtime middlewares need sandbox/thread infra to execute, which would make the test flaky); their attachment + ordering before Safety stays locked in test_tool_error_handling_middleware.py. * test(subagents): keep executor tests config-free in CI * chore: trigger ci * Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Willem Jiang <willem.jiang@gmail.com> Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
174 lines
6.8 KiB
Python
174 lines
6.8 KiB
Python
"""Regressions for the deferred-tool redesign (#3272).
|
|
|
|
- Cross-context: building the graph in one async context and running it in a
|
|
sibling context (that did NOT inherit the build context) must still hide
|
|
deferred tools. The old ContextVar implementation failed this; the closure +
|
|
graph-state implementation must pass.
|
|
- Policy leak (Finding 1): a tool removed by policy must not be searchable.
|
|
- Fail-closed (Finding 2): a wiring regression must raise, not silently leak.
|
|
- #2884 isolation: a second (subagent-style) setup build must not affect the
|
|
lead agent's middleware/promotion.
|
|
"""
|
|
|
|
import asyncio
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
from langchain.agents import create_agent
|
|
from langchain_core.language_models.fake_chat_models import GenericFakeChatModel
|
|
from langchain_core.messages import AIMessage, HumanMessage
|
|
from langchain_core.tools import tool as as_tool
|
|
|
|
from deerflow.agents.middlewares.deferred_tool_filter_middleware import DeferredToolFilterMiddleware
|
|
from deerflow.skills.tool_policy import filter_tools_by_skill_allowed_tools
|
|
from deerflow.skills.types import Skill
|
|
from deerflow.tools.builtins.tool_search import DeferredToolSetup, assemble_deferred_tools, build_deferred_tool_setup
|
|
from deerflow.tools.mcp_metadata import tag_mcp_tool
|
|
|
|
|
|
@as_tool
|
|
def active_tool(x: str) -> str:
|
|
"active"
|
|
return x
|
|
|
|
|
|
@as_tool
|
|
def mcp_secret(x: str) -> str:
|
|
"deferred mcp tool — must be hidden from bind_tools until promoted"
|
|
return x
|
|
|
|
|
|
_BOUND: list[list[str]] = []
|
|
|
|
|
|
class _RecordingModel(GenericFakeChatModel):
|
|
def bind_tools(self, tools, **kwargs):
|
|
_BOUND.append([getattr(t, "name", None) for t in tools])
|
|
return self
|
|
|
|
|
|
def _build_graph():
|
|
filtered = [active_tool, tag_mcp_tool(mcp_secret)]
|
|
setup = build_deferred_tool_setup(filtered, enabled=True)
|
|
final = [*filtered, setup.tool_search_tool]
|
|
model = _RecordingModel(messages=iter([AIMessage(content="done")] * 4))
|
|
return create_agent(
|
|
model=model,
|
|
tools=final,
|
|
middleware=[DeferredToolFilterMiddleware(setup.deferred_names, setup.catalog_hash)],
|
|
system_prompt="t",
|
|
)
|
|
|
|
|
|
async def _abuild():
|
|
return _build_graph()
|
|
|
|
|
|
def test_deferred_hidden_when_built_and_run_in_different_contexts():
|
|
"""Build in one task, run in a sibling task that did not inherit it."""
|
|
_BOUND.clear()
|
|
|
|
async def main():
|
|
graph = await asyncio.create_task(_abuild())
|
|
|
|
async def run():
|
|
await graph.ainvoke({"messages": [HumanMessage(content="hi")]})
|
|
|
|
await asyncio.create_task(run())
|
|
|
|
asyncio.run(main())
|
|
|
|
assert _BOUND, "model was never bound"
|
|
assert not any("mcp_secret" in names for names in _BOUND), f"deferred MCP tool leaked into bind_tools: {_BOUND}"
|
|
|
|
|
|
def test_policy_excluded_mcp_tool_not_in_catalog():
|
|
"""Finding 1: a tool removed by policy is not searchable/exposed."""
|
|
filtered_after_policy = [active_tool] # mcp_secret denied by skill allowed-tools
|
|
setup = build_deferred_tool_setup(filtered_after_policy, enabled=True)
|
|
assert setup.deferred_names == frozenset()
|
|
assert setup.tool_search_tool is None
|
|
|
|
|
|
def test_fail_closed_when_mcp_survives_without_setup(monkeypatch):
|
|
"""Finding 2: simulate a wiring regression and assert it fails loudly.
|
|
|
|
``assemble_deferred_tools`` references ``build_deferred_tool_setup`` as a
|
|
module global, so patch it in ``tool_search`` (its home module).
|
|
"""
|
|
monkeypatch.setattr(
|
|
"deerflow.tools.builtins.tool_search.build_deferred_tool_setup",
|
|
lambda tools, *, enabled: DeferredToolSetup(None, frozenset(), None),
|
|
)
|
|
with pytest.raises(RuntimeError, match="fail-closed"):
|
|
assemble_deferred_tools([tag_mcp_tool(mcp_secret)], enabled=True)
|
|
|
|
|
|
def test_subagent_reentry_does_not_touch_lead_state():
|
|
"""#2884: building a second (subagent) setup must not affect the lead's
|
|
middleware. With no shared registry/ContextVar, the lead middleware depends
|
|
only on its own deferred_names + the passed state."""
|
|
lead_setup = build_deferred_tool_setup([active_tool, tag_mcp_tool(mcp_secret)], enabled=True)
|
|
mw = DeferredToolFilterMiddleware(lead_setup.deferred_names, lead_setup.catalog_hash)
|
|
|
|
# Simulate a subagent build re-entering tool assembly with its own setup.
|
|
_ = build_deferred_tool_setup([tag_mcp_tool(mcp_secret)], enabled=True)
|
|
|
|
class _Req:
|
|
def __init__(self):
|
|
self.tools = [active_tool, mcp_secret]
|
|
self.state = {"promoted": {"catalog_hash": lead_setup.catalog_hash, "names": ["mcp_secret"]}}
|
|
|
|
def override(self, tools):
|
|
self.tools = tools
|
|
return self
|
|
|
|
out = mw._filter_tools(_Req())
|
|
assert {t.name for t in out.tools} == {"active_tool", "mcp_secret"} # promotion intact
|
|
|
|
|
|
def _make_skill(allowed_tools):
|
|
"""Skill carrying an explicit allowed-tools allowlist (None = legacy allow-all)."""
|
|
return Skill(
|
|
name="s",
|
|
description="d",
|
|
license="MIT",
|
|
skill_dir=Path("/tmp/s"),
|
|
skill_file=Path("/tmp/s/SKILL.md"),
|
|
relative_path=Path("s"),
|
|
category="public",
|
|
allowed_tools=allowed_tools,
|
|
enabled=True,
|
|
)
|
|
|
|
|
|
def test_policy_denied_mcp_yields_no_tool_search_end_to_end():
|
|
"""An allowlist that denies the MCP tool gates it end-to-end: after the real
|
|
policy filter no MCP tool survives, so ``assemble_deferred_tools`` adds no
|
|
tool_search (and does not fail-closed, because no MCP tool leaked through)."""
|
|
filtered = filter_tools_by_skill_allowed_tools([active_tool, tag_mcp_tool(mcp_secret)], [_make_skill(["active_tool"])])
|
|
final_tools, setup = assemble_deferred_tools(filtered, enabled=True)
|
|
|
|
assert [t.name for t in final_tools] == ["active_tool"]
|
|
assert "tool_search" not in {t.name for t in final_tools}
|
|
assert setup.deferred_names == frozenset()
|
|
|
|
|
|
def test_tool_search_appended_after_policy_but_never_exposes_denied_tool():
|
|
"""Intentional behavior change vs. upstream (Copilot review on PR #3342).
|
|
|
|
``tool_search`` is appended AFTER skill-allowlist filtering, so an allowlist
|
|
can no longer deny ``tool_search`` by name. This is safe by construction: the
|
|
tool only appears when allowed MCP tools survive the filter, and its catalog
|
|
is derived from the already policy-filtered list — so it can never expose a
|
|
tool the allowlist denied. Locks that contract so the ordering cannot regress.
|
|
"""
|
|
allowed = ["active_tool", "mcp_secret"] # permits the MCP tool, does NOT list tool_search
|
|
filtered = filter_tools_by_skill_allowed_tools([active_tool, tag_mcp_tool(mcp_secret)], [_make_skill(allowed)])
|
|
final_tools, setup = assemble_deferred_tools(filtered, enabled=True)
|
|
|
|
names = {t.name for t in final_tools}
|
|
assert "tool_search" in names # appended despite not being in the allowlist
|
|
assert setup.deferred_names == frozenset({"mcp_secret"})
|
|
assert set(setup.deferred_names) <= set(allowed) # catalog never exceeds the allowlist
|