fix: resolve tool duplication and skill parser YAML inconsistencies (#1803) (#2107)

* Refactor tests for SKILL.md parser Updated tests for SKILL.md parser to handle quoted names and descriptions correctly. Added new tests for parsing plain and single-quoted names, and ensured multi-line descriptions are processed properly. * Implement tool name validation and deduplication Add tool name mismatch warning and deduplication logic * Refactor skill file parsing and error handling * Add tests for tool name deduplication Added tests for tool name deduplication in get_available_tools(). Ensured that duplicates are not returned, the first occurrence is kept, and warnings are logged for skipped duplicates. * Apply suggestions from code review Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> * Update minimal config to include tools list * Update test for nonexistent skill file Ensure the test for nonexistent files checks for None. * Refactor tool loading and add skill management support Refactor tool loading logic to include skill management tools based on configuration and clean up comments. * Enhance code comments for tool loading logic Added comments to clarify the purpose of various code sections related to tool loading and configuration. * Fix assertion for duplicate tool name warning * Fix indentation issues in tools.py * Fix the lint error of test_tool_deduplication * Fix the lint error of tools.py * Fix the lint error * Fix the lint error * make format --------- Co-authored-by: Willem Jiang <willem.jiang@gmail.com> Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
2026-04-25 19:28:23 +00:00 · 2026-04-20 08:25:03 -04:00 · 2026-04-20 08:25:03 -04:00 · 6dce26a52e
commit 6dce26a52e
parent fc94e90f6c
4 changed files with 282 additions and 178 deletions
--- a/backend/packages/harness/deerflow/skills/parser.py
+++ b/backend/packages/harness/deerflow/skills/parser.py
@ -2,21 +2,24 @@ import logging
 import re
 from pathlib import Path

+import yaml
+
 from .types import Skill

 logger = logging.getLogger(__name__)


 def parse_skill_file(skill_file: Path, category: str, relative_path: Path | None = None) -> Skill | None:
-    """
-    Parse a SKILL.md file and extract metadata.
+    """Parse a SKILL.md file and extract metadata.

    Args:
-        skill_file: Path to the SKILL.md file
-        category: Category of the skill ('public' or 'custom')
+        skill_file: Path to the SKILL.md file.
+        category: Category of the skill ('public' or 'custom').
+        relative_path: Relative path from the category root to the skill
+            directory.  Defaults to the skill directory name when omitted.

    Returns:
-        Skill object if parsing succeeds, None otherwise
+        Skill object if parsing succeeds, None otherwise.
    """
    if not skill_file.exists() or skill_file.name != "SKILL.md":
        return None
@ -24,90 +27,42 @@ def parse_skill_file(skill_file: Path, category: str, relative_path: Path | None
    try:
        content = skill_file.read_text(encoding="utf-8")

-        # Extract YAML front matter
-        # Pattern: ---\nkey: value\n---
+        # Extract YAML front-matter block between leading ``---`` fences.
        front_matter_match = re.match(r"^---\s*\n(.*?)\n---\s*\n", content, re.DOTALL)
-
        if not front_matter_match:
            return None

-        front_matter = front_matter_match.group(1)
+        front_matter_text = front_matter_match.group(1)

-        # Parse YAML front matter with basic multiline string support
-        metadata = {}
-        lines = front_matter.split("\n")
-        current_key = None
-        current_value = []
-        is_multiline = False
-        multiline_style = None
-        indent_level = None
+        try:
+            metadata = yaml.safe_load(front_matter_text)
+        except yaml.YAMLError as exc:
+            logger.error("Invalid YAML front-matter in %s: %s", skill_file, exc)
+            return None

-        for line in lines:
-            if is_multiline:
-                if not line.strip():
-                    current_value.append("")
-                    continue
+        if not isinstance(metadata, dict):
+            logger.error("Front-matter in %s is not a YAML mapping", skill_file)
+            return None

-                current_indent = len(line) - len(line.lstrip())
-
-                if indent_level is None:
-                    if current_indent > 0:
-                        indent_level = current_indent
-                        current_value.append(line[indent_level:])
-                        continue
-                elif current_indent >= indent_level:
-                    current_value.append(line[indent_level:])
-                    continue
-
-            # If we reach here, it's either a new key or the end of multiline
-            if current_key and is_multiline:
-                if multiline_style == "|":
-                    metadata[current_key] = "\n".join(current_value).rstrip()
-                else:
-                    text = "\n".join(current_value).rstrip()
-                    # Replace single newlines with spaces for folded blocks
-                    metadata[current_key] = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
-
-                current_key = None
-                current_value = []
-                is_multiline = False
-                multiline_style = None
-                indent_level = None
-
-            if not line.strip():
-                continue
-
-            if ":" in line:
-                # Handle nested dicts simply by ignoring indentation for now,
-                # or just extracting top-level keys
-                key, value = line.split(":", 1)
-                key = key.strip()
-                value = value.strip()
-
-                if value in (">", "|"):
-                    current_key = key
-                    is_multiline = True
-                    multiline_style = value
-                    current_value = []
-                    indent_level = None
-                else:
-                    metadata[key] = value
-
-        if current_key and is_multiline:
-            if multiline_style == "|":
-                metadata[current_key] = "\n".join(current_value).rstrip()
-            else:
-                text = "\n".join(current_value).rstrip()
-                metadata[current_key] = re.sub(r"(?<!\n)\n(?!\n)", " ", text)
-
-        # Extract required fields
+        # Extract required fields.  Both must be non-empty strings.
        name = metadata.get("name")
        description = metadata.get("description")

+        if not name or not isinstance(name, str):
+            return None
+        if not description or not isinstance(description, str):
+            return None
+
+        # Normalise: strip surrounding whitespace that YAML may preserve.
+        name = name.strip()
+        description = description.strip()
+
        if not name or not description:
            return None

        license_text = metadata.get("license")
+        if license_text is not None:
+            license_text = str(license_text).strip() or None

        return Skill(
            name=name,
@ -117,9 +72,9 @@ def parse_skill_file(skill_file: Path, category: str, relative_path: Path | None
            skill_file=skill_file,
            relative_path=relative_path or Path(skill_file.parent.name),
            category=category,
-            enabled=True,  # Default to enabled, actual state comes from config file
+            enabled=True,  # Actual state comes from the extensions config file.
        )

-    except Exception as e:
-        logger.error("Error parsing skill file %s: %s", skill_file, e)
+    except Exception:
+        logger.exception("Unexpected error parsing skill file %s", skill_file)
        return None
--- a/backend/packages/harness/deerflow/tools/tools.py
+++ b/backend/packages/harness/deerflow/tools/tools.py
@ -59,7 +59,22 @@ def get_available_tools(
    if not is_host_bash_allowed(config):
        tool_configs = [tool for tool in tool_configs if not _is_host_bash_tool(tool)]

-    loaded_tools = [resolve_variable(tool.use, BaseTool) for tool in tool_configs]
+    loaded_tools_raw = [(cfg, resolve_variable(cfg.use, BaseTool)) for cfg in tool_configs]
+
+    # Warn when the config ``name`` field and the tool object's ``.name``
+    # attribute diverge — this mismatch is the root cause of issue #1803 where
+    # the LLM receives one name in its tool schema but the runtime router
+    # recognises a different name, producing "not a valid tool" errors.
+    for cfg, loaded in loaded_tools_raw:
+        if cfg.name != loaded.name:
+            logger.warning(
+                "Tool name mismatch: config name %r does not match tool .name %r (use: %s). The tool's own .name will be used for binding.",
+                cfg.name,
+                loaded.name,
+                cfg.use,
+            )
+
+    loaded_tools = [t for _, t in loaded_tools_raw]

    # Conditionally add tools based on config
    builtin_tools = BUILTIN_TOOLS.copy()
@ -134,4 +149,20 @@ def get_available_tools(
        logger.warning(f"Failed to load ACP tool: {e}")

    logger.info(f"Total tools loaded: {len(loaded_tools)}, built-in tools: {len(builtin_tools)}, MCP tools: {len(mcp_tools)}, ACP tools: {len(acp_tools)}")
-    return loaded_tools + builtin_tools + mcp_tools + acp_tools
+
+    # Deduplicate by tool name — config-loaded tools take priority, followed by
+    # built-ins, MCP tools, and ACP tools.  Duplicate names cause the LLM to
+    # receive ambiguous or concatenated function schemas (issue #1803).
+    all_tools = loaded_tools + builtin_tools + mcp_tools + acp_tools
+    seen_names: set[str] = set()
+    unique_tools: list[BaseTool] = []
+    for t in all_tools:
+        if t.name not in seen_names:
+            unique_tools.append(t)
+            seen_names.add(t.name)
+        else:
+            logger.warning(
+                "Duplicate tool name %r detected and skipped — check your config.yaml and MCP server registrations (issue #1803).",
+                t.name,
+            )
+    return unique_tools
--- a/backend/tests/test_skills_parser.py
+++ b/backend/tests/test_skills_parser.py
@ -1,119 +1,131 @@
-"""Tests for skill file parser."""
+"""Tests for the SKILL.md parser regression introduced in issue #1803.
+
+The previous hand-rolled YAML parser stored quoted string values with their
+surrounding quotes intact (e.g. ``name: "my-skill"`` → ``'"my-skill"'``).
+This caused a mismatch with ``_validate_skill_frontmatter`` (which uses
+``yaml.safe_load``) and broke skill lookup after installation.
+
+The parser now uses ``yaml.safe_load`` consistently with ``validation.py``.
+"""
+
+from __future__ import annotations

 from pathlib import Path

 from deerflow.skills.parser import parse_skill_file

+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------

-def _write_skill(tmp_path: Path, content: str) -> Path:
-    """Write a SKILL.md file and return its path."""
-    skill_file = tmp_path / "SKILL.md"
-    skill_file.write_text(content, encoding="utf-8")
+
+def _write_skill(tmp_path: Path, front_matter: str, body: str = "# My Skill\n") -> Path:
+    """Write a minimal SKILL.md and return the path."""
+    skill_dir = tmp_path / "my-skill"
+    skill_dir.mkdir()
+    skill_file = skill_dir / "SKILL.md"
+    skill_file.write_text(f"---\n{front_matter}\n---\n{body}", encoding="utf-8")
    return skill_file


-class TestParseSkillFile:
-    def test_valid_skill_file(self, tmp_path):
-        skill_file = _write_skill(
-            tmp_path,
-            "---\nname: my-skill\ndescription: A test skill\nlicense: MIT\n---\n\n# My Skill\n",
-        )
-        result = parse_skill_file(skill_file, "public")
-        assert result is not None
-        assert result.name == "my-skill"
-        assert result.description == "A test skill"
-        assert result.license == "MIT"
-        assert result.category == "public"
-        assert result.enabled is True
-        assert result.skill_dir == tmp_path
-        assert result.skill_file == skill_file
+# ---------------------------------------------------------------------------
+# Basic parsing
+# ---------------------------------------------------------------------------

-    def test_missing_name_returns_none(self, tmp_path):
-        skill_file = _write_skill(
-            tmp_path,
-            "---\ndescription: A test skill\n---\n\nBody\n",
-        )
-        assert parse_skill_file(skill_file, "public") is None

-    def test_missing_description_returns_none(self, tmp_path):
-        skill_file = _write_skill(
-            tmp_path,
-            "---\nname: my-skill\n---\n\nBody\n",
-        )
-        assert parse_skill_file(skill_file, "public") is None
+def test_parse_plain_name(tmp_path):
+    """Unquoted name is parsed correctly."""
+    skill_file = _write_skill(tmp_path, "name: my-skill\ndescription: A test skill")
+    skill = parse_skill_file(skill_file, category="custom")
+    assert skill is not None
+    assert skill.name == "my-skill"

-    def test_no_front_matter_returns_none(self, tmp_path):
-        skill_file = _write_skill(tmp_path, "# Just a markdown file\n\nNo front matter here.\n")
-        assert parse_skill_file(skill_file, "public") is None

-    def test_nonexistent_file_returns_none(self, tmp_path):
-        skill_file = tmp_path / "SKILL.md"
-        assert parse_skill_file(skill_file, "public") is None
+def test_parse_quoted_name_no_quotes_in_result(tmp_path):
+    """Quoted name (YAML string) must not include surrounding quotes in result.

-    def test_wrong_filename_returns_none(self, tmp_path):
-        wrong_file = tmp_path / "README.md"
-        wrong_file.write_text("---\nname: test\ndescription: test\n---\n", encoding="utf-8")
-        assert parse_skill_file(wrong_file, "public") is None
+    Regression: the old hand-rolled parser stored ``'"my-skill"'`` instead of
+    ``'my-skill'`` when the YAML value was wrapped in double-quotes.
+    """
+    skill_file = _write_skill(tmp_path, 'name: "my-skill"\ndescription: A test skill')
+    skill = parse_skill_file(skill_file, category="custom")
+    assert skill is not None
+    assert skill.name == "my-skill", f"Expected 'my-skill', got {skill.name!r}"

-    def test_optional_license_field(self, tmp_path):
-        skill_file = _write_skill(
-            tmp_path,
-            "---\nname: my-skill\ndescription: A test skill\n---\n\nBody\n",
-        )
-        result = parse_skill_file(skill_file, "custom")
-        assert result is not None
-        assert result.license is None
-        assert result.category == "custom"

-    def test_custom_relative_path(self, tmp_path):
-        skill_file = _write_skill(
-            tmp_path,
-            "---\nname: nested-skill\ndescription: Nested\n---\n\nBody\n",
-        )
-        rel = Path("group/nested-skill")
-        result = parse_skill_file(skill_file, "public", relative_path=rel)
-        assert result is not None
-        assert result.relative_path == rel
+def test_parse_single_quoted_name(tmp_path):
+    """Single-quoted YAML strings are also handled correctly."""
+    skill_file = _write_skill(tmp_path, "name: 'my-skill'\ndescription: A test skill")
+    skill = parse_skill_file(skill_file, category="custom")
+    assert skill is not None
+    assert skill.name == "my-skill"

-    def test_default_relative_path_is_parent_name(self, tmp_path):
-        skill_file = _write_skill(
-            tmp_path,
-            "---\nname: my-skill\ndescription: Test\n---\n\nBody\n",
-        )
-        result = parse_skill_file(skill_file, "public")
-        assert result is not None
-        assert result.relative_path == Path(tmp_path.name)

-    def test_colons_in_description(self, tmp_path):
-        skill_file = _write_skill(
-            tmp_path,
-            "---\nname: my-skill\ndescription: A skill: does things\n---\n\nBody\n",
-        )
-        result = parse_skill_file(skill_file, "public")
-        assert result is not None
-        assert result.description == "A skill: does things"
+def test_parse_description_returned(tmp_path):
+    """Description field is correctly extracted."""
+    skill_file = _write_skill(tmp_path, "name: my-skill\ndescription: Does amazing things")
+    skill = parse_skill_file(skill_file, category="custom")
+    assert skill is not None
+    assert skill.description == "Does amazing things"

-    def test_multiline_yaml_folded_description(self, tmp_path):
-        skill_file = _write_skill(
-            tmp_path,
-            "---\nname: multiline-skill\ndescription: >\n   This is a multiline\n   description for a skill.\n\n   It spans multiple lines.\nlicense: MIT\n---\n\nBody\n",
-        )
-        result = parse_skill_file(skill_file, "public")
-        assert result is not None
-        assert result.name == "multiline-skill"
-        assert result.description == "This is a multiline description for a skill.\n\nIt spans multiple lines."
-        assert result.license == "MIT"

-    def test_multiline_yaml_literal_description(self, tmp_path):
-        skill_file = _write_skill(
-            tmp_path,
-            "---\nname: pipe-skill\ndescription: |\n    First line.\n    Second line.\n---\n\nBody\n",
-        )
-        result = parse_skill_file(skill_file, "public")
-        assert result is not None
-        assert result.name == "pipe-skill"
-        assert result.description == "First line.\nSecond line."
+def test_parse_multiline_description(tmp_path):
+    """Multi-line YAML descriptions are collapsed correctly by yaml.safe_load."""
+    front_matter = "name: my-skill\ndescription: >\n  A folded\n  description"
+    skill_file = _write_skill(tmp_path, front_matter)
+    skill = parse_skill_file(skill_file, category="custom")
+    assert skill is not None
+    assert "folded" in skill.description

-    def test_empty_front_matter_returns_none(self, tmp_path):
-        skill_file = _write_skill(tmp_path, "---\n\n---\n\nBody\n")
-        assert parse_skill_file(skill_file, "public") is None
+
+def test_parse_license_field(tmp_path):
+    """Optional license field is captured when present."""
+    skill_file = _write_skill(tmp_path, "name: my-skill\ndescription: Test\nlicense: MIT")
+    skill = parse_skill_file(skill_file, category="custom")
+    assert skill is not None
+    assert skill.license == "MIT"
+
+
+def test_parse_missing_name_returns_none(tmp_path):
+    """Skills missing a name field are rejected."""
+    skill_file = _write_skill(tmp_path, "description: A test skill")
+    skill = parse_skill_file(skill_file, category="custom")
+    assert skill is None
+
+
+def test_parse_missing_description_returns_none(tmp_path):
+    """Skills missing a description field are rejected."""
+    skill_file = _write_skill(tmp_path, "name: my-skill")
+    skill = parse_skill_file(skill_file, category="custom")
+    assert skill is None
+
+
+def test_parse_no_front_matter_returns_none(tmp_path):
+    """Files without YAML front-matter delimiters return None."""
+    skill_dir = tmp_path / "no-fm"
+    skill_dir.mkdir()
+    skill_file = skill_dir / "SKILL.md"
+    skill_file.write_text("# No front matter here\n", encoding="utf-8")
+    skill = parse_skill_file(skill_file, category="public")
+    assert skill is None
+
+
+def test_parse_invalid_yaml_returns_none(tmp_path):
+    """Malformed YAML front-matter is handled gracefully (returns None)."""
+    skill_file = _write_skill(tmp_path, "name: [unclosed")
+    skill = parse_skill_file(skill_file, category="custom")
+    assert skill is None
+
+
+def test_parse_category_stored(tmp_path):
+    """Category is propagated into the returned Skill object."""
+    skill_file = _write_skill(tmp_path, "name: my-skill\ndescription: Test")
+    skill = parse_skill_file(skill_file, category="public")
+    assert skill is not None
+    assert skill.category == "public"
+
+
+def test_parse_nonexistent_file_returns_none(tmp_path):
+    """Non-existent files are handled gracefully."""
+    skill = parse_skill_file(tmp_path / "ghost" / "SKILL.md", category="custom")
+    assert skill is None
--- a/backend/tests/test_tool_deduplication.py
+++ b/backend/tests/test_tool_deduplication.py
@ -0,0 +1,106 @@
+"""Tests for tool name deduplication in get_available_tools() (issue #1803).
+
+Duplicate tool registrations previously passed through silently and could
+produce mangled function-name schemas that caused 100% tool call failures.
+``get_available_tools()`` now deduplicates by name, config-loaded tools taking
+priority, and logs a warning for every skipped duplicate.
+"""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock, patch
+
+from langchain_core.tools import BaseTool, tool
+
+from deerflow.tools.tools import get_available_tools
+
+# ---------------------------------------------------------------------------
+# Fixture tools
+# ---------------------------------------------------------------------------
+
+
+@tool
+def _tool_alpha(x: str) -> str:
+    """Alpha tool."""
+    return x
+
+
+@tool
+def _tool_alpha_dup(x: str) -> str:
+    """Duplicate of alpha — same name, different object."""
+    return x
+
+
+# Rename duplicate to share the same .name as _tool_alpha
+_tool_alpha_dup.name = _tool_alpha.name  # type: ignore[attr-defined]
+
+
+@tool
+def _tool_beta(x: str) -> str:
+    """Beta tool."""
+    return x
+
+
+# ---------------------------------------------------------------------------
+# Deduplication behaviour
+# ---------------------------------------------------------------------------
+
+
+def _make_minimal_config(tools):
+    """Return an AppConfig-like mock with the given tools list."""
+    config = MagicMock()
+    config.tools = tools
+    config.models = []
+    config.tool_search.enabled = False
+    config.sandbox = MagicMock()
+    return config
+
+
+@patch("deerflow.tools.tools.get_app_config")
+@patch("deerflow.tools.tools.is_host_bash_allowed", return_value=True)
+@patch("deerflow.tools.tools.reset_deferred_registry")
+def test_no_duplicates_returned(mock_reset, mock_bash, mock_cfg):
+    """get_available_tools() never returns two tools with the same name."""
+    mock_cfg.return_value = _make_minimal_config([])
+
+    # Patch the builtin tools so we control exactly what comes back.
+    with patch("deerflow.tools.tools.BUILTIN_TOOLS", [_tool_alpha, _tool_alpha_dup, _tool_beta]):
+        result = get_available_tools(include_mcp=False)
+
+    names = [t.name for t in result]
+    assert len(names) == len(set(names)), f"Duplicate names detected: {names}"
+
+
+@patch("deerflow.tools.tools.get_app_config")
+@patch("deerflow.tools.tools.is_host_bash_allowed", return_value=True)
+@patch("deerflow.tools.tools.reset_deferred_registry")
+def test_first_occurrence_wins(mock_reset, mock_bash, mock_cfg):
+    """When duplicates exist, the first occurrence is kept."""
+    mock_cfg.return_value = _make_minimal_config([])
+
+    sentinel_alpha = MagicMock(spec=BaseTool, name="_sentinel")
+    sentinel_alpha.name = _tool_alpha.name  # same name
+    sentinel_alpha_dup = MagicMock(spec=BaseTool, name="_sentinel_dup")
+    sentinel_alpha_dup.name = _tool_alpha.name  # same name — should be dropped
+
+    with patch("deerflow.tools.tools.BUILTIN_TOOLS", [sentinel_alpha, sentinel_alpha_dup, _tool_beta]):
+        result = get_available_tools(include_mcp=False)
+
+    returned_alpha = next(t for t in result if t.name == _tool_alpha.name)
+    assert returned_alpha is sentinel_alpha
+
+
+@patch("deerflow.tools.tools.get_app_config")
+@patch("deerflow.tools.tools.is_host_bash_allowed", return_value=True)
+@patch("deerflow.tools.tools.reset_deferred_registry")
+def test_duplicate_triggers_warning(mock_reset, mock_bash, mock_cfg, caplog):
+    """A warning is logged for every skipped duplicate."""
+    import logging
+
+    mock_cfg.return_value = _make_minimal_config([])
+
+    with patch("deerflow.tools.tools.BUILTIN_TOOLS", [_tool_alpha, _tool_alpha_dup]):
+        with caplog.at_level(logging.WARNING, logger="deerflow.tools.tools"):
+            get_available_tools(include_mcp=False)
+
+    assert any("Duplicate tool name" in r.message for r in caplog.records), "Expected a duplicate-tool warning in log output"