diff --git a/backend/packages/harness/deerflow/skills/parser.py b/backend/packages/harness/deerflow/skills/parser.py index d2a3af67b..63bcfef7c 100644 --- a/backend/packages/harness/deerflow/skills/parser.py +++ b/backend/packages/harness/deerflow/skills/parser.py @@ -2,21 +2,24 @@ import logging import re from pathlib import Path +import yaml + from .types import Skill logger = logging.getLogger(__name__) def parse_skill_file(skill_file: Path, category: str, relative_path: Path | None = None) -> Skill | None: - """ - Parse a SKILL.md file and extract metadata. + """Parse a SKILL.md file and extract metadata. Args: - skill_file: Path to the SKILL.md file - category: Category of the skill ('public' or 'custom') + skill_file: Path to the SKILL.md file. + category: Category of the skill ('public' or 'custom'). + relative_path: Relative path from the category root to the skill + directory. Defaults to the skill directory name when omitted. Returns: - Skill object if parsing succeeds, None otherwise + Skill object if parsing succeeds, None otherwise. """ if not skill_file.exists() or skill_file.name != "SKILL.md": return None @@ -24,90 +27,42 @@ def parse_skill_file(skill_file: Path, category: str, relative_path: Path | None try: content = skill_file.read_text(encoding="utf-8") - # Extract YAML front matter - # Pattern: ---\nkey: value\n--- + # Extract YAML front-matter block between leading ``---`` fences. front_matter_match = re.match(r"^---\s*\n(.*?)\n---\s*\n", content, re.DOTALL) - if not front_matter_match: return None - front_matter = front_matter_match.group(1) + front_matter_text = front_matter_match.group(1) - # Parse YAML front matter with basic multiline string support - metadata = {} - lines = front_matter.split("\n") - current_key = None - current_value = [] - is_multiline = False - multiline_style = None - indent_level = None + try: + metadata = yaml.safe_load(front_matter_text) + except yaml.YAMLError as exc: + logger.error("Invalid YAML front-matter in %s: %s", skill_file, exc) + return None - for line in lines: - if is_multiline: - if not line.strip(): - current_value.append("") - continue + if not isinstance(metadata, dict): + logger.error("Front-matter in %s is not a YAML mapping", skill_file) + return None - current_indent = len(line) - len(line.lstrip()) - - if indent_level is None: - if current_indent > 0: - indent_level = current_indent - current_value.append(line[indent_level:]) - continue - elif current_indent >= indent_level: - current_value.append(line[indent_level:]) - continue - - # If we reach here, it's either a new key or the end of multiline - if current_key and is_multiline: - if multiline_style == "|": - metadata[current_key] = "\n".join(current_value).rstrip() - else: - text = "\n".join(current_value).rstrip() - # Replace single newlines with spaces for folded blocks - metadata[current_key] = re.sub(r"(?", "|"): - current_key = key - is_multiline = True - multiline_style = value - current_value = [] - indent_level = None - else: - metadata[key] = value - - if current_key and is_multiline: - if multiline_style == "|": - metadata[current_key] = "\n".join(current_value).rstrip() - else: - text = "\n".join(current_value).rstrip() - metadata[current_key] = re.sub(r"(? Path: - """Write a SKILL.md file and return its path.""" - skill_file = tmp_path / "SKILL.md" - skill_file.write_text(content, encoding="utf-8") + +def _write_skill(tmp_path: Path, front_matter: str, body: str = "# My Skill\n") -> Path: + """Write a minimal SKILL.md and return the path.""" + skill_dir = tmp_path / "my-skill" + skill_dir.mkdir() + skill_file = skill_dir / "SKILL.md" + skill_file.write_text(f"---\n{front_matter}\n---\n{body}", encoding="utf-8") return skill_file -class TestParseSkillFile: - def test_valid_skill_file(self, tmp_path): - skill_file = _write_skill( - tmp_path, - "---\nname: my-skill\ndescription: A test skill\nlicense: MIT\n---\n\n# My Skill\n", - ) - result = parse_skill_file(skill_file, "public") - assert result is not None - assert result.name == "my-skill" - assert result.description == "A test skill" - assert result.license == "MIT" - assert result.category == "public" - assert result.enabled is True - assert result.skill_dir == tmp_path - assert result.skill_file == skill_file +# --------------------------------------------------------------------------- +# Basic parsing +# --------------------------------------------------------------------------- - def test_missing_name_returns_none(self, tmp_path): - skill_file = _write_skill( - tmp_path, - "---\ndescription: A test skill\n---\n\nBody\n", - ) - assert parse_skill_file(skill_file, "public") is None - def test_missing_description_returns_none(self, tmp_path): - skill_file = _write_skill( - tmp_path, - "---\nname: my-skill\n---\n\nBody\n", - ) - assert parse_skill_file(skill_file, "public") is None +def test_parse_plain_name(tmp_path): + """Unquoted name is parsed correctly.""" + skill_file = _write_skill(tmp_path, "name: my-skill\ndescription: A test skill") + skill = parse_skill_file(skill_file, category="custom") + assert skill is not None + assert skill.name == "my-skill" - def test_no_front_matter_returns_none(self, tmp_path): - skill_file = _write_skill(tmp_path, "# Just a markdown file\n\nNo front matter here.\n") - assert parse_skill_file(skill_file, "public") is None - def test_nonexistent_file_returns_none(self, tmp_path): - skill_file = tmp_path / "SKILL.md" - assert parse_skill_file(skill_file, "public") is None +def test_parse_quoted_name_no_quotes_in_result(tmp_path): + """Quoted name (YAML string) must not include surrounding quotes in result. - def test_wrong_filename_returns_none(self, tmp_path): - wrong_file = tmp_path / "README.md" - wrong_file.write_text("---\nname: test\ndescription: test\n---\n", encoding="utf-8") - assert parse_skill_file(wrong_file, "public") is None + Regression: the old hand-rolled parser stored ``'"my-skill"'`` instead of + ``'my-skill'`` when the YAML value was wrapped in double-quotes. + """ + skill_file = _write_skill(tmp_path, 'name: "my-skill"\ndescription: A test skill') + skill = parse_skill_file(skill_file, category="custom") + assert skill is not None + assert skill.name == "my-skill", f"Expected 'my-skill', got {skill.name!r}" - def test_optional_license_field(self, tmp_path): - skill_file = _write_skill( - tmp_path, - "---\nname: my-skill\ndescription: A test skill\n---\n\nBody\n", - ) - result = parse_skill_file(skill_file, "custom") - assert result is not None - assert result.license is None - assert result.category == "custom" - def test_custom_relative_path(self, tmp_path): - skill_file = _write_skill( - tmp_path, - "---\nname: nested-skill\ndescription: Nested\n---\n\nBody\n", - ) - rel = Path("group/nested-skill") - result = parse_skill_file(skill_file, "public", relative_path=rel) - assert result is not None - assert result.relative_path == rel +def test_parse_single_quoted_name(tmp_path): + """Single-quoted YAML strings are also handled correctly.""" + skill_file = _write_skill(tmp_path, "name: 'my-skill'\ndescription: A test skill") + skill = parse_skill_file(skill_file, category="custom") + assert skill is not None + assert skill.name == "my-skill" - def test_default_relative_path_is_parent_name(self, tmp_path): - skill_file = _write_skill( - tmp_path, - "---\nname: my-skill\ndescription: Test\n---\n\nBody\n", - ) - result = parse_skill_file(skill_file, "public") - assert result is not None - assert result.relative_path == Path(tmp_path.name) - def test_colons_in_description(self, tmp_path): - skill_file = _write_skill( - tmp_path, - "---\nname: my-skill\ndescription: A skill: does things\n---\n\nBody\n", - ) - result = parse_skill_file(skill_file, "public") - assert result is not None - assert result.description == "A skill: does things" +def test_parse_description_returned(tmp_path): + """Description field is correctly extracted.""" + skill_file = _write_skill(tmp_path, "name: my-skill\ndescription: Does amazing things") + skill = parse_skill_file(skill_file, category="custom") + assert skill is not None + assert skill.description == "Does amazing things" - def test_multiline_yaml_folded_description(self, tmp_path): - skill_file = _write_skill( - tmp_path, - "---\nname: multiline-skill\ndescription: >\n This is a multiline\n description for a skill.\n\n It spans multiple lines.\nlicense: MIT\n---\n\nBody\n", - ) - result = parse_skill_file(skill_file, "public") - assert result is not None - assert result.name == "multiline-skill" - assert result.description == "This is a multiline description for a skill.\n\nIt spans multiple lines." - assert result.license == "MIT" - def test_multiline_yaml_literal_description(self, tmp_path): - skill_file = _write_skill( - tmp_path, - "---\nname: pipe-skill\ndescription: |\n First line.\n Second line.\n---\n\nBody\n", - ) - result = parse_skill_file(skill_file, "public") - assert result is not None - assert result.name == "pipe-skill" - assert result.description == "First line.\nSecond line." +def test_parse_multiline_description(tmp_path): + """Multi-line YAML descriptions are collapsed correctly by yaml.safe_load.""" + front_matter = "name: my-skill\ndescription: >\n A folded\n description" + skill_file = _write_skill(tmp_path, front_matter) + skill = parse_skill_file(skill_file, category="custom") + assert skill is not None + assert "folded" in skill.description - def test_empty_front_matter_returns_none(self, tmp_path): - skill_file = _write_skill(tmp_path, "---\n\n---\n\nBody\n") - assert parse_skill_file(skill_file, "public") is None + +def test_parse_license_field(tmp_path): + """Optional license field is captured when present.""" + skill_file = _write_skill(tmp_path, "name: my-skill\ndescription: Test\nlicense: MIT") + skill = parse_skill_file(skill_file, category="custom") + assert skill is not None + assert skill.license == "MIT" + + +def test_parse_missing_name_returns_none(tmp_path): + """Skills missing a name field are rejected.""" + skill_file = _write_skill(tmp_path, "description: A test skill") + skill = parse_skill_file(skill_file, category="custom") + assert skill is None + + +def test_parse_missing_description_returns_none(tmp_path): + """Skills missing a description field are rejected.""" + skill_file = _write_skill(tmp_path, "name: my-skill") + skill = parse_skill_file(skill_file, category="custom") + assert skill is None + + +def test_parse_no_front_matter_returns_none(tmp_path): + """Files without YAML front-matter delimiters return None.""" + skill_dir = tmp_path / "no-fm" + skill_dir.mkdir() + skill_file = skill_dir / "SKILL.md" + skill_file.write_text("# No front matter here\n", encoding="utf-8") + skill = parse_skill_file(skill_file, category="public") + assert skill is None + + +def test_parse_invalid_yaml_returns_none(tmp_path): + """Malformed YAML front-matter is handled gracefully (returns None).""" + skill_file = _write_skill(tmp_path, "name: [unclosed") + skill = parse_skill_file(skill_file, category="custom") + assert skill is None + + +def test_parse_category_stored(tmp_path): + """Category is propagated into the returned Skill object.""" + skill_file = _write_skill(tmp_path, "name: my-skill\ndescription: Test") + skill = parse_skill_file(skill_file, category="public") + assert skill is not None + assert skill.category == "public" + + +def test_parse_nonexistent_file_returns_none(tmp_path): + """Non-existent files are handled gracefully.""" + skill = parse_skill_file(tmp_path / "ghost" / "SKILL.md", category="custom") + assert skill is None diff --git a/backend/tests/test_tool_deduplication.py b/backend/tests/test_tool_deduplication.py new file mode 100644 index 000000000..35ec0bea6 --- /dev/null +++ b/backend/tests/test_tool_deduplication.py @@ -0,0 +1,106 @@ +"""Tests for tool name deduplication in get_available_tools() (issue #1803). + +Duplicate tool registrations previously passed through silently and could +produce mangled function-name schemas that caused 100% tool call failures. +``get_available_tools()`` now deduplicates by name, config-loaded tools taking +priority, and logs a warning for every skipped duplicate. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +from langchain_core.tools import BaseTool, tool + +from deerflow.tools.tools import get_available_tools + +# --------------------------------------------------------------------------- +# Fixture tools +# --------------------------------------------------------------------------- + + +@tool +def _tool_alpha(x: str) -> str: + """Alpha tool.""" + return x + + +@tool +def _tool_alpha_dup(x: str) -> str: + """Duplicate of alpha — same name, different object.""" + return x + + +# Rename duplicate to share the same .name as _tool_alpha +_tool_alpha_dup.name = _tool_alpha.name # type: ignore[attr-defined] + + +@tool +def _tool_beta(x: str) -> str: + """Beta tool.""" + return x + + +# --------------------------------------------------------------------------- +# Deduplication behaviour +# --------------------------------------------------------------------------- + + +def _make_minimal_config(tools): + """Return an AppConfig-like mock with the given tools list.""" + config = MagicMock() + config.tools = tools + config.models = [] + config.tool_search.enabled = False + config.sandbox = MagicMock() + return config + + +@patch("deerflow.tools.tools.get_app_config") +@patch("deerflow.tools.tools.is_host_bash_allowed", return_value=True) +@patch("deerflow.tools.tools.reset_deferred_registry") +def test_no_duplicates_returned(mock_reset, mock_bash, mock_cfg): + """get_available_tools() never returns two tools with the same name.""" + mock_cfg.return_value = _make_minimal_config([]) + + # Patch the builtin tools so we control exactly what comes back. + with patch("deerflow.tools.tools.BUILTIN_TOOLS", [_tool_alpha, _tool_alpha_dup, _tool_beta]): + result = get_available_tools(include_mcp=False) + + names = [t.name for t in result] + assert len(names) == len(set(names)), f"Duplicate names detected: {names}" + + +@patch("deerflow.tools.tools.get_app_config") +@patch("deerflow.tools.tools.is_host_bash_allowed", return_value=True) +@patch("deerflow.tools.tools.reset_deferred_registry") +def test_first_occurrence_wins(mock_reset, mock_bash, mock_cfg): + """When duplicates exist, the first occurrence is kept.""" + mock_cfg.return_value = _make_minimal_config([]) + + sentinel_alpha = MagicMock(spec=BaseTool, name="_sentinel") + sentinel_alpha.name = _tool_alpha.name # same name + sentinel_alpha_dup = MagicMock(spec=BaseTool, name="_sentinel_dup") + sentinel_alpha_dup.name = _tool_alpha.name # same name — should be dropped + + with patch("deerflow.tools.tools.BUILTIN_TOOLS", [sentinel_alpha, sentinel_alpha_dup, _tool_beta]): + result = get_available_tools(include_mcp=False) + + returned_alpha = next(t for t in result if t.name == _tool_alpha.name) + assert returned_alpha is sentinel_alpha + + +@patch("deerflow.tools.tools.get_app_config") +@patch("deerflow.tools.tools.is_host_bash_allowed", return_value=True) +@patch("deerflow.tools.tools.reset_deferred_registry") +def test_duplicate_triggers_warning(mock_reset, mock_bash, mock_cfg, caplog): + """A warning is logged for every skipped duplicate.""" + import logging + + mock_cfg.return_value = _make_minimal_config([]) + + with patch("deerflow.tools.tools.BUILTIN_TOOLS", [_tool_alpha, _tool_alpha_dup]): + with caplog.at_level(logging.WARNING, logger="deerflow.tools.tools"): + get_available_tools(include_mcp=False) + + assert any("Duplicate tool name" in r.message for r in caplog.records), "Expected a duplicate-tool warning in log output"