"""Tests for the SKILL.md parser regression introduced in issue #1803.

The previous hand-rolled YAML parser stored quoted string values with their
surrounding quotes intact (e.g. ``name: "my-skill"`` → ``'"my-skill"'``).
This caused a mismatch with ``_validate_skill_frontmatter`` (which uses
``yaml.safe_load``) and broke skill lookup after installation.

The parser now uses ``yaml.safe_load`` consistently with ``validation.py``.
"""

from __future__ import annotations

import logging
from pathlib import Path

from deerflow.skills.parser import parse_skill_file

# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def _write_skill(tmp_path: Path, front_matter: str, body: str = "# My Skill\n") -> Path:
    """Write a minimal SKILL.md and return the path."""
    skill_dir = tmp_path / "my-skill"
    skill_dir.mkdir()
    skill_file = skill_dir / "SKILL.md"
    skill_file.write_text(f"---\n{front_matter}\n---\n{body}", encoding="utf-8")
    return skill_file


# ---------------------------------------------------------------------------
# Basic parsing
# ---------------------------------------------------------------------------


def test_parse_plain_name(tmp_path):
    """Unquoted name is parsed correctly."""
    skill_file = _write_skill(tmp_path, "name: my-skill\ndescription: A test skill")
    skill = parse_skill_file(skill_file, category="custom")
    assert skill is not None
    assert skill.name == "my-skill"


def test_parse_quoted_name_no_quotes_in_result(tmp_path):
    """Quoted name (YAML string) must not include surrounding quotes in result.

    Regression: the old hand-rolled parser stored ``'"my-skill"'`` instead of
    ``'my-skill'`` when the YAML value was wrapped in double-quotes.
    """
    skill_file = _write_skill(tmp_path, 'name: "my-skill"\ndescription: A test skill')
    skill = parse_skill_file(skill_file, category="custom")
    assert skill is not None
    assert skill.name == "my-skill", f"Expected 'my-skill', got {skill.name!r}"


def test_parse_single_quoted_name(tmp_path):
    """Single-quoted YAML strings are also handled correctly."""
    skill_file = _write_skill(tmp_path, "name: 'my-skill'\ndescription: A test skill")
    skill = parse_skill_file(skill_file, category="custom")
    assert skill is not None
    assert skill.name == "my-skill"


def test_parse_description_returned(tmp_path):
    """Description field is correctly extracted."""
    skill_file = _write_skill(tmp_path, "name: my-skill\ndescription: Does amazing things")
    skill = parse_skill_file(skill_file, category="custom")
    assert skill is not None
    assert skill.description == "Does amazing things"


def test_parse_multiline_description(tmp_path):
    """Multi-line YAML descriptions are collapsed correctly by yaml.safe_load."""
    front_matter = "name: my-skill\ndescription: >\n  A folded\n  description"
    skill_file = _write_skill(tmp_path, front_matter)
    skill = parse_skill_file(skill_file, category="custom")
    assert skill is not None
    assert "folded" in skill.description


def test_parse_license_field(tmp_path):
    """Optional license field is captured when present."""
    skill_file = _write_skill(tmp_path, "name: my-skill\ndescription: Test\nlicense: MIT")
    skill = parse_skill_file(skill_file, category="custom")
    assert skill is not None
    assert skill.license == "MIT"


def test_parse_missing_allowed_tools_returns_none(tmp_path):
    skill_file = _write_skill(tmp_path, "name: my-skill\ndescription: Test")
    skill = parse_skill_file(skill_file, category="custom")
    assert skill is not None
    assert skill.allowed_tools is None


def test_parse_allowed_tools_list(tmp_path):
    skill_file = _write_skill(tmp_path, 'name: my-skill\ndescription: Test\nallowed-tools: ["bash", "read_file"]')
    skill = parse_skill_file(skill_file, category="custom")
    assert skill is not None
    assert skill.allowed_tools == ["bash", "read_file"]


def test_parse_empty_allowed_tools_list(tmp_path):
    skill_file = _write_skill(tmp_path, "name: my-skill\ndescription: Test\nallowed-tools: []")
    skill = parse_skill_file(skill_file, category="custom")
    assert skill is not None
    assert skill.allowed_tools == []


def test_parse_invalid_allowed_tools_returns_none(tmp_path):
    skill_file = _write_skill(tmp_path, "name: my-skill\ndescription: Test\nallowed-tools: bash")
    skill = parse_skill_file(skill_file, category="custom")
    assert skill is None


def test_parse_missing_name_returns_none(tmp_path):
    """Skills missing a name field are rejected."""
    skill_file = _write_skill(tmp_path, "description: A test skill")
    skill = parse_skill_file(skill_file, category="custom")
    assert skill is None


def test_parse_missing_description_returns_none(tmp_path):
    """Skills missing a description field are rejected."""
    skill_file = _write_skill(tmp_path, "name: my-skill")
    skill = parse_skill_file(skill_file, category="custom")
    assert skill is None


def test_parse_no_front_matter_returns_none(tmp_path):
    """Files without YAML front-matter delimiters return None."""
    skill_dir = tmp_path / "no-fm"
    skill_dir.mkdir()
    skill_file = skill_dir / "SKILL.md"
    skill_file.write_text("# No front matter here\n", encoding="utf-8")
    skill = parse_skill_file(skill_file, category="public")
    assert skill is None


def test_parse_invalid_yaml_returns_none(tmp_path):
    """Malformed YAML front-matter is handled gracefully (returns None)."""
    skill_file = _write_skill(tmp_path, "name: [unclosed")
    skill = parse_skill_file(skill_file, category="custom")
    assert skill is None


def test_parse_category_stored(tmp_path):
    """Category is propagated into the returned Skill object."""
    skill_file = _write_skill(tmp_path, "name: my-skill\ndescription: Test")
    skill = parse_skill_file(skill_file, category="public")
    assert skill is not None
    assert skill.category == "public"


def test_parse_nonexistent_file_returns_none(tmp_path):
    """Non-existent files are handled gracefully."""
    skill = parse_skill_file(tmp_path / "ghost" / "SKILL.md", category="custom")
    assert skill is None


# ---------------------------------------------------------------------------
# Friendly YAML error reporting
# ---------------------------------------------------------------------------


def test_parse_unquoted_colon_value_logs_line_and_hint(tmp_path, caplog):
    """Unquoted value with ': ' produces a log that exposes the full offending line
    (PyYAML truncates long lines with `...`) and a copy-pasteable quoting hint.

    Regression for issue #3333: SKILL.md authored by an LLM frequently
    contains ``description: foo: bar`` which PyYAML rejects with
    ``mapping values are not allowed here``. The skill is correctly skipped
    (the file is not silently accepted). Before this change the only
    diagnostic was PyYAML's own message, which (a) numbers lines within
    the front-matter body rather than the file and (b) truncates long
    values with '...'. The new behaviour pins:
      * the line number an author sees in their editor (file-line, not
        front-matter-line),
      * the *full* offending line (no '...' truncation), and
      * a copy-pasteable `key: "value"` hint.
    """

    # The description value is intentionally long enough to trigger
    # PyYAML's own '...' truncation in the rendered str(exc); our hint
    # must echo the *full* value regardless.
    long_value = "StarRun collector: progress, errors, tables out, plus assorted diagnostic notes"
    front_matter = f"name: collect-startrun\ndescription: {long_value}"
    skill_file = _write_skill(tmp_path, front_matter)

    with caplog.at_level(logging.ERROR, logger="deerflow.skills.parser"):
        skill = parse_skill_file(skill_file, category="custom")

    assert skill is None
    combined = "\n".join(rec.getMessage() for rec in caplog.records)
    assert "Invalid YAML front-matter" in combined

    # 1. File-line, not front-matter-line. `description` is the 2nd line
    #    of the front-matter body, which is line 3 of the file (line 1
    #    is the leading `---` fence). Before this PR the log said
    #    `line 2`, which sent authors to the wrong row.
    assert f"line 3: description: {long_value}" in combined

    # 2. The full value is preserved -- PyYAML's own message truncates
    #    long values with '...', so the presence of the un-truncated tail
    #    proves we are reading the source line ourselves, not echoing
    #    PyYAML's snippet.
    assert "plus assorted diagnostic notes" in combined
    assert "..." not in [line for line in combined.splitlines() if line.startswith("  line ")][0]

    # 3. The copy-pasteable quoting hint is the actually-new diagnostic.
    assert f'hint: values containing ":" must be quoted, e.g. description: "{long_value}"' in combined


def test_parse_unquoted_colon_value_preserves_nested_key_indent(tmp_path, caplog):
    """Nested keys must keep their leading indentation in the quoting hint.

    Regression guard for CR feedback on PR #3335: an earlier version of
    the hint called ``key.strip()``, which turned ``  author: foo: bar``
    into ``author: "foo: bar"``. Pasting that back under a parent mapping
    silently moved the field to the top level. The hint must preserve
    the original indentation so authors can copy-paste-fix in place.
    """

    # A two-space-indented nested key triggers the same scanner error,
    # but its hint must keep the indentation.
    front_matter = "name: nested-skill\nmetadata:\n  author: Jane: Doe"
    skill_file = _write_skill(tmp_path, front_matter)

    with caplog.at_level(logging.ERROR, logger="deerflow.skills.parser"):
        skill = parse_skill_file(skill_file, category="custom")

    assert skill is None
    combined = "\n".join(rec.getMessage() for rec in caplog.records)
    # Two leading spaces in front of `author` are preserved.
    assert 'hint: values containing ":" must be quoted, e.g.   author: "Jane: Doe"' in combined


def test_parse_unrelated_yaml_error_omits_quoting_hint(tmp_path, caplog):
    """Errors other than 'mapping values are not allowed' must NOT carry the quoting hint."""

    # Unclosed flow sequence is a scanner error of a different shape; the
    # quoting hint would be misleading and must be suppressed.
    skill_file = _write_skill(tmp_path, "name: [unclosed\ndescription: x")

    with caplog.at_level(logging.ERROR, logger="deerflow.skills.parser"):
        skill = parse_skill_file(skill_file, category="custom")

    assert skill is None
    combined = "\n".join(rec.getMessage() for rec in caplog.records)
    assert "Invalid YAML front-matter" in combined
    assert "hint:" not in combined


def test_parse_valid_skill_emits_no_error_log(tmp_path, caplog):
    """Sanity check: a valid SKILL.md must not produce any error logs."""

    skill_file = _write_skill(tmp_path, 'name: ok-skill\ndescription: "Foo: bar"')

    with caplog.at_level(logging.ERROR, logger="deerflow.skills.parser"):
        skill = parse_skill_file(skill_file, category="custom")

    assert skill is not None
    assert skill.description == "Foo: bar"
    assert not caplog.records, "valid SKILL.md must not log errors"


def test_parse_unquoted_colon_value_escapes_backslashes_in_hint(tmp_path, caplog):
    """Backslashes in the offending value must be doubled in the hint.

    Regression guard for CR feedback on PR #3335: an earlier version of
    the hint only escaped ``"`` but left ``\\`` untouched. Pasting the
    suggested ``key: "..."`` back into the file would then be reparsed
    as an escape sequence by PyYAML's double-quoted scalar rules and
    either fail to load or silently change meaning (e.g. ``C:\\Temp``
    becoming ``C:<TAB>emp``). The hint must double the backslash so the
    suggested scalar is valid YAML when pasted back.
    """

    # The second ``: `` (after ``path``) is what trips PyYAML's
    # "mapping values are not allowed here"; the ``C:\Temp`` segment
    # carries the backslash that the hint must escape.
    front_matter = "name: path-skill\ndescription: Windows path: C:\\Temp"
    skill_file = _write_skill(tmp_path, front_matter)

    with caplog.at_level(logging.ERROR, logger="deerflow.skills.parser"):
        skill = parse_skill_file(skill_file, category="custom")

    assert skill is None
    combined = "\n".join(rec.getMessage() for rec in caplog.records)
    assert r'description: "Windows path: C:\\Temp"' in combined


def test_parse_unquoted_colon_value_escapes_regex_in_hint(tmp_path, caplog):
    """Regex-style ``\\d`` must also be escaped in the hint.

    Same root cause as the Windows-path guard above, but with a
    regex-style escape that is even more likely to appear in
    LLM-authored skills (e.g. a ``description`` that quotes a regex).
    PyYAML rejects ``\\d`` in double-quoted scalars, so the hint must
    emit ``\\\\d`` to remain valid.
    """

    front_matter = "name: regex-skill\ndescription: match: \\d+ digits"
    skill_file = _write_skill(tmp_path, front_matter)

    with caplog.at_level(logging.ERROR, logger="deerflow.skills.parser"):
        skill = parse_skill_file(skill_file, category="custom")

    assert skill is None
    combined = "\n".join(rec.getMessage() for rec in caplog.records)
    assert r'description: "match: \\d+ digits"' in combined