mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-14 12:43:45 +00:00
* perf(harness): push thread metadata filters into SQL

  Replace Python-side metadata filtering (5x overfetch + in-memory match) with
  database-side json_extract predicates so LIMIT/OFFSET pagination is exact
  regardless of match density.

  Co-Authored-By: Claude Opus 4 <noreply@anthropic.com>

* fix(harness): add dialect-aware JsonMatch compiler for type-safe metadata SQL filters

  Replace SQLAlchemy JSON index/comparator APIs with a custom JsonMatch
  ColumnElement that compiles to json_type/json_extract on SQLite and
  jsonb_typeof/->>/-> on PostgreSQL. Tighten the key validation regex to
  single-segment identifiers, handle None/bool/numeric value types with
  json_type-based discrimination, and strengthen test coverage for edge cases
  and discriminability.

  Co-Authored-By: Claude Opus 4 <noreply@anthropic.com>

* fix(harness): address Copilot review comments on JSON metadata filters

  - Use json_typeof instead of jsonb_typeof in the PostgreSQL compiler; the
    metadata_json column is JSON, not JSONB, so jsonb_typeof would error at
    runtime on any PostgreSQL backend
  - Align _is_safe_json_key with json_match's _KEY_CHARSET_RE so keys
    containing hyphens or leading digits are not silently skipped
  - Add thread_id as a secondary ORDER BY in search() to make pagination
    deterministic when updated_at values collide; remove asyncio.sleep from
    the pagination regression test

  Co-Authored-By: Claude Sonnet 4 <noreply@anthropic.com>

* fix(harness): address remaining review comments on metadata SQL filters

  - Remove _is_safe_json_key() and reuse the json_match ValueError to avoid
    validator drift (Copilot #3217603895, #3217411616)
  - Raise ValueError when all metadata keys are rejected so callers never get
    silent unfiltered results (WillemJiang)
  - Fix integer precision: split int/float branches, bind int as Integer()
    with INTEGER/BIGINT CAST instead of float() coercion (Copilot #3217603972)
  - Fix jsonb_typeof -> json_typeof on the JSON column (Copilot #3217411579)
  - Replace manual _cleanup() calls with an async yield fixture so teardown
    always runs (Copilot #3217604019)
  - Remove the asyncio.sleep(0.01) pagination ordering; use the thread_id
    secondary sort instead (Copilot #3217411636)
  - Add type annotations to _bind/_build_clause/_compile_* and remove EOL
    comments from _Dialect fields (coding.mdc)
  - Expand test coverage: boolean/null/mixed-type/large-int precision,
    partial unsafe-key skip with a caplog assertion

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(harness): address third-round Copilot review comments on JsonMatch

  - Reject unsupported value types (list, dict, ...) in JsonMatch.__init__
    with TypeError so inherit_cache=True never receives an unhashable value
    and callers get an explicit error instead of silent str() coercion
    (Copilot #3217933201)
  - Upgrade the int bindparam from Integer() to BigInteger() to align with
    the BIGINT CAST and avoid overflow on large integers (Copilot #3217933252)
  - Catch TypeError alongside ValueError in search() so non-string metadata
    keys are warned and skipped rather than raising unexpectedly
    (Copilot #3217933300)
  - Add three tests: json_match rejects unsupported value types, search()
    warns and raises on a non-string key, search() warns and raises on an
    unsupported value type

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(harness): address fourth-round Copilot review comments on JsonMatch

  - Add a CASE WHEN guard for PostgreSQL integer matching: json_typeof
    returns 'number' for both ints and floats; wrap the CAST in CASE with the
    regex guard '^-?[0-9]+$' so float rows never trigger a CAST error
    (Copilot #3218413860)
  - Validate isinstance(key, str) before the regex match in
    JsonMatch.__init__ so non-string keys raise ValueError consistently
    instead of TypeError from re.match (Copilot #3218413900)
  - Include the exception message in the metadata filter skip warning so
    callers can distinguish an invalid key from an unsupported value type
    (Copilot #3218413924)
  - Update tests: assert the CASE WHEN guard in PG int compilation, cover the
    non-string key ValueError in test_json_match_rejects_unsafe_key

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(harness): align ThreadMetaStore.search() signature with the sql.py implementation

  Use `dict[str, Any]` for `metadata` and `list[dict[str, Any]]` as the
  return type in the base class and MemoryThreadMetaStore to resolve an LSP
  signature mismatch; also correct a test docstring that cited the wrong
  exception type.

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

* fix(harness): surface InvalidMetadataFilterError as HTTP 400 in the search endpoint

  Replace the bare ValueError with a domain-specific
  InvalidMetadataFilterError (a subclass of ValueError) so the Gateway
  handler can catch it and return HTTP 400 instead of letting it bubble up
  as a 500.

  Co-Authored-By: Claude Opus 4 <noreply@anthropic.com>

* fix(harness): sanitize metadata keys in log output to prevent log injection

  Use ascii() instead of %r to escape control characters in client-supplied
  metadata keys before logging, preventing multiline/forged log entries.

  Co-Authored-By: Claude Opus 4 <noreply@anthropic.com>

* Potential fix for pull request finding

  Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>

* fix(harness): validate metadata filters at the API boundary and dedupe key/value rules

  - Add a Pydantic ``field_validator`` on ``ThreadSearchRequest.metadata`` so
    unsafe keys / unsupported value types are rejected with HTTP 422 from
    both the SQL and memory backends (closes Copilot review 3218830849).
  - Export ``validate_metadata_filter_key`` / ``validate_metadata_filter_value``
    (and ``ALLOWED_FILTER_VALUE_TYPES``) from ``json_compat`` and have
    ``JsonMatch.__init__`` reuse them — the Gateway-side validator and the
    SQL-side ``JsonMatch`` constructor now share one admission rule and
    cannot drift.
  - Format the ``InvalidMetadataFilterError`` rejected-keys list as a
    comma-separated plain string instead of a Python list repr so the
    surfaced HTTP 400 detail is readable (closes Copilot review 3218830899).
  - Update router tests to cover both 422 boundary paths plus the 400
    defense-in-depth path when a backend still raises the error.

  Co-authored-by: Cursor <cursoragent@cursor.com>

* fix(harness): harden JsonMatch compile-time key validation against __init__ bypass

  Co-Authored-By: Claude Sonnet 4 <noreply@anthropic.com>

* fix: address review feedback on metadata filter SQL push-down

  - Add a signed 64-bit range check to validate_metadata_filter_value; give
    out-of-range ints a distinct TypeError message.
  - Replace assert guards in _compile_sqlite/_compile_pg with explicit
    if/raise so they survive python -O optimisation.

  Co-Authored-By: Claude Sonnet 4 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4 <noreply@anthropic.com>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
Co-authored-by: Cursor <cursoragent@cursor.com>
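
For readers skimming this log: the construct at the heart of these commits can be sketched with SQLAlchemy's documented FunctionElement/@compiles extension API. The sketch below handles only the string-value case and uses illustrative names (json_match, _KEY_RE); the real JsonMatch additionally discriminates None/bool/int/float rows via json_type/json_typeof and wraps the PostgreSQL integer CAST in the CASE WHEN regex guard described above:

import re

from sqlalchemy import Boolean, literal
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.sql import expression

_KEY_RE = re.compile(r"^[A-Za-z0-9_-]+$")  # single-segment identifiers only


class json_match(expression.FunctionElement):
    """Boolean predicate: JSON column has ``key`` equal to string ``value``."""

    type = Boolean()
    name = "json_match"
    inherit_cache = True

    def __init__(self, col, key, value):
        if not isinstance(key, str) or not _KEY_RE.match(key):
            raise ValueError(f"unsafe metadata filter key: {ascii(key)}")
        if not isinstance(value, str):
            raise TypeError("this sketch only handles str values")
        # Bind the path/key/value so nothing client-supplied is ever
        # interpolated into the SQL text.
        super().__init__(col, literal(f"$.{key}"), literal(key), literal(value))


@compiles(json_match, "sqlite")
def _compile_sqlite(element, compiler, **kw):
    # SQLite: json_extract(col, '$.key') = value
    col, path, _key, value = list(element.clauses)
    return "json_extract(%s, %s) = %s" % (
        compiler.process(col, **kw),
        compiler.process(path, **kw),
        compiler.process(value, **kw),
    )


@compiles(json_match, "postgresql")
def _compile_pg(element, compiler, **kw):
    # PostgreSQL: (col ->> key) = value (text extraction on a JSON column)
    col, _path, key, value = list(element.clauses)
    return "(%s ->> %s) = %s" % (
        compiler.process(col, **kw),
        compiler.process(key, **kw),
        compiler.process(value, **kw),
    )

Assuming a mapped metadata_json JSON column, a caller would then write select(ThreadMeta).where(json_match(ThreadMeta.metadata_json, "env", "prod")), letting each dialect emit its native JSON predicate while LIMIT/OFFSET pagination stays exact.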
488 lines
18 KiB
Python
import re
from unittest.mock import patch

import pytest
from _router_auth_helpers import make_authed_test_app
from fastapi import FastAPI, HTTPException
from fastapi.testclient import TestClient
from langgraph.checkpoint.memory import InMemorySaver
from langgraph.store.memory import InMemoryStore

from app.gateway.routers import threads
from deerflow.config.paths import Paths
from deerflow.persistence.thread_meta import InvalidMetadataFilterError
from deerflow.persistence.thread_meta.memory import THREADS_NS, MemoryThreadMetaStore

_ISO_TIMESTAMP_RE = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}")


class _PermissiveThreadMetaStore(MemoryThreadMetaStore):
    """Memory store that skips user-id filtering for router tests.

    Owner isolation is exercised separately in
    ``test_memory_thread_meta_isolation.py``. Router tests need to drive
    the FastAPI surface end-to-end with a single fixed app user, but the
    stub auth middleware in ``_router_auth_helpers`` stamps a fresh UUID
    on every request, so the production filtering would reject every
    pre-seeded record. Bypass that filter so the test can focus on the
    timestamp wire format.
    """

    async def _get_owned_record(self, thread_id, user_id, method_name):  # type: ignore[override]
        item = await self._store.aget(THREADS_NS, thread_id)
        return dict(item.value) if item is not None else None

    async def check_access(self, thread_id, user_id, *, require_existing=False):  # type: ignore[override]
        item = await self._store.aget(THREADS_NS, thread_id)
        if item is None:
            return not require_existing
        return True

    async def create(self, thread_id, *, assistant_id=None, user_id=None, display_name=None, metadata=None):  # type: ignore[override]
        return await super().create(thread_id, assistant_id=assistant_id, user_id=None, display_name=display_name, metadata=metadata)

    async def search(self, *, metadata=None, status=None, limit=100, offset=0, user_id=None):  # type: ignore[override]
        return await super().search(metadata=metadata, status=status, limit=limit, offset=offset, user_id=None)


def _build_thread_app() -> tuple[FastAPI, InMemoryStore, InMemorySaver]:
    """Build a stub-authed FastAPI app wired with an in-memory ThreadMetaStore.

    The thread_store on ``app.state`` is a permissive subclass of
    ``MemoryThreadMetaStore`` so tests can drive ``/api/threads``
    end-to-end and pre-seed legacy records via the underlying BaseStore.

    Returns ``(app, store, checkpointer)`` for direct seeding/inspection.
    """
    app = make_authed_test_app()
    store = InMemoryStore()
    checkpointer = InMemorySaver()
    app.state.store = store
    app.state.checkpointer = checkpointer
    app.state.thread_store = _PermissiveThreadMetaStore(store)
    app.include_router(threads.router)
    return app, store, checkpointer


def test_delete_thread_data_removes_thread_directory(tmp_path):
    paths = Paths(tmp_path)
    thread_dir = paths.thread_dir("thread-cleanup")
    workspace = paths.sandbox_work_dir("thread-cleanup")
    uploads = paths.sandbox_uploads_dir("thread-cleanup")
    outputs = paths.sandbox_outputs_dir("thread-cleanup")

    for directory in [workspace, uploads, outputs]:
        directory.mkdir(parents=True, exist_ok=True)
    (workspace / "notes.txt").write_text("hello", encoding="utf-8")
    (uploads / "report.pdf").write_bytes(b"pdf")
    (outputs / "result.json").write_text("{}", encoding="utf-8")

    assert thread_dir.exists()

    response = threads._delete_thread_data("thread-cleanup", paths=paths)

    assert response.success is True
    assert not thread_dir.exists()


def test_delete_thread_data_is_idempotent_for_missing_directory(tmp_path):
    paths = Paths(tmp_path)

    response = threads._delete_thread_data("missing-thread", paths=paths)

    assert response.success is True
    assert not paths.thread_dir("missing-thread").exists()


def test_delete_thread_data_rejects_invalid_thread_id(tmp_path):
    paths = Paths(tmp_path)

    with pytest.raises(HTTPException) as exc_info:
        threads._delete_thread_data("../escape", paths=paths)

    assert exc_info.value.status_code == 422
    assert "Invalid thread_id" in exc_info.value.detail


def test_delete_thread_route_cleans_thread_directory(tmp_path):
    from deerflow.runtime.user_context import get_effective_user_id

    paths = Paths(tmp_path)
    user_id = get_effective_user_id()
    thread_dir = paths.thread_dir("thread-route", user_id=user_id)
    paths.sandbox_work_dir("thread-route", user_id=user_id).mkdir(parents=True, exist_ok=True)
    (paths.sandbox_work_dir("thread-route", user_id=user_id) / "notes.txt").write_text("hello", encoding="utf-8")

    app = make_authed_test_app()
    app.include_router(threads.router)

    with patch("app.gateway.routers.threads.get_paths", return_value=paths):
        with TestClient(app) as client:
            response = client.delete("/api/threads/thread-route")

    assert response.status_code == 200
    assert response.json() == {"success": True, "message": "Deleted local thread data for thread-route"}
    assert not thread_dir.exists()


def test_delete_thread_route_rejects_invalid_thread_id(tmp_path):
    paths = Paths(tmp_path)

    app = make_authed_test_app()
    app.include_router(threads.router)

    with patch("app.gateway.routers.threads.get_paths", return_value=paths):
        with TestClient(app) as client:
            response = client.delete("/api/threads/../escape")

    assert response.status_code == 404


def test_delete_thread_route_returns_422_for_route_safe_invalid_id(tmp_path):
    paths = Paths(tmp_path)

    app = make_authed_test_app()
    app.include_router(threads.router)

    with patch("app.gateway.routers.threads.get_paths", return_value=paths):
        with TestClient(app) as client:
            response = client.delete("/api/threads/thread.with.dot")

    assert response.status_code == 422
    assert "Invalid thread_id" in response.json()["detail"]


def test_delete_thread_data_returns_generic_500_error(tmp_path):
    paths = Paths(tmp_path)

    with (
        patch.object(paths, "delete_thread_dir", side_effect=OSError("/secret/path")),
        patch.object(threads.logger, "exception") as log_exception,
    ):
        with pytest.raises(HTTPException) as exc_info:
            threads._delete_thread_data("thread-cleanup", paths=paths)

    assert exc_info.value.status_code == 500
    assert exc_info.value.detail == "Failed to delete local thread data."
    assert "/secret/path" not in exc_info.value.detail
    log_exception.assert_called_once_with("Failed to delete thread data for %s", "thread-cleanup")


# ── Server-reserved metadata key stripping ──────────────────────────────────
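
# For context, ``_strip_reserved_metadata`` behaves roughly like the sketch
# below (illustrative only; the real helper lives in
# ``app.gateway.routers.threads`` and may reserve more keys than ``user_id``):
#
#     _RESERVED_METADATA_KEYS = frozenset({"user_id"})
#
#     def _strip_reserved_metadata(metadata):
#         """Drop server-reserved keys from client-supplied metadata."""
#         return {k: v for k, v in metadata.items() if k not in _RESERVED_METADATA_KEYS}

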
def test_strip_reserved_metadata_removes_user_id():
    """Client-supplied user_id is dropped to prevent reflection attacks."""
    out = threads._strip_reserved_metadata({"user_id": "victim-id", "title": "ok"})
    assert out == {"title": "ok"}


def test_strip_reserved_metadata_passes_through_safe_keys():
    """Non-reserved keys are preserved verbatim."""
    md = {"title": "ok", "tags": ["a", "b"], "custom": {"x": 1}}
    assert threads._strip_reserved_metadata(md) == md


def test_strip_reserved_metadata_empty_input():
    """Empty metadata passes through unchanged — no crash."""
    assert threads._strip_reserved_metadata({}) == {}


def test_strip_reserved_metadata_strips_all_reserved_keys():
    out = threads._strip_reserved_metadata({"user_id": "x", "keep": "me"})
    assert out == {"keep": "me"}


# ---------------------------------------------------------------------------
# ISO 8601 timestamp contract (issue #2594)
# ---------------------------------------------------------------------------
#
# Threads endpoints document ``created_at`` / ``updated_at`` as ISO
# timestamps and that is the format LangGraph Platform uses
# (``langgraph_sdk.schema.Thread.created_at: datetime`` JSON-encodes to
# ISO 8601). The tests below pin that contract end-to-end and also
# exercise the ``coerce_iso`` healing path for legacy unix-timestamp
# records written by older Gateway versions.
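
# The healing these tests exercise behaves roughly like the sketch below
# (illustrative only; the real ``coerce_iso`` lives in the Gateway code and
# its exact signature and timezone handling may differ):
#
#     from datetime import datetime, timezone
#
#     def coerce_iso(value):
#         """Surface legacy ``time.time()`` floats (or float-like strings)
#         as ISO 8601; pass modern ISO strings through unchanged."""
#         if isinstance(value, str):
#             try:
#                 value = float(value)
#             except ValueError:
#                 return value  # already ISO
#         return datetime.fromtimestamp(value, tz=timezone.utc).isoformat()

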
def test_create_thread_returns_iso_timestamps() -> None:
    app, _store, _checkpointer = _build_thread_app()

    with TestClient(app) as client:
        response = client.post("/api/threads", json={"metadata": {}})

    assert response.status_code == 200, response.text
    body = response.json()
    assert _ISO_TIMESTAMP_RE.match(body["created_at"]), body["created_at"]
    assert _ISO_TIMESTAMP_RE.match(body["updated_at"]), body["updated_at"]
    assert body["created_at"] == body["updated_at"]


def test_get_thread_returns_iso_for_legacy_unix_record() -> None:
    """A thread record written by older versions stores ``time.time()``
    floats. ``get_thread`` must transparently surface them as ISO so the
    frontend's ``new Date(...)`` parser does not break.
    """
    app, store, checkpointer = _build_thread_app()

    legacy_thread_id = "legacy-thread"
    legacy_ts = "1777252410.411327"

    async def _seed() -> None:
        await store.aput(
            THREADS_NS,
            legacy_thread_id,
            {
                "thread_id": legacy_thread_id,
                "status": "idle",
                "created_at": legacy_ts,
                "updated_at": legacy_ts,
                "metadata": {},
            },
        )
        from langgraph.checkpoint.base import empty_checkpoint

        await checkpointer.aput(
            {"configurable": {"thread_id": legacy_thread_id, "checkpoint_ns": ""}},
            empty_checkpoint(),
            {"step": -1, "source": "input", "writes": None, "parents": {}},
            {},
        )

    import asyncio

    asyncio.run(_seed())

    with TestClient(app) as client:
        response = client.get(f"/api/threads/{legacy_thread_id}")

    assert response.status_code == 200, response.text
    body = response.json()
    assert _ISO_TIMESTAMP_RE.match(body["created_at"]), body["created_at"]
    assert _ISO_TIMESTAMP_RE.match(body["updated_at"]), body["updated_at"]


def test_patch_thread_returns_iso_and_advances_updated_at() -> None:
    app, store, _checkpointer = _build_thread_app()
    thread_id = "patch-target"

    legacy_created = "1777000000.000000"
    legacy_updated = "1777000000.000000"

    async def _seed() -> None:
        await store.aput(
            THREADS_NS,
            thread_id,
            {
                "thread_id": thread_id,
                "status": "idle",
                "created_at": legacy_created,
                "updated_at": legacy_updated,
                "metadata": {"k": "v0"},
            },
        )

    import asyncio

    asyncio.run(_seed())

    with TestClient(app) as client:
        response = client.patch(f"/api/threads/{thread_id}", json={"metadata": {"k": "v1"}})

    assert response.status_code == 200, response.text
    body = response.json()
    assert _ISO_TIMESTAMP_RE.match(body["created_at"]), body["created_at"]
    assert _ISO_TIMESTAMP_RE.match(body["updated_at"]), body["updated_at"]
    # Patch issues a fresh ``updated_at`` via ``MemoryThreadMetaStore.update_metadata``,
    # so it must be > the migrated legacy ``created_at`` (both ISO strings
    # sort lexicographically by time when the format is consistent).
    assert body["updated_at"] > body["created_at"]
    assert body["metadata"] == {"k": "v1"}


def test_search_threads_normalizes_legacy_unix_seconds_to_iso() -> None:
    """``MemoryThreadMetaStore`` may hold legacy ``time.time()`` floats
    written by older Gateway versions. ``/search`` must surface them as
    ISO via ``coerce_iso`` so the frontend's ``new Date(...)`` parser
    does not break.
    """
    app, store, _checkpointer = _build_thread_app()

    async def _seed() -> None:
        # Legacy unix-second float (the literal value from issue #2594).
        await store.aput(
            THREADS_NS,
            "legacy",
            {
                "thread_id": "legacy",
                "status": "idle",
                "created_at": 1777000000.0,
                "updated_at": 1777000000.0,
                "metadata": {},
            },
        )
        # Modern ISO string, slightly later.
        await store.aput(
            THREADS_NS,
            "modern",
            {
                "thread_id": "modern",
                "status": "idle",
                "created_at": "2026-04-27T00:00:00+00:00",
                "updated_at": "2026-04-27T00:00:00+00:00",
                "metadata": {},
            },
        )

    import asyncio

    asyncio.run(_seed())

    with TestClient(app) as client:
        response = client.post("/api/threads/search", json={"limit": 10})

    assert response.status_code == 200, response.text
    items = response.json()
    assert {item["thread_id"] for item in items} == {"legacy", "modern"}
    for item in items:
        assert _ISO_TIMESTAMP_RE.match(item["created_at"]), item
        assert _ISO_TIMESTAMP_RE.match(item["updated_at"]), item


def test_memory_thread_meta_store_writes_iso_on_create() -> None:
    """``MemoryThreadMetaStore.create`` must emit ISO so newly created
    threads serialize correctly without depending on the router's
    ``coerce_iso`` heal path.
    """
    import asyncio

    store = InMemoryStore()
    repo = MemoryThreadMetaStore(store)

    async def _scenario() -> dict:
        await repo.create("fresh", user_id=None, metadata={"a": 1})
        record = (await store.aget(THREADS_NS, "fresh")).value
        return record

    record = asyncio.run(_scenario())
    assert _ISO_TIMESTAMP_RE.match(record["created_at"]), record
    assert _ISO_TIMESTAMP_RE.match(record["updated_at"]), record


def test_get_thread_state_returns_iso_for_legacy_checkpoint_metadata() -> None:
    """Checkpoints written by older Gateway versions stored
    ``created_at`` as a unix-second float in their metadata. The
    ``/state`` endpoint must surface that value as ISO so the frontend's
    ``new Date(...)`` parser does not break — same root cause as the
    thread-record bug fixed in #2594, but on the checkpoint side.
    """
    app, _store, checkpointer = _build_thread_app()
    thread_id = "legacy-state"

    async def _seed() -> None:
        from langgraph.checkpoint.base import empty_checkpoint

        await checkpointer.aput(
            {"configurable": {"thread_id": thread_id, "checkpoint_ns": ""}},
            empty_checkpoint(),
            {"step": -1, "source": "input", "writes": None, "parents": {}, "created_at": 1777252410.411327},
            {},
        )

    import asyncio

    asyncio.run(_seed())

    with TestClient(app) as client:
        response = client.get(f"/api/threads/{thread_id}/state")

    assert response.status_code == 200, response.text
    body = response.json()
    assert _ISO_TIMESTAMP_RE.match(body["created_at"]), body["created_at"]
    assert _ISO_TIMESTAMP_RE.match(body["checkpoint"]["ts"]), body["checkpoint"]


def test_get_thread_history_returns_iso_for_legacy_checkpoint_metadata() -> None:
    """``/history`` walks ``checkpointer.alist`` and emits one entry per
    checkpoint. Each entry's ``created_at`` must come out as ISO even if
    older checkpoints stored a unix-second float in their metadata.
    """
    app, _store, checkpointer = _build_thread_app()
    thread_id = "legacy-history"

    async def _seed() -> None:
        from langgraph.checkpoint.base import empty_checkpoint

        await checkpointer.aput(
            {"configurable": {"thread_id": thread_id, "checkpoint_ns": ""}},
            empty_checkpoint(),
            {"step": -1, "source": "input", "writes": None, "parents": {}, "created_at": 1777252410.411327},
            {},
        )

    import asyncio

    asyncio.run(_seed())

    with TestClient(app) as client:
        response = client.post(f"/api/threads/{thread_id}/history", json={"limit": 10})

    assert response.status_code == 200, response.text
    entries = response.json()
    assert entries, "expected at least one history entry"
    for entry in entries:
        assert _ISO_TIMESTAMP_RE.match(entry["created_at"]), entry


# ── Metadata filter validation at API boundary ────────────────────────────────
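
# The admission rule these tests exercise (shared, per the PR description,
# by the Pydantic ``field_validator`` on ``ThreadSearchRequest.metadata`` and
# by ``JsonMatch.__init__`` via ``json_compat``) is roughly the sketch below;
# illustrative only, the real exported helpers may differ in detail:
#
#     ALLOWED_FILTER_VALUE_TYPES = (type(None), bool, int, float, str)
#     _KEY_RE = re.compile(r"^[A-Za-z0-9_-]+$")
#
#     def validate_metadata_filter_key(key):
#         if not isinstance(key, str) or not _KEY_RE.match(key):
#             raise ValueError(f"unsafe metadata filter key: {ascii(key)}")
#
#     def validate_metadata_filter_value(value):
#         if not isinstance(value, ALLOWED_FILTER_VALUE_TYPES):
#             raise TypeError(f"unsupported filter value type: {type(value).__name__}")
#         if isinstance(value, int) and not isinstance(value, bool):
#             # Distinct message for ints outside the signed 64-bit range.
#             if not -2**63 <= value <= 2**63 - 1:
#                 raise TypeError("integer filter value outside signed 64-bit range")

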
def test_search_threads_rejects_invalid_key_at_api_boundary() -> None:
    """Keys that don't match [A-Za-z0-9_-]+ are rejected by the Pydantic
    validator on ThreadSearchRequest.metadata — 422 from both backends.
    """
    app, _store, _checkpointer = _build_thread_app()

    with TestClient(app) as client:
        response = client.post("/api/threads/search", json={"metadata": {"bad;key": "x"}})

    assert response.status_code == 422


def test_search_threads_rejects_unsupported_value_type_at_api_boundary() -> None:
    """Value types outside (None, bool, int, float, str) are rejected."""
    app, _store, _checkpointer = _build_thread_app()

    with TestClient(app) as client:
        response = client.post("/api/threads/search", json={"metadata": {"env": ["a", "b"]}})

    assert response.status_code == 422


def test_search_threads_returns_400_for_backend_invalid_metadata_filter() -> None:
    """If the backend still raises InvalidMetadataFilterError (defense in
    depth), the handler surfaces it as HTTP 400.
    """
    app, _store, _checkpointer = _build_thread_app()
    thread_store = app.state.thread_store

    async def _raise(**kwargs):
        raise InvalidMetadataFilterError("rejected")

    with TestClient(app) as client:
        with patch.object(thread_store, "search", side_effect=_raise):
            response = client.post("/api/threads/search", json={"metadata": {"valid_key": "x"}})

    assert response.status_code == 400
    assert "rejected" in response.json()["detail"]


def test_search_threads_succeeds_with_valid_metadata() -> None:
    """Sanity check: valid metadata passes through without error."""
    app, _store, _checkpointer = _build_thread_app()

    with TestClient(app) as client:
        response = client.post("/api/threads/search", json={"metadata": {"env": "prod"}})

    assert response.status_code == 200