test(auth): add cross-user isolation test suite

10 tests exercising the storage-layer owner filter by manually switching the user_context contextvar between two users. Verifies the safety invariant: After a repository write with owner_id=A, a subsequent read with owner_id=B must not return the row, and vice versa. Covers all 4 tables that own user-scoped data: TC-API-17 threads_meta — read, search, update, delete cross-user TC-API-18 runs — get, list_by_thread, delete cross-user TC-API-19 run_events — list_messages, list_events, count_messages, delete_by_thread (CRITICAL: raw conversation content leak vector) TC-API-20 feedback — get, list_by_run, delete cross-user Plus two meta-tests verifying the sentinel pattern itself: - AUTO + unset contextvar raises RuntimeError - explicit owner_id=None bypasses the filter (migration escape hatch) Architecture note ----------------- These tests bypass the HTTP layer by design. The full chain (cookie → middleware → contextvar → repository) is covered piecewise: - test_auth_middleware.py: middleware sets contextvar from cookies - test_owner_isolation.py: repositories enforce isolation when contextvar is set to different users Together they prove the end-to-end safety property without the ceremony of spinning up a full TestClient + in-memory DB for every router endpoint. Tests pass: 231 (full auth + persistence + isolation suite) Lint: clean
2026-04-25 11:18:22 +00:00 · 2026-04-08 11:29:11 +08:00 · 2026-04-08 11:29:11 +08:00 · f0b065bef6
commit f0b065bef6
parent 3aa3e37532
1 changed files with 465 additions and 0 deletions
--- a/backend/tests/test_owner_isolation.py
+++ b/backend/tests/test_owner_isolation.py
@ -0,0 +1,465 @@
+"""Cross-user isolation tests — non-negotiable safety gate.
+
+Mirrors TC-API-17..20 from backend/docs/AUTH_TEST_PLAN.md. A failure
+here means users can see each other's data; PR must not merge.
+
+Architecture note
+-----------------
+These tests bypass the HTTP layer and exercise the storage-layer
+owner filter directly by switching the ``user_context`` contextvar
+between two users. The safety property under test is:
+
+  After a repository write with owner_id=A, a subsequent read with
+  owner_id=B must not return the row, and vice versa.
+
+The HTTP layer is covered by test_auth_middleware.py, which proves
+that a request cookie reaches the ``set_current_user`` call. Together
+the two suites prove the full chain:
+
+  cookie → middleware → contextvar → repository → isolation
+
+Every test in this file opts out of the autouse contextvar fixture
+(``@pytest.mark.no_auto_user``) so it can set the contextvar to the
+specific users it cares about.
+"""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+import pytest
+
+from deerflow.runtime.user_context import (
+    reset_current_user,
+    set_current_user,
+)
+
+USER_A = SimpleNamespace(id="user-a", email="a@test.local")
+USER_B = SimpleNamespace(id="user-b", email="b@test.local")
+
+
+async def _make_engines(tmp_path):
+    """Initialize the shared engine against a per-test SQLite DB.
+
+    Returns a cleanup coroutine the caller should await at the end.
+    """
+    from deerflow.persistence.engine import close_engine, init_engine
+
+    url = f"sqlite+aiosqlite:///{tmp_path / 'isolation.db'}"
+    await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
+    return close_engine
+
+
+def _as_user(user):
+    """Context manager-like helper that set/reset the contextvar."""
+
+    class _Ctx:
+        def __enter__(self):
+            self._token = set_current_user(user)
+            return user
+
+        def __exit__(self, *exc):
+            reset_current_user(self._token)
+
+    return _Ctx()
+
+
+# ── TC-API-17 — threads_meta isolation ────────────────────────────────────
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_thread_meta_cross_user_isolation(tmp_path):
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.persistence.thread_meta import ThreadMetaRepository
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        repo = ThreadMetaRepository(get_session_factory())
+
+        # User A creates a thread.
+        with _as_user(USER_A):
+            await repo.create("t-alpha", display_name="A's private thread")
+
+        # User B creates a thread.
+        with _as_user(USER_B):
+            await repo.create("t-beta", display_name="B's private thread")
+
+        # User A must see only A's thread.
+        with _as_user(USER_A):
+            a_view = await repo.get("t-alpha")
+            assert a_view is not None
+            assert a_view["display_name"] == "A's private thread"
+
+            # CRITICAL: User A must NOT see B's thread.
+            leaked = await repo.get("t-beta")
+            assert leaked is None, f"User A leaked User B's thread: {leaked}"
+
+            # Search should only return A's threads.
+            results = await repo.search()
+            assert [r["thread_id"] for r in results] == ["t-alpha"]
+
+        # User B must see only B's thread.
+        with _as_user(USER_B):
+            b_view = await repo.get("t-beta")
+            assert b_view is not None
+            assert b_view["display_name"] == "B's private thread"
+
+            leaked = await repo.get("t-alpha")
+            assert leaked is None, f"User B leaked User A's thread: {leaked}"
+
+            results = await repo.search()
+            assert [r["thread_id"] for r in results] == ["t-beta"]
+    finally:
+        await cleanup()
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_thread_meta_cross_user_mutation_denied(tmp_path):
+    """User B cannot update or delete a thread owned by User A."""
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.persistence.thread_meta import ThreadMetaRepository
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        repo = ThreadMetaRepository(get_session_factory())
+
+        with _as_user(USER_A):
+            await repo.create("t-alpha", display_name="original")
+
+        # User B tries to rename A's thread — must be a no-op.
+        with _as_user(USER_B):
+            await repo.update_display_name("t-alpha", "hacked")
+
+        # Verify the row is unchanged from A's perspective.
+        with _as_user(USER_A):
+            row = await repo.get("t-alpha")
+            assert row is not None
+            assert row["display_name"] == "original"
+
+        # User B tries to delete A's thread — must be a no-op.
+        with _as_user(USER_B):
+            await repo.delete("t-alpha")
+
+        # A's thread still exists.
+        with _as_user(USER_A):
+            row = await repo.get("t-alpha")
+            assert row is not None
+    finally:
+        await cleanup()
+
+
+# ── TC-API-18 — runs isolation ────────────────────────────────────────────
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_runs_cross_user_isolation(tmp_path):
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.persistence.run import RunRepository
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        repo = RunRepository(get_session_factory())
+
+        with _as_user(USER_A):
+            await repo.put("run-a1", thread_id="t-alpha")
+            await repo.put("run-a2", thread_id="t-alpha")
+
+        with _as_user(USER_B):
+            await repo.put("run-b1", thread_id="t-beta")
+
+        # User A must see only A's runs.
+        with _as_user(USER_A):
+            r = await repo.get("run-a1")
+            assert r is not None
+            assert r["run_id"] == "run-a1"
+
+            leaked = await repo.get("run-b1")
+            assert leaked is None, "User A leaked User B's run"
+
+            a_runs = await repo.list_by_thread("t-alpha")
+            assert {r["run_id"] for r in a_runs} == {"run-a1", "run-a2"}
+
+            # Listing B's thread from A's perspective: empty
+            empty = await repo.list_by_thread("t-beta")
+            assert empty == []
+
+        # User B must see only B's runs.
+        with _as_user(USER_B):
+            leaked = await repo.get("run-a1")
+            assert leaked is None, "User B leaked User A's run"
+
+            b_runs = await repo.list_by_thread("t-beta")
+            assert [r["run_id"] for r in b_runs] == ["run-b1"]
+    finally:
+        await cleanup()
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_runs_cross_user_delete_denied(tmp_path):
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.persistence.run import RunRepository
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        repo = RunRepository(get_session_factory())
+
+        with _as_user(USER_A):
+            await repo.put("run-a1", thread_id="t-alpha")
+
+        # User B tries to delete A's run — no-op.
+        with _as_user(USER_B):
+            await repo.delete("run-a1")
+
+        # A's run still exists.
+        with _as_user(USER_A):
+            row = await repo.get("run-a1")
+            assert row is not None
+    finally:
+        await cleanup()
+
+
+# ── TC-API-19 — run_events isolation (CRITICAL: content leak) ─────────────
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_run_events_cross_user_isolation(tmp_path):
+    """run_events holds raw conversation content — most sensitive leak vector."""
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.runtime.events.store.db import DbRunEventStore
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        store = DbRunEventStore(get_session_factory())
+
+        with _as_user(USER_A):
+            await store.put(
+                thread_id="t-alpha",
+                run_id="run-a1",
+                event_type="human_message",
+                category="message",
+                content="User A private question",
+            )
+            await store.put(
+                thread_id="t-alpha",
+                run_id="run-a1",
+                event_type="ai_message",
+                category="message",
+                content="User A private answer",
+            )
+
+        with _as_user(USER_B):
+            await store.put(
+                thread_id="t-beta",
+                run_id="run-b1",
+                event_type="human_message",
+                category="message",
+                content="User B private question",
+            )
+
+        # User A must see only A's events — CRITICAL.
+        with _as_user(USER_A):
+            msgs = await store.list_messages("t-alpha")
+            contents = [m["content"] for m in msgs]
+            assert "User A private question" in contents
+            assert "User A private answer" in contents
+            # CRITICAL: User B's content must not appear.
+            assert "User B private question" not in contents
+
+            # Attempt to read B's thread by guessing thread_id.
+            leaked = await store.list_messages("t-beta")
+            assert leaked == [], f"User A leaked User B's messages: {leaked}"
+
+            leaked_events = await store.list_events("t-beta", "run-b1")
+            assert leaked_events == [], "User A leaked User B's events"
+
+            # count_messages must also be zero for B's thread from A's view.
+            count = await store.count_messages("t-beta")
+            assert count == 0
+
+        # User B must see only B's events.
+        with _as_user(USER_B):
+            msgs = await store.list_messages("t-beta")
+            contents = [m["content"] for m in msgs]
+            assert "User B private question" in contents
+            assert "User A private question" not in contents
+            assert "User A private answer" not in contents
+
+            count = await store.count_messages("t-alpha")
+            assert count == 0
+    finally:
+        await cleanup()
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_run_events_cross_user_delete_denied(tmp_path):
+    """User B cannot delete User A's event stream."""
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.runtime.events.store.db import DbRunEventStore
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        store = DbRunEventStore(get_session_factory())
+
+        with _as_user(USER_A):
+            await store.put(
+                thread_id="t-alpha",
+                run_id="run-a1",
+                event_type="human_message",
+                category="message",
+                content="hello",
+            )
+
+        # User B tries to wipe A's thread events.
+        with _as_user(USER_B):
+            removed = await store.delete_by_thread("t-alpha")
+            assert removed == 0, f"User B deleted {removed} of User A's events"
+
+        # A's events still exist.
+        with _as_user(USER_A):
+            count = await store.count_messages("t-alpha")
+            assert count == 1
+    finally:
+        await cleanup()
+
+
+# ── TC-API-20 — feedback isolation ────────────────────────────────────────
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_feedback_cross_user_isolation(tmp_path):
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.persistence.feedback import FeedbackRepository
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        repo = FeedbackRepository(get_session_factory())
+
+        # User A submits positive feedback.
+        with _as_user(USER_A):
+            a_feedback = await repo.create(
+                run_id="run-a1",
+                thread_id="t-alpha",
+                rating=1,
+                comment="A liked this",
+            )
+
+        # User B submits negative feedback.
+        with _as_user(USER_B):
+            b_feedback = await repo.create(
+                run_id="run-b1",
+                thread_id="t-beta",
+                rating=-1,
+                comment="B disliked this",
+            )
+
+        # User A must see only A's feedback.
+        with _as_user(USER_A):
+            retrieved = await repo.get(a_feedback["feedback_id"])
+            assert retrieved is not None
+            assert retrieved["comment"] == "A liked this"
+
+            # CRITICAL: cannot read B's feedback by id.
+            leaked = await repo.get(b_feedback["feedback_id"])
+            assert leaked is None, "User A leaked User B's feedback"
+
+            # list_by_run for B's run must be empty.
+            empty = await repo.list_by_run("t-beta", "run-b1")
+            assert empty == []
+
+        # User B must see only B's feedback.
+        with _as_user(USER_B):
+            leaked = await repo.get(a_feedback["feedback_id"])
+            assert leaked is None, "User B leaked User A's feedback"
+
+            b_list = await repo.list_by_run("t-beta", "run-b1")
+            assert len(b_list) == 1
+            assert b_list[0]["comment"] == "B disliked this"
+    finally:
+        await cleanup()
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_feedback_cross_user_delete_denied(tmp_path):
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.persistence.feedback import FeedbackRepository
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        repo = FeedbackRepository(get_session_factory())
+
+        with _as_user(USER_A):
+            fb = await repo.create(run_id="run-a1", thread_id="t-alpha", rating=1)
+
+        # User B tries to delete A's feedback — must return False (no-op).
+        with _as_user(USER_B):
+            deleted = await repo.delete(fb["feedback_id"])
+            assert deleted is False, "User B deleted User A's feedback"
+
+        # A's feedback still retrievable.
+        with _as_user(USER_A):
+            row = await repo.get(fb["feedback_id"])
+            assert row is not None
+    finally:
+        await cleanup()
+
+
+# ── Regression: AUTO sentinel without contextvar must raise ───────────────
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_repository_without_context_raises(tmp_path):
+    """Defense-in-depth: calling repo methods without a user context errors."""
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.persistence.thread_meta import ThreadMetaRepository
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        repo = ThreadMetaRepository(get_session_factory())
+        # Contextvar is explicitly unset under @pytest.mark.no_auto_user.
+        with pytest.raises(RuntimeError, match="no user context is set"):
+            await repo.get("anything")
+    finally:
+        await cleanup()
+
+
+# ── Escape hatch: explicit owner_id=None bypasses filter (for migration) ──
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_explicit_none_bypasses_filter(tmp_path):
+    """Migration scripts pass owner_id=None to see all rows regardless of owner."""
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.persistence.thread_meta import ThreadMetaRepository
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        repo = ThreadMetaRepository(get_session_factory())
+
+        # Seed data as two different users.
+        with _as_user(USER_A):
+            await repo.create("t-alpha")
+        with _as_user(USER_B):
+            await repo.create("t-beta")
+
+        # Migration-style read: no contextvar, explicit None bypass.
+        all_rows = await repo.search(owner_id=None)
+        thread_ids = {r["thread_id"] for r in all_rows}
+        assert thread_ids == {"t-alpha", "t-beta"}
+
+        # Explicit get with None does not apply the filter either.
+        row_a = await repo.get("t-alpha", owner_id=None)
+        assert row_a is not None
+        row_b = await repo.get("t-beta", owner_id=None)
+        assert row_b is not None
+    finally:
+        await cleanup()