test(auth): add cross-user isolation test suite

10 tests exercising the storage-layer owner filter by manually
switching the user_context contextvar between two users. Verifies
the safety invariant:

  After a repository write with owner_id=A, a subsequent read with
  owner_id=B must not return the row, and vice versa.

Covers all 4 tables that own user-scoped data:

TC-API-17  threads_meta  — read, search, update, delete cross-user
TC-API-18  runs          — get, list_by_thread, delete cross-user
TC-API-19  run_events    — list_messages, list_events, count_messages,
                           delete_by_thread (CRITICAL: raw conversation
                           content leak vector)
TC-API-20  feedback      — get, list_by_run, delete cross-user

Plus two meta-tests verifying the sentinel pattern itself:
- AUTO + unset contextvar raises RuntimeError
- explicit owner_id=None bypasses the filter (migration escape hatch)

Architecture note
-----------------
These tests bypass the HTTP layer by design. The full chain
(cookie → middleware → contextvar → repository) is covered piecewise:

- test_auth_middleware.py: middleware sets contextvar from cookies
- test_owner_isolation.py: repositories enforce isolation when
  contextvar is set to different users

Together they prove the end-to-end safety property without the
ceremony of spinning up a full TestClient + in-memory DB for every
router endpoint.

Tests pass: 231 (full auth + persistence + isolation suite)
Lint: clean
This commit is contained in:
greatmengqi 2026-04-08 11:29:11 +08:00
parent 3aa3e37532
commit f0b065bef6

View File

@ -0,0 +1,465 @@
"""Cross-user isolation tests — non-negotiable safety gate.
Mirrors TC-API-17..20 from backend/docs/AUTH_TEST_PLAN.md. A failure
here means users can see each other's data; PR must not merge.
Architecture note
-----------------
These tests bypass the HTTP layer and exercise the storage-layer
owner filter directly by switching the ``user_context`` contextvar
between two users. The safety property under test is:
After a repository write with owner_id=A, a subsequent read with
owner_id=B must not return the row, and vice versa.
The HTTP layer is covered by test_auth_middleware.py, which proves
that a request cookie reaches the ``set_current_user`` call. Together
the two suites prove the full chain:
cookie middleware contextvar repository isolation
Every test in this file opts out of the autouse contextvar fixture
(``@pytest.mark.no_auto_user``) so it can set the contextvar to the
specific users it cares about.
"""
from __future__ import annotations
from types import SimpleNamespace
import pytest
from deerflow.runtime.user_context import (
reset_current_user,
set_current_user,
)
USER_A = SimpleNamespace(id="user-a", email="a@test.local")
USER_B = SimpleNamespace(id="user-b", email="b@test.local")
async def _make_engines(tmp_path):
"""Initialize the shared engine against a per-test SQLite DB.
Returns a cleanup coroutine the caller should await at the end.
"""
from deerflow.persistence.engine import close_engine, init_engine
url = f"sqlite+aiosqlite:///{tmp_path / 'isolation.db'}"
await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
return close_engine
def _as_user(user):
"""Context manager-like helper that set/reset the contextvar."""
class _Ctx:
def __enter__(self):
self._token = set_current_user(user)
return user
def __exit__(self, *exc):
reset_current_user(self._token)
return _Ctx()
# ── TC-API-17 — threads_meta isolation ────────────────────────────────────
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_thread_meta_cross_user_isolation(tmp_path):
from deerflow.persistence.engine import get_session_factory
from deerflow.persistence.thread_meta import ThreadMetaRepository
cleanup = await _make_engines(tmp_path)
try:
repo = ThreadMetaRepository(get_session_factory())
# User A creates a thread.
with _as_user(USER_A):
await repo.create("t-alpha", display_name="A's private thread")
# User B creates a thread.
with _as_user(USER_B):
await repo.create("t-beta", display_name="B's private thread")
# User A must see only A's thread.
with _as_user(USER_A):
a_view = await repo.get("t-alpha")
assert a_view is not None
assert a_view["display_name"] == "A's private thread"
# CRITICAL: User A must NOT see B's thread.
leaked = await repo.get("t-beta")
assert leaked is None, f"User A leaked User B's thread: {leaked}"
# Search should only return A's threads.
results = await repo.search()
assert [r["thread_id"] for r in results] == ["t-alpha"]
# User B must see only B's thread.
with _as_user(USER_B):
b_view = await repo.get("t-beta")
assert b_view is not None
assert b_view["display_name"] == "B's private thread"
leaked = await repo.get("t-alpha")
assert leaked is None, f"User B leaked User A's thread: {leaked}"
results = await repo.search()
assert [r["thread_id"] for r in results] == ["t-beta"]
finally:
await cleanup()
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_thread_meta_cross_user_mutation_denied(tmp_path):
"""User B cannot update or delete a thread owned by User A."""
from deerflow.persistence.engine import get_session_factory
from deerflow.persistence.thread_meta import ThreadMetaRepository
cleanup = await _make_engines(tmp_path)
try:
repo = ThreadMetaRepository(get_session_factory())
with _as_user(USER_A):
await repo.create("t-alpha", display_name="original")
# User B tries to rename A's thread — must be a no-op.
with _as_user(USER_B):
await repo.update_display_name("t-alpha", "hacked")
# Verify the row is unchanged from A's perspective.
with _as_user(USER_A):
row = await repo.get("t-alpha")
assert row is not None
assert row["display_name"] == "original"
# User B tries to delete A's thread — must be a no-op.
with _as_user(USER_B):
await repo.delete("t-alpha")
# A's thread still exists.
with _as_user(USER_A):
row = await repo.get("t-alpha")
assert row is not None
finally:
await cleanup()
# ── TC-API-18 — runs isolation ────────────────────────────────────────────
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_runs_cross_user_isolation(tmp_path):
from deerflow.persistence.engine import get_session_factory
from deerflow.persistence.run import RunRepository
cleanup = await _make_engines(tmp_path)
try:
repo = RunRepository(get_session_factory())
with _as_user(USER_A):
await repo.put("run-a1", thread_id="t-alpha")
await repo.put("run-a2", thread_id="t-alpha")
with _as_user(USER_B):
await repo.put("run-b1", thread_id="t-beta")
# User A must see only A's runs.
with _as_user(USER_A):
r = await repo.get("run-a1")
assert r is not None
assert r["run_id"] == "run-a1"
leaked = await repo.get("run-b1")
assert leaked is None, "User A leaked User B's run"
a_runs = await repo.list_by_thread("t-alpha")
assert {r["run_id"] for r in a_runs} == {"run-a1", "run-a2"}
# Listing B's thread from A's perspective: empty
empty = await repo.list_by_thread("t-beta")
assert empty == []
# User B must see only B's runs.
with _as_user(USER_B):
leaked = await repo.get("run-a1")
assert leaked is None, "User B leaked User A's run"
b_runs = await repo.list_by_thread("t-beta")
assert [r["run_id"] for r in b_runs] == ["run-b1"]
finally:
await cleanup()
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_runs_cross_user_delete_denied(tmp_path):
from deerflow.persistence.engine import get_session_factory
from deerflow.persistence.run import RunRepository
cleanup = await _make_engines(tmp_path)
try:
repo = RunRepository(get_session_factory())
with _as_user(USER_A):
await repo.put("run-a1", thread_id="t-alpha")
# User B tries to delete A's run — no-op.
with _as_user(USER_B):
await repo.delete("run-a1")
# A's run still exists.
with _as_user(USER_A):
row = await repo.get("run-a1")
assert row is not None
finally:
await cleanup()
# ── TC-API-19 — run_events isolation (CRITICAL: content leak) ─────────────
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_run_events_cross_user_isolation(tmp_path):
"""run_events holds raw conversation content — most sensitive leak vector."""
from deerflow.persistence.engine import get_session_factory
from deerflow.runtime.events.store.db import DbRunEventStore
cleanup = await _make_engines(tmp_path)
try:
store = DbRunEventStore(get_session_factory())
with _as_user(USER_A):
await store.put(
thread_id="t-alpha",
run_id="run-a1",
event_type="human_message",
category="message",
content="User A private question",
)
await store.put(
thread_id="t-alpha",
run_id="run-a1",
event_type="ai_message",
category="message",
content="User A private answer",
)
with _as_user(USER_B):
await store.put(
thread_id="t-beta",
run_id="run-b1",
event_type="human_message",
category="message",
content="User B private question",
)
# User A must see only A's events — CRITICAL.
with _as_user(USER_A):
msgs = await store.list_messages("t-alpha")
contents = [m["content"] for m in msgs]
assert "User A private question" in contents
assert "User A private answer" in contents
# CRITICAL: User B's content must not appear.
assert "User B private question" not in contents
# Attempt to read B's thread by guessing thread_id.
leaked = await store.list_messages("t-beta")
assert leaked == [], f"User A leaked User B's messages: {leaked}"
leaked_events = await store.list_events("t-beta", "run-b1")
assert leaked_events == [], "User A leaked User B's events"
# count_messages must also be zero for B's thread from A's view.
count = await store.count_messages("t-beta")
assert count == 0
# User B must see only B's events.
with _as_user(USER_B):
msgs = await store.list_messages("t-beta")
contents = [m["content"] for m in msgs]
assert "User B private question" in contents
assert "User A private question" not in contents
assert "User A private answer" not in contents
count = await store.count_messages("t-alpha")
assert count == 0
finally:
await cleanup()
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_run_events_cross_user_delete_denied(tmp_path):
"""User B cannot delete User A's event stream."""
from deerflow.persistence.engine import get_session_factory
from deerflow.runtime.events.store.db import DbRunEventStore
cleanup = await _make_engines(tmp_path)
try:
store = DbRunEventStore(get_session_factory())
with _as_user(USER_A):
await store.put(
thread_id="t-alpha",
run_id="run-a1",
event_type="human_message",
category="message",
content="hello",
)
# User B tries to wipe A's thread events.
with _as_user(USER_B):
removed = await store.delete_by_thread("t-alpha")
assert removed == 0, f"User B deleted {removed} of User A's events"
# A's events still exist.
with _as_user(USER_A):
count = await store.count_messages("t-alpha")
assert count == 1
finally:
await cleanup()
# ── TC-API-20 — feedback isolation ────────────────────────────────────────
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_feedback_cross_user_isolation(tmp_path):
from deerflow.persistence.engine import get_session_factory
from deerflow.persistence.feedback import FeedbackRepository
cleanup = await _make_engines(tmp_path)
try:
repo = FeedbackRepository(get_session_factory())
# User A submits positive feedback.
with _as_user(USER_A):
a_feedback = await repo.create(
run_id="run-a1",
thread_id="t-alpha",
rating=1,
comment="A liked this",
)
# User B submits negative feedback.
with _as_user(USER_B):
b_feedback = await repo.create(
run_id="run-b1",
thread_id="t-beta",
rating=-1,
comment="B disliked this",
)
# User A must see only A's feedback.
with _as_user(USER_A):
retrieved = await repo.get(a_feedback["feedback_id"])
assert retrieved is not None
assert retrieved["comment"] == "A liked this"
# CRITICAL: cannot read B's feedback by id.
leaked = await repo.get(b_feedback["feedback_id"])
assert leaked is None, "User A leaked User B's feedback"
# list_by_run for B's run must be empty.
empty = await repo.list_by_run("t-beta", "run-b1")
assert empty == []
# User B must see only B's feedback.
with _as_user(USER_B):
leaked = await repo.get(a_feedback["feedback_id"])
assert leaked is None, "User B leaked User A's feedback"
b_list = await repo.list_by_run("t-beta", "run-b1")
assert len(b_list) == 1
assert b_list[0]["comment"] == "B disliked this"
finally:
await cleanup()
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_feedback_cross_user_delete_denied(tmp_path):
from deerflow.persistence.engine import get_session_factory
from deerflow.persistence.feedback import FeedbackRepository
cleanup = await _make_engines(tmp_path)
try:
repo = FeedbackRepository(get_session_factory())
with _as_user(USER_A):
fb = await repo.create(run_id="run-a1", thread_id="t-alpha", rating=1)
# User B tries to delete A's feedback — must return False (no-op).
with _as_user(USER_B):
deleted = await repo.delete(fb["feedback_id"])
assert deleted is False, "User B deleted User A's feedback"
# A's feedback still retrievable.
with _as_user(USER_A):
row = await repo.get(fb["feedback_id"])
assert row is not None
finally:
await cleanup()
# ── Regression: AUTO sentinel without contextvar must raise ───────────────
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_repository_without_context_raises(tmp_path):
"""Defense-in-depth: calling repo methods without a user context errors."""
from deerflow.persistence.engine import get_session_factory
from deerflow.persistence.thread_meta import ThreadMetaRepository
cleanup = await _make_engines(tmp_path)
try:
repo = ThreadMetaRepository(get_session_factory())
# Contextvar is explicitly unset under @pytest.mark.no_auto_user.
with pytest.raises(RuntimeError, match="no user context is set"):
await repo.get("anything")
finally:
await cleanup()
# ── Escape hatch: explicit owner_id=None bypasses filter (for migration) ──
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_explicit_none_bypasses_filter(tmp_path):
"""Migration scripts pass owner_id=None to see all rows regardless of owner."""
from deerflow.persistence.engine import get_session_factory
from deerflow.persistence.thread_meta import ThreadMetaRepository
cleanup = await _make_engines(tmp_path)
try:
repo = ThreadMetaRepository(get_session_factory())
# Seed data as two different users.
with _as_user(USER_A):
await repo.create("t-alpha")
with _as_user(USER_B):
await repo.create("t-beta")
# Migration-style read: no contextvar, explicit None bypass.
all_rows = await repo.search(owner_id=None)
thread_ids = {r["thread_id"] for r in all_rows}
assert thread_ids == {"t-alpha", "t-beta"}
# Explicit get with None does not apply the filter either.
row_a = await repo.get("t-alpha", owner_id=None)
assert row_a is not None
row_b = await repo.get("t-beta", owner_id=None)
assert row_b is not None
finally:
await cleanup()