mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-01 14:28:28 +00:00
Squashes 25 PR commits onto current main. AppConfig becomes a pure value object with no ambient lookup. Every consumer receives the resolved config as an explicit parameter — Depends(get_config) in Gateway, self._app_config in DeerFlowClient, runtime.context.app_config in agent runs, AppConfig.from_file() at the LangGraph Server registration boundary. Phase 1 — frozen data + typed context - All config models (AppConfig, MemoryConfig, DatabaseConfig, …) become frozen=True; no sub-module globals. - AppConfig.from_file() is pure (no side-effect singleton loaders). - Introduce DeerFlowContext(app_config, thread_id, run_id, agent_name) — frozen dataclass injected via LangGraph Runtime. - Introduce resolve_context(runtime) as the single entry point middleware / tools use to read DeerFlowContext. Phase 2 — pure explicit parameter passing - Gateway: app.state.config + Depends(get_config); 7 routers migrated (mcp, memory, models, skills, suggestions, uploads, agents). - DeerFlowClient: __init__(config=...) captures config locally. - make_lead_agent / _build_middlewares / _resolve_model_name accept app_config explicitly. - RunContext.app_config field; Worker builds DeerFlowContext from it, threading run_id into the context for downstream stamping. - Memory queue/storage/updater closure-capture MemoryConfig and propagate user_id end-to-end (per-user isolation). - Sandbox/skills/community/factories/tools thread app_config. - resolve_context() rejects non-typed runtime.context. - Test suite migrated off AppConfig.current() monkey-patches. - AppConfig.current() classmethod deleted. Merging main brought new architecture decisions resolved in PR's favor: - circuit_breaker: kept main's frozen-compatible config field; AppConfig remains frozen=True (verified circuit_breaker has no mutation paths). - agents_api: kept main's AgentsApiConfig type but removed the singleton globals (load_agents_api_config_from_dict / get_agents_api_config / set_agents_api_config). 
8 routes in agents.py now read via Depends(get_config). - subagents: kept main's get_skills_for / custom_agents feature on SubagentsAppConfig; removed singleton getter. registry.py now reads app_config.subagents directly. - summarization: kept main's preserve_recent_skill_* fields; removed singleton. - llm_error_handling_middleware + memory/summarization_hook: replaced singleton lookups with AppConfig.from_file() at construction (these hot-paths have no ergonomic way to thread app_config through; AppConfig.from_file is a pure load). - worker.py + thread_data_middleware.py: DeerFlowContext.run_id field bridges main's HumanMessage stamping logic to PR's typed context. Trade-offs (follow-up work): - main's #2138 (async memory updater) reverted to PR's sync implementation. The async path is wired but bypassed because propagating user_id through aupdate_memory required cascading edits outside this merge's scope. - tests/test_subagent_skills_config.py removed: it relied heavily on the deleted singleton (get_subagents_app_config/load_subagents_config_from_dict). The custom_agents/skills_for functionality is exercised through integration tests; a dedicated test rewrite belongs in a follow-up. Verification: backend test suite — 2560 passed, 4 skipped, 84 failures. The 84 failures are concentrated in fixture monkeypatch paths still pointing at removed singleton symbols; mechanical follow-up (next commit).
466 lines
16 KiB
Python
466 lines
16 KiB
Python
"""Cross-user isolation tests — non-negotiable safety gate.
|
|
|
|
Mirrors TC-API-17..20 from backend/docs/AUTH_TEST_PLAN.md. A failure
|
|
here means users can see each other's data; PR must not merge.
|
|
|
|
Architecture note
|
|
-----------------
|
|
These tests bypass the HTTP layer and exercise the storage-layer
|
|
owner filter directly by switching the ``user_context`` contextvar
|
|
between two users. The safety property under test is:
|
|
|
|
After a repository write with user_id=A, a subsequent read with
|
|
user_id=B must not return the row, and vice versa.
|
|
|
|
The HTTP layer is covered by test_auth_middleware.py, which proves
|
|
that a request cookie reaches the ``set_current_user`` call. Together
|
|
the two suites prove the full chain:
|
|
|
|
cookie → middleware → contextvar → repository → isolation
|
|
|
|
Every test in this file opts out of the autouse contextvar fixture
|
|
(``@pytest.mark.no_auto_user``) so it can set the contextvar to the
|
|
specific users it cares about.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from types import SimpleNamespace
|
|
|
|
import pytest
|
|
|
|
from deerflow.runtime.user_context import (
|
|
reset_current_user,
|
|
set_current_user,
|
|
)
|
|
|
|
USER_A = SimpleNamespace(id="user-a", email="a@test.local")
|
|
USER_B = SimpleNamespace(id="user-b", email="b@test.local")
|
|
|
|
|
|
async def _make_engines(tmp_path):
    """Point the shared persistence engine at a SQLite file under ``tmp_path``.

    The database file is ``tmp_path / "isolation.db"`` and ``tmp_path`` is
    also passed as the engine's ``sqlite_dir``.

    Returns ``close_engine`` itself (not yet called); the caller should
    ``await`` a call to it once the test is done.
    """
    from deerflow.persistence.engine import close_engine, init_engine

    db_file = tmp_path / "isolation.db"
    await init_engine(
        "sqlite",
        url=f"sqlite+aiosqlite:///{db_file}",
        sqlite_dir=str(tmp_path),
    )
    return close_engine
|
|
|
|
|
|
def _as_user(user):
    """Return a ``with``-block helper that runs its body as ``user``.

    Entering the block calls ``set_current_user(user)`` and yields ``user``;
    leaving it hands the token from that call to ``reset_current_user``.
    """

    class _UserScope:
        def __enter__(self):
            token = set_current_user(user)
            # Keep the token so __exit__ can undo exactly this set() call.
            self._token = token
            return user

        def __exit__(self, *exc_info):
            reset_current_user(self._token)

    scope = _UserScope()
    return scope
|
|
|
|
|
|
# ── TC-API-17 — threads_meta isolation ────────────────────────────────────
|
|
|
|
|
|
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_thread_meta_cross_user_isolation(tmp_path):
    """TC-API-17: get/search on thread metadata only expose the caller's rows."""
    from deerflow.persistence.engine import get_session_factory
    from deerflow.persistence.thread_meta import ThreadMetaRepository

    close = await _make_engines(tmp_path)
    try:
        threads = ThreadMetaRepository(get_session_factory())

        # Seed one thread per user, A first and then B.
        with _as_user(USER_A):
            await threads.create("t-alpha", display_name="A's private thread")
        with _as_user(USER_B):
            await threads.create("t-beta", display_name="B's private thread")

        # From A's point of view only "t-alpha" exists.
        with _as_user(USER_A):
            own = await threads.get("t-alpha")
            assert own is not None
            assert own["display_name"] == "A's private thread"

            # CRITICAL: a direct lookup of B's thread id must come back empty.
            leaked = await threads.get("t-beta")
            assert leaked is None, f"User A leaked User B's thread: {leaked}"

            # An unfiltered search must list A's thread and nothing else.
            found = await threads.search()
            visible_ids = [row["thread_id"] for row in found]
            assert visible_ids == ["t-alpha"]

        # From B's point of view only "t-beta" exists.
        with _as_user(USER_B):
            own = await threads.get("t-beta")
            assert own is not None
            assert own["display_name"] == "B's private thread"

            leaked = await threads.get("t-alpha")
            assert leaked is None, f"User B leaked User A's thread: {leaked}"

            found = await threads.search()
            visible_ids = [row["thread_id"] for row in found]
            assert visible_ids == ["t-beta"]
    finally:
        await close()
|
|
|
|
|
|
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_thread_meta_cross_user_mutation_denied(tmp_path):
    """A rename or delete issued by User B leaves User A's thread untouched."""
    from deerflow.persistence.engine import get_session_factory
    from deerflow.persistence.thread_meta import ThreadMetaRepository

    close = await _make_engines(tmp_path)
    try:
        threads = ThreadMetaRepository(get_session_factory())

        with _as_user(USER_A):
            await threads.create("t-alpha", display_name="original")

        # B attempts a rename of A's thread; it must have no effect.
        with _as_user(USER_B):
            await threads.update_display_name("t-alpha", "hacked")

        # A still sees the original display name.
        with _as_user(USER_A):
            after_rename = await threads.get("t-alpha")
            assert after_rename is not None
            assert after_rename["display_name"] == "original"

        # B attempts to delete A's thread; it must have no effect either.
        with _as_user(USER_B):
            await threads.delete("t-alpha")

        # The thread is still there when A looks it up.
        with _as_user(USER_A):
            after_delete = await threads.get("t-alpha")
            assert after_delete is not None
    finally:
        await close()
|
|
|
|
|
|
# ── TC-API-18 — runs isolation ────────────────────────────────────────────
|
|
|
|
|
|
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_runs_cross_user_isolation(tmp_path):
    """TC-API-18: get/list_by_thread on runs only expose the caller's rows."""
    from deerflow.persistence.engine import get_session_factory
    from deerflow.persistence.run import RunRepository

    close = await _make_engines(tmp_path)
    try:
        runs = RunRepository(get_session_factory())

        # A owns two runs on "t-alpha"; B owns one run on "t-beta".
        with _as_user(USER_A):
            for run_id in ("run-a1", "run-a2"):
                await runs.put(run_id, thread_id="t-alpha")
        with _as_user(USER_B):
            await runs.put("run-b1", thread_id="t-beta")

        # A's view: own runs are visible, B's are not.
        with _as_user(USER_A):
            own = await runs.get("run-a1")
            assert own is not None
            assert own["run_id"] == "run-a1"

            leaked = await runs.get("run-b1")
            assert leaked is None, "User A leaked User B's run"

            listed = await runs.list_by_thread("t-alpha")
            listed_ids = {row["run_id"] for row in listed}
            assert listed_ids == {"run-a1", "run-a2"}

            # Listing B's thread while acting as A yields nothing.
            empty = await runs.list_by_thread("t-beta")
            assert empty == []

        # B's view: A's run is invisible and only "run-b1" is listed.
        with _as_user(USER_B):
            leaked = await runs.get("run-a1")
            assert leaked is None, "User B leaked User A's run"

            listed = await runs.list_by_thread("t-beta")
            listed_ids = [row["run_id"] for row in listed]
            assert listed_ids == ["run-b1"]
    finally:
        await close()
|
|
|
|
|
|
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_runs_cross_user_delete_denied(tmp_path):
    """A delete issued by User B leaves User A's run in place."""
    from deerflow.persistence.engine import get_session_factory
    from deerflow.persistence.run import RunRepository

    close = await _make_engines(tmp_path)
    try:
        runs = RunRepository(get_session_factory())

        with _as_user(USER_A):
            await runs.put("run-a1", thread_id="t-alpha")

        # B's delete of A's run must be a no-op.
        with _as_user(USER_B):
            await runs.delete("run-a1")

        # A can still fetch the run afterwards.
        with _as_user(USER_A):
            survivor = await runs.get("run-a1")
            assert survivor is not None
    finally:
        await close()
|
|
|
|
|
|
# ── TC-API-19 — run_events isolation (CRITICAL: content leak) ─────────────
|
|
|
|
|
|
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_run_events_cross_user_isolation(tmp_path):
    """TC-API-19: run_events carry raw conversation content and must not leak."""
    from deerflow.persistence.engine import get_session_factory
    from deerflow.runtime.events.store.db import DbRunEventStore

    close = await _make_engines(tmp_path)
    try:
        events = DbRunEventStore(get_session_factory())

        # A writes a question/answer pair on "t-alpha" / "run-a1".
        with _as_user(USER_A):
            for kind, text in (
                ("human_message", "User A private question"),
                ("ai_message", "User A private answer"),
            ):
                await events.put(
                    thread_id="t-alpha",
                    run_id="run-a1",
                    event_type=kind,
                    category="message",
                    content=text,
                )

        # B writes a single question on "t-beta" / "run-b1".
        with _as_user(USER_B):
            await events.put(
                thread_id="t-beta",
                run_id="run-b1",
                event_type="human_message",
                category="message",
                content="User B private question",
            )

        # CRITICAL: A's view contains A's messages and none of B's.
        with _as_user(USER_A):
            own_msgs = await events.list_messages("t-alpha")
            texts = [msg["content"] for msg in own_msgs]
            assert "User A private question" in texts
            assert "User A private answer" in texts
            assert "User B private question" not in texts

            # Guessing B's thread id must not return anything.
            leaked = await events.list_messages("t-beta")
            assert leaked == [], f"User A leaked User B's messages: {leaked}"

            leaked_events = await events.list_events("t-beta", "run-b1")
            assert leaked_events == [], "User A leaked User B's events"

            # The message count for B's thread is zero from A's side too.
            count = await events.count_messages("t-beta")
            assert count == 0

        # B's view contains B's message and none of A's.
        with _as_user(USER_B):
            own_msgs = await events.list_messages("t-beta")
            texts = [msg["content"] for msg in own_msgs]
            assert "User B private question" in texts
            assert "User A private question" not in texts
            assert "User A private answer" not in texts

            count = await events.count_messages("t-alpha")
            assert count == 0
    finally:
        await close()
|
|
|
|
|
|
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_run_events_cross_user_delete_denied(tmp_path):
    """A delete_by_thread issued by User B removes none of User A's events."""
    from deerflow.persistence.engine import get_session_factory
    from deerflow.runtime.events.store.db import DbRunEventStore

    close = await _make_engines(tmp_path)
    try:
        events = DbRunEventStore(get_session_factory())

        with _as_user(USER_A):
            await events.put(
                thread_id="t-alpha",
                run_id="run-a1",
                event_type="human_message",
                category="message",
                content="hello",
            )

        # B tries to wipe the events of A's thread.
        with _as_user(USER_B):
            removed = await events.delete_by_thread("t-alpha")
        assert removed == 0, f"User B deleted {removed} of User A's events"

        # A's single message is still counted.
        with _as_user(USER_A):
            remaining = await events.count_messages("t-alpha")
            assert remaining == 1
    finally:
        await close()
|
|
|
|
|
|
# ── TC-API-20 — feedback isolation ────────────────────────────────────────
|
|
|
|
|
|
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_feedback_cross_user_isolation(tmp_path):
    """TC-API-20: get/list_by_run on feedback only expose the caller's rows."""
    from deerflow.persistence.engine import get_session_factory
    from deerflow.persistence.feedback import FeedbackRepository

    close = await _make_engines(tmp_path)
    try:
        feedback = FeedbackRepository(get_session_factory())

        # A leaves positive feedback on its own run.
        with _as_user(USER_A):
            from_a = await feedback.create(
                run_id="run-a1",
                thread_id="t-alpha",
                rating=1,
                comment="A liked this",
            )

        # B leaves negative feedback on its own run.
        with _as_user(USER_B):
            from_b = await feedback.create(
                run_id="run-b1",
                thread_id="t-beta",
                rating=-1,
                comment="B disliked this",
            )

        id_a = from_a["feedback_id"]
        id_b = from_b["feedback_id"]

        # A's view: own feedback is readable, B's is not.
        with _as_user(USER_A):
            own = await feedback.get(id_a)
            assert own is not None
            assert own["comment"] == "A liked this"

            # CRITICAL: knowing B's feedback id must not be enough to read it.
            leaked = await feedback.get(id_b)
            assert leaked is None, "User A leaked User B's feedback"

            # Listing feedback for B's run while acting as A yields nothing.
            empty = await feedback.list_by_run("t-beta", "run-b1")
            assert empty == []

        # B's view: A's feedback is invisible, B's own entry is listed.
        with _as_user(USER_B):
            leaked = await feedback.get(id_a)
            assert leaked is None, "User B leaked User A's feedback"

            listed = await feedback.list_by_run("t-beta", "run-b1")
            assert len(listed) == 1
            assert listed[0]["comment"] == "B disliked this"
    finally:
        await close()
|
|
|
|
|
|
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_feedback_cross_user_delete_denied(tmp_path):
    """A delete issued by User B reports False and keeps User A's feedback."""
    from deerflow.persistence.engine import get_session_factory
    from deerflow.persistence.feedback import FeedbackRepository

    close = await _make_engines(tmp_path)
    try:
        feedback = FeedbackRepository(get_session_factory())

        with _as_user(USER_A):
            created = await feedback.create(
                run_id="run-a1",
                thread_id="t-alpha",
                rating=1,
            )
        feedback_id = created["feedback_id"]

        # B's delete of A's feedback must return False (no-op).
        with _as_user(USER_B):
            deleted = await feedback.delete(feedback_id)
        assert deleted is False, "User B deleted User A's feedback"

        # A can still fetch the feedback afterwards.
        with _as_user(USER_A):
            survivor = await feedback.get(feedback_id)
            assert survivor is not None
    finally:
        await close()
|
|
|
|
|
|
# ── Regression: AUTO sentinel without contextvar must raise ───────────────
|
|
|
|
|
|
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_repository_without_context_raises(tmp_path):
    """Defense-in-depth: a repository read with no user context must raise."""
    from deerflow.persistence.engine import get_session_factory
    from deerflow.persistence.thread_meta import ThreadMetaRepository

    close = await _make_engines(tmp_path)
    try:
        threads = ThreadMetaRepository(get_session_factory())

        # No _as_user() block here: under @pytest.mark.no_auto_user the
        # contextvar is left unset, so the lookup is expected to fail.
        expected = pytest.raises(RuntimeError, match="no user context is set")
        with expected:
            await threads.get("anything")
    finally:
        await close()
|
|
|
|
|
|
# ── Escape hatch: explicit user_id=None bypasses filter (for migration) ──
|
|
|
|
|
|
@pytest.mark.anyio
@pytest.mark.no_auto_user
async def test_explicit_none_bypasses_filter(tmp_path):
    """Passing ``user_id=None`` explicitly returns rows regardless of owner.

    This is the escape hatch migration scripts use to see every row.
    """
    from deerflow.persistence.engine import get_session_factory
    from deerflow.persistence.thread_meta import ThreadMetaRepository

    close = await _make_engines(tmp_path)
    try:
        threads = ThreadMetaRepository(get_session_factory())

        # Seed one thread per user.
        with _as_user(USER_A):
            await threads.create("t-alpha")
        with _as_user(USER_B):
            await threads.create("t-beta")

        # Migration-style search: no contextvar set, explicit None bypass.
        everything = await threads.search(user_id=None)
        seen_ids = {row["thread_id"] for row in everything}
        assert seen_ids == {"t-alpha", "t-beta"}

        # A direct get with user_id=None skips the owner filter as well.
        for thread_id in ("t-alpha", "t-beta"):
            row = await threads.get(thread_id, user_id=None)
            assert row is not None
    finally:
        await close()
|