mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-02 06:48:21 +00:00
Squashes 25 PR commits onto current main.

AppConfig becomes a pure value object with no ambient lookup. Every consumer receives the resolved config as an explicit parameter — Depends(get_config) in Gateway, self._app_config in DeerFlowClient, runtime.context.app_config in agent runs, AppConfig.from_file() at the LangGraph Server registration boundary.

Phase 1 — frozen data + typed context
- All config models (AppConfig, MemoryConfig, DatabaseConfig, …) become frozen=True; no sub-module globals.
- AppConfig.from_file() is pure (no side-effect singleton loaders).
- Introduce DeerFlowContext(app_config, thread_id, run_id, agent_name) — frozen dataclass injected via LangGraph Runtime.
- Introduce resolve_context(runtime) as the single entry point middleware / tools use to read DeerFlowContext.

Phase 2 — pure explicit parameter passing
- Gateway: app.state.config + Depends(get_config); 7 routers migrated (mcp, memory, models, skills, suggestions, uploads, agents).
- DeerFlowClient: __init__(config=...) captures config locally.
- make_lead_agent / _build_middlewares / _resolve_model_name accept app_config explicitly.
- RunContext.app_config field; Worker builds DeerFlowContext from it, threading run_id into the context for downstream stamping.
- Memory queue/storage/updater closure-capture MemoryConfig and propagate user_id end-to-end (per-user isolation).
- Sandbox/skills/community/factories/tools thread app_config.
- resolve_context() rejects non-typed runtime.context.
- Test suite migrated off AppConfig.current() monkey-patches.
- AppConfig.current() classmethod deleted.

Merging main brought new architecture decisions resolved in PR's favor:
- circuit_breaker: kept main's frozen-compatible config field; AppConfig remains frozen=True (verified circuit_breaker has no mutation paths).
- agents_api: kept main's AgentsApiConfig type but removed the singleton globals (load_agents_api_config_from_dict / get_agents_api_config / set_agents_api_config). 8 routes in agents.py now read via Depends(get_config).
- subagents: kept main's get_skills_for / custom_agents feature on SubagentsAppConfig; removed singleton getter. registry.py now reads app_config.subagents directly.
- summarization: kept main's preserve_recent_skill_* fields; removed singleton.
- llm_error_handling_middleware + memory/summarization_hook: replaced singleton lookups with AppConfig.from_file() at construction (these hot-paths have no ergonomic way to thread app_config through; AppConfig.from_file is a pure load).
- worker.py + thread_data_middleware.py: DeerFlowContext.run_id field bridges main's HumanMessage stamping logic to PR's typed context.

Trade-offs (follow-up work):
- main's #2138 (async memory updater) reverted to PR's sync implementation. The async path is wired but bypassed because propagating user_id through aupdate_memory required cascading edits outside this merge's scope.
- tests/test_subagent_skills_config.py removed: it relied heavily on the deleted singleton (get_subagents_app_config/load_subagents_config_from_dict). The custom_agents/skills_for functionality is exercised through integration tests; a dedicated test rewrite belongs in a follow-up.

Verification: backend test suite — 2560 passed, 4 skipped, 84 failures. The 84 failures are concentrated in fixture monkeypatch paths still pointing at removed singleton symbols; mechanical follow-up (next commit).
119 lines
4.6 KiB
Python
119 lines
4.6 KiB
Python
"""Global authentication middleware — fail-closed safety net.
|
|
|
|
Rejects unauthenticated requests to non-public paths with 401. When a
|
|
request passes the cookie check, resolves the JWT payload to a real
|
|
``User`` object and stamps it into both ``request.state.user`` and the
|
|
``deerflow.runtime.user_context`` contextvar so that repository-layer
|
|
owner filtering works automatically via the sentinel pattern.
|
|
|
|
Fine-grained permission checks remain in authz.py decorators.
|
|
"""
|
|
|
|
from collections.abc import Callable
|
|
|
|
from fastapi import HTTPException, Request, Response
|
|
from starlette.middleware.base import BaseHTTPMiddleware
|
|
from starlette.responses import JSONResponse
|
|
from starlette.types import ASGIApp
|
|
|
|
from app.gateway.auth.errors import AuthErrorCode, AuthErrorResponse
|
|
from app.gateway.authz import _ALL_PERMISSIONS, AuthContext
|
|
from deerflow.runtime.user_context import reset_current_user, set_current_user
|
|
|
|
# Path prefixes that never require authentication. A prefix covers the
# path itself and everything beneath it on a "/" boundary (e.g. "/docs"
# and "/docs/oauth2-redirect"), but NOT a sibling path that merely shares
# the leading characters (e.g. "/docs-internal").
_PUBLIC_PATH_PREFIXES: tuple[str, ...] = (
    "/health",
    "/docs",
    "/redoc",
    "/openapi.json",
)

# Exact auth paths that are public (login/register/status check).
# /api/v1/auth/me, /api/v1/auth/change-password etc. are NOT public.
_PUBLIC_EXACT_PATHS: frozenset[str] = frozenset(
    {
        "/api/v1/auth/login/local",
        "/api/v1/auth/register",
        "/api/v1/auth/logout",
        "/api/v1/auth/setup-status",
        "/api/v1/auth/initialize",
    }
)


def _is_public(path: str) -> bool:
    """Return True when *path* may be served without authentication.

    A path is public when, ignoring trailing slashes, it equals one of
    ``_PUBLIC_EXACT_PATHS``, or when it equals / lives underneath one of
    ``_PUBLIC_PATH_PREFIXES``.

    Prefixes are matched on a path-segment boundary. A plain
    ``str.startswith`` would also whitelist unrelated routes such as
    ``/docs-internal`` or ``/healthcheck-admin``, which would let them
    skip the auth gate entirely.

    Args:
        path: The URL path of the incoming request (no query string).

    Returns:
        ``True`` if the request should bypass authentication.
    """
    stripped = path.rstrip("/")
    if stripped in _PUBLIC_EXACT_PATHS:
        return True
    return any(
        # Either the prefix itself (trailing slashes ignored) ...
        stripped == prefix
        # ... or a descendant of it; the "/" keeps look-alike siblings out.
        or path.startswith(prefix + "/")
        for prefix in _PUBLIC_PATH_PREFIXES
    )
|
|
|
|
|
|
class AuthMiddleware(BaseHTTPMiddleware):
    """Strict auth gate: reject requests without a valid session.

    Two-stage check for non-public paths:

    1. Cookie presence — return 401 NOT_AUTHENTICATED if the
       ``access_token`` cookie is missing or empty.
    2. Strict validation via ``get_current_user_from_request`` — any
       ``HTTPException`` it raises is rendered as a JSON response that
       carries the exception's status code, detail and headers.

    On success, stamps ``request.state.user``, ``request.state.auth`` and
    the ``deerflow.runtime.user_context`` contextvar so that
    repository-layer owner filters work downstream without every route
    needing a ``@require_auth`` decorator. Routes that need per-resource
    authorization (e.g. "user A cannot read user B's thread by guessing
    the URL") should additionally use ``@require_permission(...,
    owner_check=True)`` for explicit enforcement — but authentication
    itself is fully handled here.
    """

    def __init__(self, app: ASGIApp) -> None:
        """Wrap *app* with the authentication gate."""
        super().__init__(app)

    async def dispatch(self, request: Request, call_next: Callable) -> Response:
        """Authenticate *request*, then forward it to the wrapped app.

        Args:
            request: The incoming HTTP request.
            call_next: Callable that passes the request to the next
                layer and returns its response.

        Returns:
            The downstream response, or a JSON error response when the
            request is not authenticated.
        """
        if _is_public(request.url.path):
            return await call_next(request)

        # Non-public path: require session cookie
        if not request.cookies.get("access_token"):
            return JSONResponse(
                status_code=401,
                content={
                    "detail": AuthErrorResponse(
                        code=AuthErrorCode.NOT_AUTHENTICATED,
                        message="Authentication required",
                    ).model_dump()
                },
            )

        # Strict JWT validation: reject junk/expired tokens with 401
        # right here instead of silently passing through. This closes
        # the "junk cookie bypass" gap (AUTH_TEST_PLAN test 7.5.8):
        # without this, non-isolation routes like /api/models would
        # accept any cookie-shaped string as authentication.
        #
        # We call the *strict* resolver so that fine-grained error
        # codes (token_expired, token_invalid, user_not_found, …)
        # propagate from AuthErrorCode, not get flattened into one
        # generic code. BaseHTTPMiddleware doesn't let HTTPException
        # bubble up, so we catch and render it as JSONResponse here.
        #
        # NOTE(review): imported at call time rather than at module
        # top — presumably to avoid a circular import; confirm before
        # hoisting.
        from app.gateway.deps import get_current_user_from_request

        try:
            user = await get_current_user_from_request(request)
        except HTTPException as exc:
            # Forward the exception's headers too (e.g. WWW-Authenticate),
            # mirroring FastAPI's default HTTPException handler; they
            # were previously dropped on this path.
            return JSONResponse(
                status_code=exc.status_code,
                content={"detail": exc.detail},
                headers=getattr(exc, "headers", None),
            )

        # Stamp both request.state.user (for the contextvar pattern)
        # and request.state.auth (so @require_permission's "auth is
        # None" branch short-circuits instead of running the entire
        # JWT-decode + DB-lookup pipeline a second time per request).
        request.state.user = user
        request.state.auth = AuthContext(user=user, permissions=_ALL_PERMISSIONS)
        token = set_current_user(user)
        try:
            return await call_next(request)
        finally:
            # Always restore the previous contextvar value, even when the
            # downstream handler raises.
            reset_current_user(token)
|