diff --git a/.env.example b/.env.example
index a0d38f51c..93c76b1fa 100644
--- a/.env.example
+++ b/.env.example
@@ -34,5 +34,9 @@ INFOQUEST_API_KEY=your-infoquest-api-key
 
 # GitHub API Token
 # GITHUB_TOKEN=your-github-token
+
+# Database (only needed when config.yaml has database.backend: postgres)
+# DATABASE_URL=postgresql://deerflow:password@localhost:5432/deerflow
+#
 # WECOM_BOT_ID=your-wecom-bot-id
 # WECOM_BOT_SECRET=your-wecom-bot-secret
diff --git a/backend/CLAUDE.md b/backend/CLAUDE.md
index 7a2242d7e..cc2fd5c98 100644
--- a/backend/CLAUDE.md
+++ b/backend/CLAUDE.md
@@ -130,7 +130,7 @@ from app.gateway.app import app
 from app.channels.service import start_channel_service
 
 # App → Harness (allowed)
-from deerflow.config import get_app_config
+from deerflow.config.app_config import AppConfig
 
 # Harness → App (FORBIDDEN — enforced by test_harness_boundary.py)
 # from app.gateway.routers.uploads import ...  # ← will fail CI
@@ -158,7 +158,7 @@ from deerflow.config import get_app_config
 
 Lead-agent middlewares are assembled in strict append order across `packages/harness/deerflow/agents/middlewares/tool_error_handling_middleware.py` (`build_lead_runtime_middlewares`) and `packages/harness/deerflow/agents/lead_agent/agent.py` (`_build_middlewares`):
 
-1. **ThreadDataMiddleware** - Creates per-thread directories (`backend/.deer-flow/threads/{thread_id}/user-data/{workspace,uploads,outputs}`); Web UI thread deletion now follows LangGraph thread removal with Gateway cleanup of the local `.deer-flow/threads/{thread_id}` directory
+1. **ThreadDataMiddleware** - Creates per-thread directories under the user's isolation scope (`backend/.deer-flow/users/{user_id}/threads/{thread_id}/user-data/{workspace,uploads,outputs}`); resolves `user_id` via `get_effective_user_id()` (falls back to `"default"` in no-auth mode); Web UI thread deletion now follows LangGraph thread removal with Gateway cleanup of the local thread directory
 2. **UploadsMiddleware** - Tracks and injects newly uploaded files into conversation
 3. **SandboxMiddleware** - Acquires sandbox, stores `sandbox_id` in state
 4. **DanglingToolCallMiddleware** - Injects placeholder ToolMessages for AIMessage tool_calls that lack responses (e.g., due to user interruption), including raw provider tool-call payloads preserved only in `additional_kwargs["tool_calls"]`
@@ -185,7 +185,16 @@ Setup: Copy `config.example.yaml` to `config.yaml` in the **project root** direc
 
 **Config Versioning**: `config.example.yaml` has a `config_version` field. On startup, `AppConfig.from_file()` compares user version vs example version and emits a warning if outdated. Missing `config_version` = version 0. Run `make config-upgrade` to auto-merge missing fields. When changing the config schema, bump `config_version` in `config.example.yaml`.
 
-**Config Caching**: `get_app_config()` caches the parsed config, but automatically reloads it when the resolved config path changes or the file's mtime increases. This keeps Gateway and LangGraph reads aligned with `config.yaml` edits without requiring a manual process restart.
+**Config Lifecycle**: All config models are `frozen=True` (immutable after construction). `AppConfig.from_file()` is a pure function — no side effects, no process-global state. The resolved `AppConfig` is passed as an explicit parameter down every consumer lane:
+
+- **Gateway**: `app.state.config` populated in lifespan; routers receive it via `Depends(get_config)` from `app/gateway/deps.py`.
+- **Client**: `DeerFlowClient._app_config` captured in the constructor; every method reads `self._app_config`.
+- **Agent run**: wrapped in `DeerFlowContext(app_config=…)` and injected via LangGraph `Runtime[DeerFlowContext].context`. Middleware and tools read `runtime.context.app_config` directly or via `resolve_context(runtime)`.
+- **LangGraph Server bootstrap**: `make_lead_agent` (registered in `langgraph.json`) calls `AppConfig.from_file()` itself — the only place in production that loads from disk at agent-build time.
+
+To update config at runtime (Gateway API mutations for MCP/Skills), write the new file and call `AppConfig.from_file()` to build a fresh snapshot, then swap `app.state.config`. No mtime detection, no auto-reload, no ambient ContextVar lookup (`AppConfig.current()` has been removed).
+
+**DeerFlowContext**: Per-invocation typed context for the agent execution path, injected via LangGraph `Runtime[DeerFlowContext]`. Holds `app_config: AppConfig`, `thread_id: str`, `agent_name: str | None`. Gateway runtime and `DeerFlowClient` construct full `DeerFlowContext` at invoke time; the LangGraph Server boundary builds one inside `make_lead_agent`. Middleware and tools access context through `resolve_context(runtime)` which returns the typed `DeerFlowContext` — legacy dict/None shapes are rejected. Mutable runtime state (`sandbox_id`) flows through `ThreadState.sandbox`, not context.
 
 Configuration priority:
 1. Explicit `config_path` argument
@@ -222,6 +231,9 @@ FastAPI application on port 8001 with health check at `GET /health`.
 | **Threads** (`/api/threads/{id}`) | `DELETE /` - remove DeerFlow-managed local thread data after LangGraph thread deletion; unexpected failures are logged server-side and return a generic 500 detail |
 | **Artifacts** (`/api/threads/{id}/artifacts`) | `GET /{path}` - serve artifacts; active content types (`text/html`, `application/xhtml+xml`, `image/svg+xml`) are always forced as download attachments to reduce XSS risk; `?download=true` still forces download for other file types |
 | **Suggestions** (`/api/threads/{id}/suggestions`) | `POST /` - generate follow-up questions; rich list/block model content is normalized before JSON parsing |
+| **Thread Runs** (`/api/threads/{id}/runs`) | `POST /` - create background run; `POST /stream` - create + SSE stream; `POST /wait` - create + block; `GET /` - list runs; `GET /{rid}` - run details; `POST /{rid}/cancel` - cancel; `GET /{rid}/join` - join SSE; `GET /{rid}/messages` - paginated messages `{data, has_more}`; `GET /{rid}/events` - full event stream; `GET /../messages` - thread messages with feedback; `GET /../token-usage` - aggregate tokens |
+| **Feedback** (`/api/threads/{id}/runs/{rid}/feedback`) | `PUT /` - upsert feedback; `DELETE /` - delete user feedback; `POST /` - create feedback; `GET /` - list feedback; `GET /stats` - aggregate stats; `DELETE /{fid}` - delete specific |
+| **Runs** (`/api/runs`) | `POST /stream` - stateless run + SSE; `POST /wait` - stateless run + block; `GET /{rid}/messages` - paginated messages by run_id `{data, has_more}` (cursor: `after_seq`/`before_seq`); `GET /{rid}/feedback` - list feedback by run_id |
 
 Proxied through nginx: `/api/langgraph/*` → LangGraph, all other `/api/*` → Gateway.
 
@@ -235,7 +247,7 @@ Proxied through nginx: `/api/langgraph/*` → LangGraph, all other `/api/*` →
 
 **Virtual Path System**:
 - Agent sees: `/mnt/user-data/{workspace,uploads,outputs}`, `/mnt/skills`
-- Physical: `backend/.deer-flow/threads/{thread_id}/user-data/...`, `deer-flow/skills/`
+- Physical: `backend/.deer-flow/users/{user_id}/threads/{thread_id}/user-data/...`, `deer-flow/skills/`
 - Translation: `replace_virtual_path()` / `replace_virtual_paths_in_command()`
 - Detection: `is_local_sandbox()` checks `sandbox_id == "local"`
 
@@ -275,7 +287,7 @@ Proxied through nginx: `/api/langgraph/*` → LangGraph, all other `/api/*` →
 - `invoke_acp_agent` - Invokes external ACP-compatible agents from `config.yaml`
 - ACP launchers must be real ACP adapters. The standard `codex` CLI is not ACP-compatible by itself; configure a wrapper such as `npx -y @zed-industries/codex-acp` or an installed `codex-acp` binary
 - Missing ACP executables now return an actionable error message instead of a raw `[Errno 2]`
-- Each ACP agent uses a per-thread workspace at `{base_dir}/threads/{thread_id}/acp-workspace/`. The workspace is accessible to the lead agent via the virtual path `/mnt/acp-workspace/` (read-only). In docker sandbox mode, the directory is volume-mounted into the container at `/mnt/acp-workspace` (read-only); in local sandbox mode, path translation is handled by `tools.py`
+- Each ACP agent uses a per-thread workspace at `{base_dir}/users/{user_id}/threads/{thread_id}/acp-workspace/`. The workspace is accessible to the lead agent via the virtual path `/mnt/acp-workspace/` (read-only). In docker sandbox mode, the directory is volume-mounted into the container at `/mnt/acp-workspace` (read-only); in local sandbox mode, path translation is handled by `tools.py`
 - `image_search/` - Image search via DuckDuckGo
 
 ### MCP System (`packages/harness/deerflow/mcp/`)
@@ -344,18 +356,27 @@ Bridges external messaging platforms (Feishu, Slack, Telegram) to the DeerFlow a
 
 **Components**:
 - `updater.py` - LLM-based memory updates with fact extraction, whitespace-normalized fact deduplication (trims leading/trailing whitespace before comparing), and atomic file I/O
-- `queue.py` - Debounced update queue (per-thread deduplication, configurable wait time)
+- `queue.py` - Debounced update queue (per-thread deduplication, configurable wait time); captures `user_id` at enqueue time so it survives the `threading.Timer` boundary
 - `prompt.py` - Prompt templates for memory updates
+- `storage.py` - File-based storage with per-user isolation; cache keyed by `(user_id, agent_name)` tuple
 
-**Data Structure** (stored in `backend/.deer-flow/memory.json`):
+**Per-User Isolation**:
+- Memory is stored per-user at `{base_dir}/users/{user_id}/memory.json`
+- Per-agent per-user memory at `{base_dir}/users/{user_id}/agents/{agent_name}/memory.json`
+- `user_id` is resolved via `get_effective_user_id()` from `deerflow.runtime.user_context`
+- In no-auth mode, `user_id` defaults to `"default"` (constant `DEFAULT_USER_ID`)
+- Absolute `storage_path` in config opts out of per-user isolation
+- **Migration**: Run `PYTHONPATH=. python scripts/migrate_user_isolation.py` to move legacy `memory.json` and `threads/` into per-user layout; supports `--dry-run`
+
+**Data Structure** (stored in `{base_dir}/users/{user_id}/memory.json`):
 - **User Context**: `workContext`, `personalContext`, `topOfMind` (1-3 sentence summaries)
 - **History**: `recentMonths`, `earlierContext`, `longTermBackground`
 - **Facts**: Discrete facts with `id`, `content`, `category` (preference/knowledge/context/behavior/goal), `confidence` (0-1), `createdAt`, `source`
 
 **Workflow**:
-1. `MemoryMiddleware` filters messages (user inputs + final AI responses) and queues conversation
+1. `MemoryMiddleware` filters messages (user inputs + final AI responses), captures `user_id` via `get_effective_user_id()`, and queues conversation with the captured `user_id`
 2. Queue debounces (30s default), batches updates, deduplicates per-thread
-3. Background thread invokes LLM to extract context updates and facts
+3. Background thread invokes LLM to extract context updates and facts, using the stored `user_id` (not the contextvar, which is unavailable on timer threads)
 4. Applies updates atomically (temp file + rename) with cache invalidation, skipping duplicate fact content before append
 5. Next interaction injects top 15 facts + context into `<memory>` tags in system prompt
 
@@ -363,7 +384,7 @@ Focused regression coverage for the updater lives in `backend/tests/test_memory_
 
 **Configuration** (`config.yaml` → `memory`):
 - `enabled` / `injection_enabled` - Master switches
-- `storage_path` - Path to memory.json
+- `storage_path` - Path to memory.json (absolute path opts out of per-user isolation)
 - `debounce_seconds` - Wait time before processing (default: 30)
 - `model_name` - LLM for updates (null = default model)
 - `max_facts` / `fact_confidence_threshold` - Fact storage limits (100 / 0.7)
diff --git a/backend/Dockerfile b/backend/Dockerfile
index c0f59d2f1..c046268d3 100644
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@@ -13,6 +13,9 @@ FROM python:3.12-slim-bookworm AS builder
 ARG NODE_MAJOR=22
 ARG APT_MIRROR
 ARG UV_INDEX_URL
+# Optional extras to install (e.g. "postgres" for PostgreSQL support)
+# Usage: docker build --build-arg UV_EXTRAS=postgres ...
+ARG UV_EXTRAS
 
 # Optionally override apt mirror for restricted networks (e.g. APT_MIRROR=mirrors.aliyun.com)
 RUN if [ -n "${APT_MIRROR}" ]; then \
@@ -43,8 +46,9 @@ WORKDIR /app
 COPY backend ./backend
 
 # Install dependencies with cache mount
+# When UV_EXTRAS is set (e.g. "postgres"), installs optional dependencies.
 RUN --mount=type=cache,target=/root/.cache/uv \
-    sh -c "cd backend && UV_INDEX_URL=${UV_INDEX_URL:-https://pypi.org/simple} uv sync"
+    sh -c "cd backend && UV_INDEX_URL=${UV_INDEX_URL:-https://pypi.org/simple} uv sync ${UV_EXTRAS:+--extra $UV_EXTRAS}"
 
 # ── Stage 2: Dev ──────────────────────────────────────────────────────────────
 # Retains compiler toolchain from builder so startup-time `uv sync` can build
diff --git a/backend/app/channels/feishu.py b/backend/app/channels/feishu.py
index c2a637ff9..10d77d729 100644
--- a/backend/app/channels/feishu.py
+++ b/backend/app/channels/feishu.py
@@ -13,6 +13,7 @@ from app.channels.base import Channel
 from app.channels.commands import KNOWN_CHANNEL_COMMANDS
 from app.channels.message_bus import InboundMessage, InboundMessageType, MessageBus, OutboundMessage, ResolvedAttachment
 from deerflow.config.paths import VIRTUAL_PATH_PREFIX, get_paths
+from deerflow.runtime.user_context import get_effective_user_id
 from deerflow.sandbox.sandbox_provider import get_sandbox_provider
 
 logger = logging.getLogger(__name__)
@@ -344,8 +345,9 @@ class FeishuChannel(Channel):
             return f"Failed to obtain the [{type}]"
 
         paths = get_paths()
-        paths.ensure_thread_dirs(thread_id)
-        uploads_dir = paths.sandbox_uploads_dir(thread_id).resolve()
+        user_id = get_effective_user_id()
+        paths.ensure_thread_dirs(thread_id, user_id=user_id)
+        uploads_dir = paths.sandbox_uploads_dir(thread_id, user_id=user_id).resolve()
 
         ext = "png" if type == "image" else "bin"
         raw_filename = getattr(response, "file_name", "") or f"feishu_{file_key[-12:]}.{ext}"
@@ -373,7 +375,9 @@ class FeishuChannel(Channel):
         virtual_path = f"{VIRTUAL_PATH_PREFIX}/uploads/{resolved_target.name}"
 
         try:
-            sandbox_provider = get_sandbox_provider()
+            from deerflow.config.app_config import AppConfig
+
+            sandbox_provider = get_sandbox_provider(AppConfig.from_file())
             sandbox_id = sandbox_provider.acquire(thread_id)
             if sandbox_id != "local":
                 sandbox = sandbox_provider.get(sandbox_id)
diff --git a/backend/app/channels/manager.py b/backend/app/channels/manager.py
index 286635d3f..778c8c860 100644
--- a/backend/app/channels/manager.py
+++ b/backend/app/channels/manager.py
@@ -17,6 +17,7 @@ from langgraph_sdk.errors import ConflictError
 from app.channels.commands import KNOWN_CHANNEL_COMMANDS
 from app.channels.message_bus import InboundMessage, InboundMessageType, MessageBus, OutboundMessage, ResolvedAttachment
 from app.channels.store import ChannelStore
+from deerflow.runtime.user_context import get_effective_user_id
 
 logger = logging.getLogger(__name__)
 
@@ -342,14 +343,15 @@ def _resolve_attachments(thread_id: str, artifacts: list[str]) -> list[ResolvedA
 
     attachments: list[ResolvedAttachment] = []
     paths = get_paths()
-    outputs_dir = paths.sandbox_outputs_dir(thread_id).resolve()
+    user_id = get_effective_user_id()
+    outputs_dir = paths.sandbox_outputs_dir(thread_id, user_id=user_id).resolve()
     for virtual_path in artifacts:
         # Security: only allow files from the agent outputs directory
         if not virtual_path.startswith(_OUTPUTS_VIRTUAL_PREFIX):
             logger.warning("[Manager] rejected non-outputs artifact path: %s", virtual_path)
             continue
         try:
-            actual = paths.resolve_virtual_path(thread_id, virtual_path)
+            actual = paths.resolve_virtual_path(thread_id, virtual_path, user_id=user_id)
             # Verify the resolved path is actually under the outputs directory
             # (guards against path-traversal even after prefix check)
             try:
diff --git a/backend/app/channels/service.py b/backend/app/channels/service.py
index 8042733c2..0b84bbc15 100644
--- a/backend/app/channels/service.py
+++ b/backend/app/channels/service.py
@@ -4,13 +4,16 @@ from __future__ import annotations
 
 import logging
 import os
-from typing import Any
+from typing import TYPE_CHECKING, Any
 
 from app.channels.base import Channel
 from app.channels.manager import DEFAULT_GATEWAY_URL, DEFAULT_LANGGRAPH_URL, ChannelManager
 from app.channels.message_bus import MessageBus
 from app.channels.store import ChannelStore
 
+if TYPE_CHECKING:
+    from deerflow.config.app_config import AppConfig
+
 logger = logging.getLogger(__name__)
 
 # Channel name → import path for lazy loading
@@ -75,14 +78,11 @@ class ChannelService:
         self._running = False
 
     @classmethod
-    def from_app_config(cls) -> ChannelService:
-        """Create a ChannelService from the application config."""
-        from deerflow.config.app_config import get_app_config
-
-        config = get_app_config()
+    def from_app_config(cls, app_config: AppConfig) -> ChannelService:
+        """Create a ChannelService from an explicit application config."""
         channels_config = {}
         # extra fields are allowed by AppConfig (extra="allow")
-        extra = config.model_extra or {}
+        extra = app_config.model_extra or {}
         if "channels" in extra:
             channels_config = extra["channels"]
         return cls(channels_config=channels_config)
@@ -201,12 +201,12 @@ def get_channel_service() -> ChannelService | None:
     return _channel_service
 
 
-async def start_channel_service() -> ChannelService:
+async def start_channel_service(app_config: AppConfig) -> ChannelService:
     """Create and start the global ChannelService from app config."""
     global _channel_service
     if _channel_service is not None:
         return _channel_service
-    _channel_service = ChannelService.from_app_config()
+    _channel_service = ChannelService.from_app_config(app_config)
     await _channel_service.start()
     return _channel_service
 
diff --git a/backend/app/gateway/app.py b/backend/app/gateway/app.py
index 92f50b324..b960b4729 100644
--- a/backend/app/gateway/app.py
+++ b/backend/app/gateway/app.py
@@ -1,17 +1,23 @@
 import asyncio
 import logging
+import os
 from collections.abc import AsyncGenerator
 from contextlib import asynccontextmanager
 
 from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
 
+from app.gateway.auth_middleware import AuthMiddleware
 from app.gateway.config import get_gateway_config
+from app.gateway.csrf_middleware import CSRFMiddleware
 from app.gateway.deps import langgraph_runtime
 from app.gateway.routers import (
     agents,
     artifacts,
     assistants_compat,
+    auth,
     channels,
+    feedback,
     mcp,
     memory,
     models,
@@ -22,7 +28,7 @@ from app.gateway.routers import (
     threads,
     uploads,
 )
-from deerflow.config.app_config import get_app_config
+from deerflow.config.app_config import AppConfig
 
 # Configure logging
 logging.basicConfig(
@@ -39,13 +45,117 @@ logger = logging.getLogger(__name__)
 _SHUTDOWN_HOOK_TIMEOUT_SECONDS = 5.0
 
 
+async def _ensure_admin_user(app: FastAPI) -> None:
+    """Startup hook: handle first boot and migrate orphan threads otherwise.
+
+    After admin creation, migrate orphan threads from the LangGraph
+    store (metadata.user_id unset) to the admin account. This is the
+    "no-auth → with-auth" upgrade path: users who ran DeerFlow without
+    authentication have existing LangGraph thread data that needs an
+    owner assigned.
+        First boot (no admin exists):
+            - Does NOT create any user accounts automatically.
+            - The operator must visit ``/setup`` to create the first admin.
+
+    Subsequent boots (admin already exists):
+      - Runs the one-time "no-auth → with-auth" orphan thread migration for
+        existing LangGraph thread metadata that has no owner_id.
+
+    No SQL persistence migration is needed: the four user_id columns
+    (threads_meta, runs, run_events, feedback) only come into existence
+    alongside the auth module via create_all, so freshly created tables
+    never contain NULL-owner rows.
+    """
+    from sqlalchemy import select
+
+    from app.gateway.deps import get_local_provider
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.persistence.user.model import UserRow
+
+    provider = get_local_provider()
+    admin_count = await provider.count_admin_users()
+
+    if admin_count == 0:
+        logger.info("=" * 60)
+        logger.info("  First boot detected — no admin account exists.")
+        logger.info("  Visit /setup to complete admin account creation.")
+        logger.info("=" * 60)
+        return
+
+    # Admin already exists — run orphan thread migration for any
+    # LangGraph thread metadata that pre-dates the auth module.
+    sf = get_session_factory()
+    if sf is None:
+        return
+
+    async with sf() as session:
+        stmt = select(UserRow).where(UserRow.system_role == "admin").limit(1)
+        row = (await session.execute(stmt)).scalar_one_or_none()
+
+    if row is None:
+        return  # Should not happen (admin_count > 0 above), but be safe.
+
+    admin_id = str(row.id)
+
+    # LangGraph store orphan migration — non-fatal.
+    # This covers the "no-auth → with-auth" upgrade path for users
+    # whose existing LangGraph thread metadata has no user_id set.
+    store = getattr(app.state, "store", None)
+    if store is not None:
+        try:
+            migrated = await _migrate_orphaned_threads(store, admin_id)
+            if migrated:
+                logger.info("Migrated %d orphan LangGraph thread(s) to admin", migrated)
+        except Exception:
+            logger.exception("LangGraph thread migration failed (non-fatal)")
+
+
+async def _iter_store_items(store, namespace, *, page_size: int = 500):
+    """Paginated async iterator over a LangGraph store namespace.
+
+    Replaces the old hardcoded ``limit=1000`` call with a cursor-style
+    loop so that environments with more than one page of orphans do
+    not silently lose data. Terminates when a page is empty OR when a
+    short page arrives (indicating the last page).
+    """
+    offset = 0
+    while True:
+        batch = await store.asearch(namespace, limit=page_size, offset=offset)
+        if not batch:
+            return
+        for item in batch:
+            yield item
+        if len(batch) < page_size:
+            return
+        offset += page_size
+
+
+async def _migrate_orphaned_threads(store, admin_user_id: str) -> int:
+    """Migrate LangGraph store threads with no user_id to the given admin.
+
+    Uses cursor pagination so all orphans are migrated regardless of
+    count. Returns the number of rows migrated.
+    """
+    migrated = 0
+    async for item in _iter_store_items(store, ("threads",)):
+        metadata = item.value.get("metadata", {})
+        if not metadata.get("user_id"):
+            metadata["user_id"] = admin_user_id
+            item.value["metadata"] = metadata
+            await store.aput(("threads",), item.key, item.value)
+            migrated += 1
+    return migrated
+
+
 @asynccontextmanager
 async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
     """Application lifespan handler."""
 
-    # Load config and check necessary environment variables at startup
     try:
-        get_app_config()
+        # ``app.state.config`` is the sole source of truth for
+        # ``Depends(get_config)``. Consumers that want AppConfig must receive
+        # it as an explicit parameter; there is no ambient singleton.
+        app.state.config = AppConfig.from_file()
         logger.info("Configuration loaded successfully")
     except Exception as e:
         error_msg = f"Failed to load configuration during gateway startup: {e}"
@@ -58,11 +168,15 @@ async def lifespan(app: FastAPI) -> AsyncGenerator[None, None]:
     async with langgraph_runtime(app):
         logger.info("LangGraph runtime initialised")
 
+        # Ensure admin user exists (auto-create on first boot)
+        # Must run AFTER langgraph_runtime so app.state.store is available for thread migration
+        await _ensure_admin_user(app)
+
         # Start IM channel service if any channels are configured
         try:
             from app.channels.service import start_channel_service
 
-            channel_service = await start_channel_service()
+            channel_service = await start_channel_service(app.state.config)
             logger.info("Channel service started: %s", channel_service.get_status())
         except Exception:
             logger.exception("No IM channels configured or channel service failed to start")
@@ -177,7 +291,31 @@ This gateway provides custom endpoints for models, MCP configuration, skills, an
         ],
     )
 
-    # CORS is handled by nginx - no need for FastAPI middleware
+    # Auth: reject unauthenticated requests to non-public paths (fail-closed safety net)
+    app.add_middleware(AuthMiddleware)
+
+    # CSRF: Double Submit Cookie pattern for state-changing requests
+    app.add_middleware(CSRFMiddleware)
+
+    # CORS: when GATEWAY_CORS_ORIGINS is set (dev without nginx), add CORS middleware.
+    # In production, nginx handles CORS and no middleware is needed.
+    cors_origins_env = os.environ.get("GATEWAY_CORS_ORIGINS", "")
+    if cors_origins_env:
+        cors_origins = [o.strip() for o in cors_origins_env.split(",") if o.strip()]
+        # Validate: wildcard origin with credentials is a security misconfiguration
+        for origin in cors_origins:
+            if origin == "*":
+                logger.error("GATEWAY_CORS_ORIGINS contains wildcard '*' with allow_credentials=True. This is a security misconfiguration — browsers will reject the response. Use explicit scheme://host:port origins instead.")
+                cors_origins = [o for o in cors_origins if o != "*"]
+                break
+        if cors_origins:
+            app.add_middleware(
+                CORSMiddleware,
+                allow_origins=cors_origins,
+                allow_credentials=True,
+                allow_methods=["*"],
+                allow_headers=["*"],
+            )
 
     # Include routers
     # Models API is mounted at /api/models
@@ -213,6 +351,12 @@ This gateway provides custom endpoints for models, MCP configuration, skills, an
     # Assistants compatibility API (LangGraph Platform stub)
     app.include_router(assistants_compat.router)
 
+    # Auth API is mounted at /api/v1/auth
+    app.include_router(auth.router)
+
+    # Feedback API is mounted at /api/threads/{thread_id}/runs/{run_id}/feedback
+    app.include_router(feedback.router)
+
     # Thread Runs API (LangGraph Platform-compatible runs lifecycle)
     app.include_router(thread_runs.router)
 
diff --git a/backend/app/gateway/auth/__init__.py b/backend/app/gateway/auth/__init__.py
new file mode 100644
index 000000000..4e9b71c42
--- /dev/null
+++ b/backend/app/gateway/auth/__init__.py
@@ -0,0 +1,42 @@
+"""Authentication module for DeerFlow.
+
+This module provides:
+- JWT-based authentication
+- Provider Factory pattern for extensible auth methods
+- UserRepository interface for storage backends (SQLite)
+"""
+
+from app.gateway.auth.config import AuthConfig, get_auth_config, set_auth_config
+from app.gateway.auth.errors import AuthErrorCode, AuthErrorResponse, TokenError
+from app.gateway.auth.jwt import TokenPayload, create_access_token, decode_token
+from app.gateway.auth.local_provider import LocalAuthProvider
+from app.gateway.auth.models import User, UserResponse
+from app.gateway.auth.password import hash_password, verify_password
+from app.gateway.auth.providers import AuthProvider
+from app.gateway.auth.repositories.base import UserRepository
+
+__all__ = [
+    # Config
+    "AuthConfig",
+    "get_auth_config",
+    "set_auth_config",
+    # Errors
+    "AuthErrorCode",
+    "AuthErrorResponse",
+    "TokenError",
+    # JWT
+    "TokenPayload",
+    "create_access_token",
+    "decode_token",
+    # Password
+    "hash_password",
+    "verify_password",
+    # Models
+    "User",
+    "UserResponse",
+    # Providers
+    "AuthProvider",
+    "LocalAuthProvider",
+    # Repository
+    "UserRepository",
+]
diff --git a/backend/app/gateway/auth/config.py b/backend/app/gateway/auth/config.py
new file mode 100644
index 000000000..01f0870fd
--- /dev/null
+++ b/backend/app/gateway/auth/config.py
@@ -0,0 +1,57 @@
+"""Authentication configuration for DeerFlow."""
+
+import logging
+import os
+import secrets
+
+from dotenv import load_dotenv
+from pydantic import BaseModel, Field
+
+load_dotenv()
+
+logger = logging.getLogger(__name__)
+
+
+class AuthConfig(BaseModel):
+    """JWT and auth-related configuration. Parsed once at startup.
+
+    Note: the ``users`` table now lives in the shared persistence
+    database managed by ``deerflow.persistence.engine``. The old
+    ``users_db_path`` config key has been removed — user storage is
+    configured through ``config.database`` like every other table.
+    """
+
+    jwt_secret: str = Field(
+        ...,
+        description="Secret key for JWT signing. MUST be set via AUTH_JWT_SECRET.",
+    )
+    token_expiry_days: int = Field(default=7, ge=1, le=30)
+    oauth_github_client_id: str | None = Field(default=None)
+    oauth_github_client_secret: str | None = Field(default=None)
+
+
+_auth_config: AuthConfig | None = None
+
+
+def get_auth_config() -> AuthConfig:
+    """Get the global AuthConfig instance. Parses from env on first call."""
+    global _auth_config
+    if _auth_config is None:
+        jwt_secret = os.environ.get("AUTH_JWT_SECRET")
+        if not jwt_secret:
+            jwt_secret = secrets.token_urlsafe(32)
+            os.environ["AUTH_JWT_SECRET"] = jwt_secret
+            logger.warning(
+                "⚠ AUTH_JWT_SECRET is not set — using an auto-generated ephemeral secret. "
+                "Sessions will be invalidated on restart. "
+                "For production, add AUTH_JWT_SECRET to your .env file: "
+                'python -c "import secrets; print(secrets.token_urlsafe(32))"'
+            )
+        _auth_config = AuthConfig(jwt_secret=jwt_secret)
+    return _auth_config
+
+
+def set_auth_config(config: AuthConfig) -> None:
+    """Set the global AuthConfig instance (for testing)."""
+    global _auth_config
+    _auth_config = config
diff --git a/backend/app/gateway/auth/credential_file.py b/backend/app/gateway/auth/credential_file.py
new file mode 100644
index 000000000..100ca3b04
--- /dev/null
+++ b/backend/app/gateway/auth/credential_file.py
@@ -0,0 +1,48 @@
+"""Write initial admin credentials to a restricted file instead of logs.
+
+Logging secrets to stdout/stderr is a well-known CodeQL finding
+(py/clear-text-logging-sensitive-data) — in production those logs
+get collected into ELK/Splunk/etc and become a secret sprawl
+source. This helper writes the credential to a 0600 file that only
+the process user can read, and returns the path so the caller can
+log **the path** (not the password) for the operator to pick up.
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+from deerflow.config.paths import get_paths
+
+_CREDENTIAL_FILENAME = "admin_initial_credentials.txt"
+
+
+def write_initial_credentials(email: str, password: str, *, label: str = "initial") -> Path:
+    """Write the admin email + password to ``{base_dir}/admin_initial_credentials.txt``.
+
+    The file is created **atomically** with mode 0600 via ``os.open``
+    so the password is never world-readable, even for the single syscall
+    window between ``write_text`` and ``chmod``.
+
+    ``label`` distinguishes "initial" (fresh creation) from "reset"
+    (password reset) in the file header so an operator picking up the
+    file after a restart can tell which event produced it.
+
+    Returns the absolute :class:`Path` to the file.
+    """
+    target = get_paths().base_dir / _CREDENTIAL_FILENAME
+    target.parent.mkdir(parents=True, exist_ok=True)
+
+    content = (
+        f"# DeerFlow admin {label} credentials\n# This file is generated on first boot or password reset.\n# Change the password after login via Settings -> Account,\n# then delete this file.\n#\nemail: {email}\npassword: {password}\n"
+    )
+
+    # Atomic 0600 create-or-truncate. O_TRUNC (not O_EXCL) so the
+    # reset-password path can rewrite an existing file without a
+    # separate unlink-then-create dance.
+    fd = os.open(target, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
+    with os.fdopen(fd, "w", encoding="utf-8") as fh:
+        fh.write(content)
+
+    return target.resolve()
diff --git a/backend/app/gateway/auth/errors.py b/backend/app/gateway/auth/errors.py
new file mode 100644
index 000000000..b5899ebd8
--- /dev/null
+++ b/backend/app/gateway/auth/errors.py
@@ -0,0 +1,45 @@
+"""Typed error definitions for auth module.
+
+AuthErrorCode: exhaustive enum of all auth failure conditions.
+TokenError: exhaustive enum of JWT decode failures.
+AuthErrorResponse: structured error payload for HTTP responses.
+"""
+
+from enum import StrEnum
+
+from pydantic import BaseModel
+
+
+class AuthErrorCode(StrEnum):
+    """Exhaustive list of auth error conditions."""
+
+    INVALID_CREDENTIALS = "invalid_credentials"
+    TOKEN_EXPIRED = "token_expired"
+    TOKEN_INVALID = "token_invalid"
+    USER_NOT_FOUND = "user_not_found"
+    EMAIL_ALREADY_EXISTS = "email_already_exists"
+    PROVIDER_NOT_FOUND = "provider_not_found"
+    NOT_AUTHENTICATED = "not_authenticated"
+    SYSTEM_ALREADY_INITIALIZED = "system_already_initialized"
+
+
+class TokenError(StrEnum):
+    """Exhaustive list of JWT decode failure reasons."""
+
+    EXPIRED = "expired"
+    INVALID_SIGNATURE = "invalid_signature"
+    MALFORMED = "malformed"
+
+
+class AuthErrorResponse(BaseModel):
+    """Structured error response — replaces bare `detail` strings."""
+
+    code: AuthErrorCode
+    message: str
+
+
+def token_error_to_code(err: TokenError) -> AuthErrorCode:
+    """Map TokenError to AuthErrorCode — single source of truth."""
+    if err == TokenError.EXPIRED:
+        return AuthErrorCode.TOKEN_EXPIRED
+    return AuthErrorCode.TOKEN_INVALID
diff --git a/backend/app/gateway/auth/jwt.py b/backend/app/gateway/auth/jwt.py
new file mode 100644
index 000000000..3853692b7
--- /dev/null
+++ b/backend/app/gateway/auth/jwt.py
@@ -0,0 +1,55 @@
+"""JWT token creation and verification."""
+
+from datetime import UTC, datetime, timedelta
+
+import jwt
+from pydantic import BaseModel
+
+from app.gateway.auth.config import get_auth_config
+from app.gateway.auth.errors import TokenError
+
+
+class TokenPayload(BaseModel):
+    """JWT token payload."""
+
+    sub: str  # user_id
+    exp: datetime
+    iat: datetime | None = None
+    ver: int = 0  # token_version — must match User.token_version
+
+
+def create_access_token(user_id: str, expires_delta: timedelta | None = None, token_version: int = 0) -> str:
+    """Create a JWT access token.
+
+    Args:
+        user_id: The user's UUID as string
+        expires_delta: Optional custom expiry, defaults to 7 days
+        token_version: User's current token_version for invalidation
+
+    Returns:
+        Encoded JWT string
+    """
+    config = get_auth_config()
+    expiry = expires_delta or timedelta(days=config.token_expiry_days)
+
+    now = datetime.now(UTC)
+    payload = {"sub": user_id, "exp": now + expiry, "iat": now, "ver": token_version}
+    return jwt.encode(payload, config.jwt_secret, algorithm="HS256")
+
+
+def decode_token(token: str) -> TokenPayload | TokenError:
+    """Decode and validate a JWT token.
+
+    Returns:
+        TokenPayload if valid, or a specific TokenError variant.
+    """
+    config = get_auth_config()
+    try:
+        payload = jwt.decode(token, config.jwt_secret, algorithms=["HS256"])
+        return TokenPayload(**payload)
+    except jwt.ExpiredSignatureError:
+        return TokenError.EXPIRED
+    except jwt.InvalidSignatureError:
+        return TokenError.INVALID_SIGNATURE
+    except jwt.PyJWTError:
+        return TokenError.MALFORMED
diff --git a/backend/app/gateway/auth/local_provider.py b/backend/app/gateway/auth/local_provider.py
new file mode 100644
index 000000000..8bfd15e59
--- /dev/null
+++ b/backend/app/gateway/auth/local_provider.py
@@ -0,0 +1,91 @@
+"""Local email/password authentication provider."""
+
+from app.gateway.auth.models import User
+from app.gateway.auth.password import hash_password_async, verify_password_async
+from app.gateway.auth.providers import AuthProvider
+from app.gateway.auth.repositories.base import UserRepository
+
+
+class LocalAuthProvider(AuthProvider):
+    """Email/password authentication provider using local database."""
+
+    def __init__(self, repository: UserRepository):
+        """Initialize with a UserRepository.
+
+        Args:
+            repository: UserRepository implementation (SQLite)
+        """
+        self._repo = repository
+
+    async def authenticate(self, credentials: dict) -> User | None:
+        """Authenticate with email and password.
+
+        Args:
+            credentials: dict with 'email' and 'password' keys
+
+        Returns:
+            User if authentication succeeds, None otherwise
+        """
+        email = credentials.get("email")
+        password = credentials.get("password")
+
+        if not email or not password:
+            return None
+
+        user = await self._repo.get_user_by_email(email)
+        if user is None:
+            return None
+
+        if user.password_hash is None:
+            # OAuth user without local password
+            return None
+
+        if not await verify_password_async(password, user.password_hash):
+            return None
+
+        return user
+
+    async def get_user(self, user_id: str) -> User | None:
+        """Get user by ID."""
+        return await self._repo.get_user_by_id(user_id)
+
+    async def create_user(self, email: str, password: str | None = None, system_role: str = "user", needs_setup: bool = False) -> User:
+        """Create a new local user.
+
+        Args:
+            email: User email address
+            password: Plain text password (will be hashed)
+            system_role: Role to assign ("admin" or "user")
+            needs_setup: If True, user must complete setup on first login
+
+        Returns:
+            Created User instance
+        """
+        password_hash = await hash_password_async(password) if password else None
+        user = User(
+            email=email,
+            password_hash=password_hash,
+            system_role=system_role,
+            needs_setup=needs_setup,
+        )
+        return await self._repo.create_user(user)
+
+    async def get_user_by_oauth(self, provider: str, oauth_id: str) -> User | None:
+        """Get user by OAuth provider and ID."""
+        return await self._repo.get_user_by_oauth(provider, oauth_id)
+
+    async def count_users(self) -> int:
+        """Return total number of registered users."""
+        return await self._repo.count_users()
+
+    async def count_admin_users(self) -> int:
+        """Return number of admin users."""
+        return await self._repo.count_admin_users()
+
+    async def update_user(self, user: User) -> User:
+        """Update an existing user."""
+        return await self._repo.update_user(user)
+
+    async def get_user_by_email(self, email: str) -> User | None:
+        """Get user by email."""
+        return await self._repo.get_user_by_email(email)
diff --git a/backend/app/gateway/auth/models.py b/backend/app/gateway/auth/models.py
new file mode 100644
index 000000000..d8f9b954a
--- /dev/null
+++ b/backend/app/gateway/auth/models.py
@@ -0,0 +1,41 @@
+"""User Pydantic models for authentication."""
+
+from datetime import UTC, datetime
+from typing import Literal
+from uuid import UUID, uuid4
+
+from pydantic import BaseModel, ConfigDict, EmailStr, Field
+
+
+def _utc_now() -> datetime:
+    """Return current UTC time (timezone-aware)."""
+    return datetime.now(UTC)
+
+
+class User(BaseModel):
+    """Internal user representation."""
+
+    model_config = ConfigDict(from_attributes=True)
+
+    id: UUID = Field(default_factory=uuid4, description="Primary key")
+    email: EmailStr = Field(..., description="Unique email address")
+    password_hash: str | None = Field(None, description="bcrypt hash, nullable for OAuth users")
+    system_role: Literal["admin", "user"] = Field(default="user")
+    created_at: datetime = Field(default_factory=_utc_now)
+
+    # OAuth linkage (optional)
+    oauth_provider: str | None = Field(None, description="e.g. 'github', 'google'")
+    oauth_id: str | None = Field(None, description="User ID from OAuth provider")
+
+    # Auth lifecycle
+    needs_setup: bool = Field(default=False, description="True for auto-created admin until setup completes")
+    token_version: int = Field(default=0, description="Incremented on password change to invalidate old JWTs")
+
+
+class UserResponse(BaseModel):
+    """Response model for user info endpoint."""
+
+    id: str
+    email: str
+    system_role: Literal["admin", "user"]
+    needs_setup: bool = False
diff --git a/backend/app/gateway/auth/password.py b/backend/app/gateway/auth/password.py
new file mode 100644
index 000000000..588b7a643
--- /dev/null
+++ b/backend/app/gateway/auth/password.py
@@ -0,0 +1,33 @@
+"""Password hashing utilities using bcrypt directly."""
+
+import asyncio
+
+import bcrypt
+
+
+def hash_password(password: str) -> str:
+    """Hash a password using bcrypt."""
+    return bcrypt.hashpw(password.encode("utf-8"), bcrypt.gensalt()).decode("utf-8")
+
+
+def verify_password(plain_password: str, hashed_password: str) -> bool:
+    """Verify a password against its hash."""
+    return bcrypt.checkpw(plain_password.encode("utf-8"), hashed_password.encode("utf-8"))
+
+
+async def hash_password_async(password: str) -> str:
+    """Hash a password using bcrypt (non-blocking).
+
+    Wraps the blocking bcrypt operation in a thread pool to avoid
+    blocking the event loop during password hashing.
+    """
+    return await asyncio.to_thread(hash_password, password)
+
+
+async def verify_password_async(plain_password: str, hashed_password: str) -> bool:
+    """Verify a password against its hash (non-blocking).
+
+    Wraps the blocking bcrypt operation in a thread pool to avoid
+    blocking the event loop during password verification.
+    """
+    return await asyncio.to_thread(verify_password, plain_password, hashed_password)
diff --git a/backend/app/gateway/auth/providers.py b/backend/app/gateway/auth/providers.py
new file mode 100644
index 000000000..25e782ce3
--- /dev/null
+++ b/backend/app/gateway/auth/providers.py
@@ -0,0 +1,24 @@
+"""Auth provider abstraction."""
+
+from abc import ABC, abstractmethod
+
+
+class AuthProvider(ABC):
+    """Abstract base class for authentication providers."""
+
+    @abstractmethod
+    async def authenticate(self, credentials: dict) -> "User | None":
+        """Authenticate user with given credentials.
+
+        Returns User if authentication succeeds, None otherwise.
+        """
+        ...
+
+    @abstractmethod
+    async def get_user(self, user_id: str) -> "User | None":
+        """Retrieve user by ID."""
+        ...
+
+
+# Import User at runtime to avoid circular imports
+from app.gateway.auth.models import User  # noqa: E402
diff --git a/backend/app/gateway/auth/repositories/__init__.py b/backend/app/gateway/auth/repositories/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/backend/app/gateway/auth/repositories/base.py b/backend/app/gateway/auth/repositories/base.py
new file mode 100644
index 000000000..d96753171
--- /dev/null
+++ b/backend/app/gateway/auth/repositories/base.py
@@ -0,0 +1,102 @@
+"""User repository interface for abstracting database operations."""
+
+from abc import ABC, abstractmethod
+
+from app.gateway.auth.models import User
+
+
+class UserNotFoundError(LookupError):
+    """Raised when a user repository operation targets a non-existent row.
+
+    Subclass of :class:`LookupError` so callers that already catch
+    ``LookupError`` for "missing entity" can keep working unchanged,
+    while specific call sites can pin to this class to distinguish
+    "concurrent delete during update" from other lookups.
+    """
+
+
+class UserRepository(ABC):
+    """Abstract interface for user data storage.
+
+    Implement this interface to support different storage backends
+    (SQLite)
+    """
+
+    @abstractmethod
+    async def create_user(self, user: User) -> User:
+        """Create a new user.
+
+        Args:
+            user: User object to create
+
+        Returns:
+            Created User with ID assigned
+
+        Raises:
+            ValueError: If email already exists
+        """
+        ...
+
+    @abstractmethod
+    async def get_user_by_id(self, user_id: str) -> User | None:
+        """Get user by ID.
+
+        Args:
+            user_id: User UUID as string
+
+        Returns:
+            User if found, None otherwise
+        """
+        ...
+
+    @abstractmethod
+    async def get_user_by_email(self, email: str) -> User | None:
+        """Get user by email.
+
+        Args:
+            email: User email address
+
+        Returns:
+            User if found, None otherwise
+        """
+        ...
+
+    @abstractmethod
+    async def update_user(self, user: User) -> User:
+        """Update an existing user.
+
+        Args:
+            user: User object with updated fields
+
+        Returns:
+            Updated User
+
+        Raises:
+            UserNotFoundError: If no row exists for ``user.id``. This is
+                a hard failure (not a no-op) so callers cannot mistake a
+                concurrent-delete race for a successful update.
+        """
+        ...
+
+    @abstractmethod
+    async def count_users(self) -> int:
+        """Return total number of registered users."""
+        ...
+
+    @abstractmethod
+    async def count_admin_users(self) -> int:
+        """Return number of users with system_role == 'admin'."""
+        ...
+
+    @abstractmethod
+    async def get_user_by_oauth(self, provider: str, oauth_id: str) -> User | None:
+        """Get user by OAuth provider and ID.
+
+        Args:
+            provider: OAuth provider name (e.g. 'github', 'google')
+            oauth_id: User ID from the OAuth provider
+
+        Returns:
+            User if found, None otherwise
+        """
+        ...
diff --git a/backend/app/gateway/auth/repositories/sqlite.py b/backend/app/gateway/auth/repositories/sqlite.py
new file mode 100644
index 000000000..3ee3978e3
--- /dev/null
+++ b/backend/app/gateway/auth/repositories/sqlite.py
@@ -0,0 +1,127 @@
+"""SQLAlchemy-backed UserRepository implementation.
+
+Uses the shared async session factory from
+``deerflow.persistence.engine`` — the ``users`` table lives in the
+same database as ``threads_meta``, ``runs``, ``run_events``, and
+``feedback``.
+
+Constructor takes the session factory directly (same pattern as the
+other four repositories in ``deerflow.persistence.*``). Callers
+construct this after ``init_engine_from_config()`` has run.
+"""
+
+from __future__ import annotations
+
+from datetime import UTC
+from uuid import UUID
+
+from sqlalchemy import func, select
+from sqlalchemy.exc import IntegrityError
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+
+from app.gateway.auth.models import User
+from app.gateway.auth.repositories.base import UserNotFoundError, UserRepository
+from deerflow.persistence.user.model import UserRow
+
+
+class SQLiteUserRepository(UserRepository):
+    """Async user repository backed by the shared SQLAlchemy engine."""
+
+    def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
+        self._sf = session_factory
+
+    # ── Converters ────────────────────────────────────────────────────
+
+    @staticmethod
+    def _row_to_user(row: UserRow) -> User:
+        return User(
+            id=UUID(row.id),
+            email=row.email,
+            password_hash=row.password_hash,
+            system_role=row.system_role,  # type: ignore[arg-type]
+            # SQLite loses tzinfo on read; reattach UTC so downstream
+            # code can compare timestamps reliably.
+            created_at=row.created_at if row.created_at.tzinfo else row.created_at.replace(tzinfo=UTC),
+            oauth_provider=row.oauth_provider,
+            oauth_id=row.oauth_id,
+            needs_setup=row.needs_setup,
+            token_version=row.token_version,
+        )
+
+    @staticmethod
+    def _user_to_row(user: User) -> UserRow:
+        return UserRow(
+            id=str(user.id),
+            email=user.email,
+            password_hash=user.password_hash,
+            system_role=user.system_role,
+            created_at=user.created_at,
+            oauth_provider=user.oauth_provider,
+            oauth_id=user.oauth_id,
+            needs_setup=user.needs_setup,
+            token_version=user.token_version,
+        )
+
+    # ── CRUD ──────────────────────────────────────────────────────────
+
+    async def create_user(self, user: User) -> User:
+        """Insert a new user. Raises ``ValueError`` on duplicate email."""
+        row = self._user_to_row(user)
+        async with self._sf() as session:
+            session.add(row)
+            try:
+                await session.commit()
+            except IntegrityError as exc:
+                await session.rollback()
+                raise ValueError(f"Email already registered: {user.email}") from exc
+        return user
+
+    async def get_user_by_id(self, user_id: str) -> User | None:
+        async with self._sf() as session:
+            row = await session.get(UserRow, user_id)
+            return self._row_to_user(row) if row is not None else None
+
+    async def get_user_by_email(self, email: str) -> User | None:
+        stmt = select(UserRow).where(UserRow.email == email)
+        async with self._sf() as session:
+            result = await session.execute(stmt)
+            row = result.scalar_one_or_none()
+            return self._row_to_user(row) if row is not None else None
+
+    async def update_user(self, user: User) -> User:
+        async with self._sf() as session:
+            row = await session.get(UserRow, str(user.id))
+            if row is None:
+                # Hard fail on concurrent delete: callers (reset_admin,
+                # password change handlers, _ensure_admin_user) all
+                # fetched the user just before this call, so a missing
+                # row here means the row vanished underneath us. Silent
+                # success would let the caller log "password reset" for
+                # a row that no longer exists.
+                raise UserNotFoundError(f"User {user.id} no longer exists")
+            row.email = user.email
+            row.password_hash = user.password_hash
+            row.system_role = user.system_role
+            row.oauth_provider = user.oauth_provider
+            row.oauth_id = user.oauth_id
+            row.needs_setup = user.needs_setup
+            row.token_version = user.token_version
+            await session.commit()
+        return user
+
+    async def count_users(self) -> int:
+        stmt = select(func.count()).select_from(UserRow)
+        async with self._sf() as session:
+            return await session.scalar(stmt) or 0
+
+    async def count_admin_users(self) -> int:
+        stmt = select(func.count()).select_from(UserRow).where(UserRow.system_role == "admin")
+        async with self._sf() as session:
+            return await session.scalar(stmt) or 0
+
+    async def get_user_by_oauth(self, provider: str, oauth_id: str) -> User | None:
+        stmt = select(UserRow).where(UserRow.oauth_provider == provider, UserRow.oauth_id == oauth_id)
+        async with self._sf() as session:
+            result = await session.execute(stmt)
+            row = result.scalar_one_or_none()
+            return self._row_to_user(row) if row is not None else None
diff --git a/backend/app/gateway/auth/reset_admin.py b/backend/app/gateway/auth/reset_admin.py
new file mode 100644
index 000000000..65c294dbe
--- /dev/null
+++ b/backend/app/gateway/auth/reset_admin.py
@@ -0,0 +1,92 @@
+"""CLI tool to reset an admin password.
+
+Usage:
+    python -m app.gateway.auth.reset_admin
+    python -m app.gateway.auth.reset_admin --email admin@example.com
+
+Writes the new password to ``.deer-flow/admin_initial_credentials.txt``
+(mode 0600) instead of printing it, so CI / log aggregators never see
+the cleartext secret.
+"""
+
+from __future__ import annotations
+
+import argparse
+import asyncio
+import secrets
+import sys
+
+from sqlalchemy import select
+
+from app.gateway.auth.credential_file import write_initial_credentials
+from app.gateway.auth.password import hash_password
+from app.gateway.auth.repositories.sqlite import SQLiteUserRepository
+from deerflow.persistence.user.model import UserRow
+
+
+async def _run(email: str | None) -> int:
+    from deerflow.config import AppConfig
+    from deerflow.persistence.engine import (
+        close_engine,
+        get_session_factory,
+        init_engine_from_config,
+    )
+
+    # CLI entry: load config explicitly at the top, pass down through the closure.
+    config = AppConfig.from_file()
+    await init_engine_from_config(config.database)
+    try:
+        sf = get_session_factory()
+        if sf is None:
+            print("Error: persistence engine not available (check config.database).", file=sys.stderr)
+            return 1
+
+        repo = SQLiteUserRepository(sf)
+
+        if email:
+            user = await repo.get_user_by_email(email)
+        else:
+            # Find first admin via direct SELECT — repository does not
+            # expose a "first admin" helper and we do not want to add
+            # one just for this CLI.
+            async with sf() as session:
+                stmt = select(UserRow).where(UserRow.system_role == "admin").limit(1)
+                row = (await session.execute(stmt)).scalar_one_or_none()
+            if row is None:
+                user = None
+            else:
+                user = await repo.get_user_by_id(row.id)
+
+        if user is None:
+            if email:
+                print(f"Error: user '{email}' not found.", file=sys.stderr)
+            else:
+                print("Error: no admin user found.", file=sys.stderr)
+            return 1
+
+        new_password = secrets.token_urlsafe(16)
+        user.password_hash = hash_password(new_password)
+        user.token_version += 1
+        user.needs_setup = True
+        await repo.update_user(user)
+
+        cred_path = write_initial_credentials(user.email, new_password, label="reset")
+        print(f"Password reset for: {user.email}")
+        print(f"Credentials written to: {cred_path} (mode 0600)")
+        print("Next login will require setup (new email + password).")
+        return 0
+    finally:
+        await close_engine()
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Reset admin password")
+    parser.add_argument("--email", help="Admin email (default: first admin found)")
+    args = parser.parse_args()
+
+    exit_code = asyncio.run(_run(args.email))
+    sys.exit(exit_code)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/backend/app/gateway/auth_middleware.py b/backend/app/gateway/auth_middleware.py
new file mode 100644
index 000000000..fd982cd79
--- /dev/null
+++ b/backend/app/gateway/auth_middleware.py
@@ -0,0 +1,118 @@
+"""Global authentication middleware — fail-closed safety net.
+
+Rejects unauthenticated requests to non-public paths with 401. When a
+request passes the cookie check, resolves the JWT payload to a real
+``User`` object and stamps it into both ``request.state.user`` and the
+``deerflow.runtime.user_context`` contextvar so that repository-layer
+owner filtering works automatically via the sentinel pattern.
+
+Fine-grained permission checks remain in authz.py decorators.
+"""
+
+from collections.abc import Callable
+
+from fastapi import HTTPException, Request, Response
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.responses import JSONResponse
+from starlette.types import ASGIApp
+
+from app.gateway.auth.errors import AuthErrorCode, AuthErrorResponse
+from app.gateway.authz import _ALL_PERMISSIONS, AuthContext
+from deerflow.runtime.user_context import reset_current_user, set_current_user
+
+# Paths that never require authentication.
+_PUBLIC_PATH_PREFIXES: tuple[str, ...] = (
+    "/health",
+    "/docs",
+    "/redoc",
+    "/openapi.json",
+)
+
+# Exact auth paths that are public (login/register/status check).
+# /api/v1/auth/me, /api/v1/auth/change-password etc. are NOT public.
+_PUBLIC_EXACT_PATHS: frozenset[str] = frozenset(
+    {
+        "/api/v1/auth/login/local",
+        "/api/v1/auth/register",
+        "/api/v1/auth/logout",
+        "/api/v1/auth/setup-status",
+        "/api/v1/auth/initialize",
+    }
+)
+
+
+def _is_public(path: str) -> bool:
+    stripped = path.rstrip("/")
+    if stripped in _PUBLIC_EXACT_PATHS:
+        return True
+    return any(path.startswith(prefix) for prefix in _PUBLIC_PATH_PREFIXES)
+
+
+class AuthMiddleware(BaseHTTPMiddleware):
+    """Strict auth gate: reject requests without a valid session.
+
+    Two-stage check for non-public paths:
+
+    1. Cookie presence — return 401 NOT_AUTHENTICATED if missing
+    2. JWT validation via ``get_optional_user_from_request`` — return 401
+       TOKEN_INVALID if the token is absent, malformed, expired, or the
+       signed user does not exist / is stale
+
+    On success, stamps ``request.state.user`` and the
+    ``deerflow.runtime.user_context`` contextvar so that repository-layer
+    owner filters work downstream without every route needing a
+    ``@require_auth`` decorator. Routes that need per-resource
+    authorization (e.g. "user A cannot read user B's thread by guessing
+    the URL") should additionally use ``@require_permission(...,
+    owner_check=True)`` for explicit enforcement — but authentication
+    itself is fully handled here.
+    """
+
+    def __init__(self, app: ASGIApp) -> None:
+        super().__init__(app)
+
+    async def dispatch(self, request: Request, call_next: Callable) -> Response:
+        if _is_public(request.url.path):
+            return await call_next(request)
+
+        # Non-public path: require session cookie
+        if not request.cookies.get("access_token"):
+            return JSONResponse(
+                status_code=401,
+                content={
+                    "detail": AuthErrorResponse(
+                        code=AuthErrorCode.NOT_AUTHENTICATED,
+                        message="Authentication required",
+                    ).model_dump()
+                },
+            )
+
+        # Strict JWT validation: reject junk/expired tokens with 401
+        # right here instead of silently passing through. This closes
+        # the "junk cookie bypass" gap (AUTH_TEST_PLAN test 7.5.8):
+        # without this, non-isolation routes like /api/models would
+        # accept any cookie-shaped string as authentication.
+        #
+        # We call the *strict* resolver so that fine-grained error
+        # codes (token_expired, token_invalid, user_not_found, …)
+        # propagate from AuthErrorCode, not get flattened into one
+        # generic code. BaseHTTPMiddleware doesn't let HTTPException
+        # bubble up, so we catch and render it as JSONResponse here.
+        from app.gateway.deps import get_current_user_from_request
+
+        try:
+            user = await get_current_user_from_request(request)
+        except HTTPException as exc:
+            return JSONResponse(status_code=exc.status_code, content={"detail": exc.detail})
+
+        # Stamp both request.state.user (for the contextvar pattern)
+        # and request.state.auth (so @require_permission's "auth is
+        # None" branch short-circuits instead of running the entire
+        # JWT-decode + DB-lookup pipeline a second time per request).
+        request.state.user = user
+        request.state.auth = AuthContext(user=user, permissions=_ALL_PERMISSIONS)
+        token = set_current_user(user)
+        try:
+            return await call_next(request)
+        finally:
+            reset_current_user(token)
diff --git a/backend/app/gateway/authz.py b/backend/app/gateway/authz.py
new file mode 100644
index 000000000..5842a24c7
--- /dev/null
+++ b/backend/app/gateway/authz.py
@@ -0,0 +1,262 @@
+"""Authorization decorators and context for DeerFlow.
+
+Inspired by LangGraph Auth system: https://github.com/langchain-ai/langgraph/blob/main/libs/sdk-py/langgraph_sdk/auth/__init__.py
+
+**Usage:**
+
+1. Use ``@require_auth`` on routes that need authentication
+2. Use ``@require_permission("resource", "action", filter_key=...)`` for permission checks
+3. The decorator chain processes from bottom to top
+
+**Example:**
+
+    @router.get("/{thread_id}")
+    @require_auth
+    @require_permission("threads", "read", owner_check=True)
+    async def get_thread(thread_id: str, request: Request):
+        # User is authenticated and has threads:read permission
+        ...
+
+**Permission Model:**
+
+- threads:read   - View thread
+- threads:write  - Create/update thread
+- threads:delete - Delete thread
+- runs:create   - Run agent
+- runs:read     - View run
+- runs:cancel   - Cancel run
+"""
+
+from __future__ import annotations
+
+import functools
+from collections.abc import Callable
+from typing import TYPE_CHECKING, Any, ParamSpec, TypeVar
+
+from fastapi import HTTPException, Request
+
+if TYPE_CHECKING:
+    from app.gateway.auth.models import User
+
+P = ParamSpec("P")
+T = TypeVar("T")
+
+
+# Permission constants
+class Permissions:
+    """Permission constants for resource:action format."""
+
+    # Threads
+    THREADS_READ = "threads:read"
+    THREADS_WRITE = "threads:write"
+    THREADS_DELETE = "threads:delete"
+
+    # Runs
+    RUNS_CREATE = "runs:create"
+    RUNS_READ = "runs:read"
+    RUNS_CANCEL = "runs:cancel"
+
+
+class AuthContext:
+    """Authentication context for the current request.
+
+    Stored in request.state.auth after require_auth decoration.
+
+    Attributes:
+        user: The authenticated user, or None if anonymous
+        permissions: List of permission strings (e.g., "threads:read")
+    """
+
+    __slots__ = ("user", "permissions")
+
+    def __init__(self, user: User | None = None, permissions: list[str] | None = None):
+        self.user = user
+        self.permissions = permissions or []
+
+    @property
+    def is_authenticated(self) -> bool:
+        """Check if user is authenticated."""
+        return self.user is not None
+
+    def has_permission(self, resource: str, action: str) -> bool:
+        """Check if context has permission for resource:action.
+
+        Args:
+            resource: Resource name (e.g., "threads")
+            action: Action name (e.g., "read")
+
+        Returns:
+            True if user has permission
+        """
+        permission = f"{resource}:{action}"
+        return permission in self.permissions
+
+    def require_user(self) -> User:
+        """Get user or raise 401.
+
+        Raises:
+            HTTPException 401 if not authenticated
+        """
+        if not self.user:
+            raise HTTPException(status_code=401, detail="Authentication required")
+        return self.user
+
+
+def get_auth_context(request: Request) -> AuthContext | None:
+    """Get AuthContext from request state."""
+    return getattr(request.state, "auth", None)
+
+
+_ALL_PERMISSIONS: list[str] = [
+    Permissions.THREADS_READ,
+    Permissions.THREADS_WRITE,
+    Permissions.THREADS_DELETE,
+    Permissions.RUNS_CREATE,
+    Permissions.RUNS_READ,
+    Permissions.RUNS_CANCEL,
+]
+
+
+async def _authenticate(request: Request) -> AuthContext:
+    """Authenticate request and return AuthContext.
+
+    Delegates to deps.get_optional_user_from_request() for the JWT→User pipeline.
+    Returns AuthContext with user=None for anonymous requests.
+    """
+    from app.gateway.deps import get_optional_user_from_request
+
+    user = await get_optional_user_from_request(request)
+    if user is None:
+        return AuthContext(user=None, permissions=[])
+
+    # In future, permissions could be stored in user record
+    return AuthContext(user=user, permissions=_ALL_PERMISSIONS)
+
+
+def require_auth[**P, T](func: Callable[P, T]) -> Callable[P, T]:
+    """Decorator that authenticates the request and sets AuthContext.
+
+    Must be placed ABOVE other decorators (executes after them).
+
+    Usage:
+        @router.get("/{thread_id}")
+        @require_auth  # Bottom decorator (executes first after permission check)
+        @require_permission("threads", "read")
+        async def get_thread(thread_id: str, request: Request):
+            auth: AuthContext = request.state.auth
+            ...
+
+    Raises:
+        ValueError: If 'request' parameter is missing
+    """
+
+    @functools.wraps(func)
+    async def wrapper(*args: Any, **kwargs: Any) -> Any:
+        request = kwargs.get("request")
+        if request is None:
+            raise ValueError("require_auth decorator requires 'request' parameter")
+
+        # Authenticate and set context
+        auth_context = await _authenticate(request)
+        request.state.auth = auth_context
+
+        return await func(*args, **kwargs)
+
+    return wrapper
+
+
+def require_permission(
+    resource: str,
+    action: str,
+    owner_check: bool = False,
+    require_existing: bool = False,
+) -> Callable[[Callable[P, T]], Callable[P, T]]:
+    """Decorator that checks permission for resource:action.
+
+    Must be used AFTER @require_auth.
+
+    Args:
+        resource: Resource name (e.g., "threads", "runs")
+        action: Action name (e.g., "read", "write", "delete")
+        owner_check: If True, validates that the current user owns the resource.
+                     Requires 'thread_id' path parameter and performs ownership check.
+        require_existing: Only meaningful with ``owner_check=True``. If True, a
+                          missing ``threads_meta`` row counts as a denial (404)
+                          instead of "untracked legacy thread, allow". Use on
+                          **destructive / mutating** routes (DELETE, PATCH,
+                          state-update) so a deleted thread can't be re-targeted
+                          by another user via the missing-row code path.
+
+    Usage:
+        # Read-style: legacy untracked threads are allowed
+        @require_permission("threads", "read", owner_check=True)
+        async def get_thread(thread_id: str, request: Request):
+            ...
+
+        # Destructive: thread row MUST exist and be owned by caller
+        @require_permission("threads", "delete", owner_check=True, require_existing=True)
+        async def delete_thread(thread_id: str, request: Request):
+            ...
+
+    Raises:
+        HTTPException 401: If authentication required but user is anonymous
+        HTTPException 403: If user lacks permission
+        HTTPException 404: If owner_check=True but user doesn't own the thread
+        ValueError: If owner_check=True but 'thread_id' parameter is missing
+    """
+
+    def decorator(func: Callable[P, T]) -> Callable[P, T]:
+        @functools.wraps(func)
+        async def wrapper(*args: Any, **kwargs: Any) -> Any:
+            request = kwargs.get("request")
+            if request is None:
+                raise ValueError("require_permission decorator requires 'request' parameter")
+
+            auth: AuthContext = getattr(request.state, "auth", None)
+            if auth is None:
+                auth = await _authenticate(request)
+                request.state.auth = auth
+
+            if not auth.is_authenticated:
+                raise HTTPException(status_code=401, detail="Authentication required")
+
+            # Check permission
+            if not auth.has_permission(resource, action):
+                raise HTTPException(
+                    status_code=403,
+                    detail=f"Permission denied: {resource}:{action}",
+                )
+
+            # Owner check for thread-specific resources.
+            #
+            # 2.0-rc moved thread metadata into the SQL persistence layer
+            # (``threads_meta`` table). We verify ownership via
+            # ``ThreadMetaStore.check_access``: it returns True for
+            # missing rows (untracked legacy thread) and for rows whose
+            # ``user_id`` is NULL (shared / pre-auth data), so this is
+            # strict-deny rather than strict-allow — only an *existing*
+            # row with a *different* user_id triggers 404.
+            if owner_check:
+                thread_id = kwargs.get("thread_id")
+                if thread_id is None:
+                    raise ValueError("require_permission with owner_check=True requires 'thread_id' parameter")
+
+                from app.gateway.deps import get_thread_store
+
+                thread_store = get_thread_store(request)
+                allowed = await thread_store.check_access(
+                    thread_id,
+                    str(auth.user.id),
+                    require_existing=require_existing,
+                )
+                if not allowed:
+                    raise HTTPException(
+                        status_code=404,
+                        detail=f"Thread {thread_id} not found",
+                    )
+
+            return await func(*args, **kwargs)
+
+        return wrapper
+
+    return decorator
diff --git a/backend/app/gateway/csrf_middleware.py b/backend/app/gateway/csrf_middleware.py
new file mode 100644
index 000000000..4c9b0f36a
--- /dev/null
+++ b/backend/app/gateway/csrf_middleware.py
@@ -0,0 +1,113 @@
+"""CSRF protection middleware for FastAPI.
+
+Per RFC-001:
+State-changing operations require CSRF protection.
+"""
+
+import secrets
+from collections.abc import Callable
+
+from fastapi import Request, Response
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.responses import JSONResponse
+from starlette.types import ASGIApp
+
+CSRF_COOKIE_NAME = "csrf_token"
+CSRF_HEADER_NAME = "X-CSRF-Token"
+CSRF_TOKEN_LENGTH = 64  # bytes
+
+
+def is_secure_request(request: Request) -> bool:
+    """Detect whether the original client request was made over HTTPS."""
+    return request.headers.get("x-forwarded-proto", request.url.scheme) == "https"
+
+
+def generate_csrf_token() -> str:
+    """Generate a secure random CSRF token."""
+    return secrets.token_urlsafe(CSRF_TOKEN_LENGTH)
+
+
+def should_check_csrf(request: Request) -> bool:
+    """Determine if a request needs CSRF validation.
+
+    CSRF is checked for state-changing methods (POST, PUT, DELETE, PATCH).
+    GET, HEAD, OPTIONS, and TRACE are exempt per RFC 7231.
+    """
+    if request.method not in ("POST", "PUT", "DELETE", "PATCH"):
+        return False
+
+    path = request.url.path.rstrip("/")
+    # Exempt /api/v1/auth/me endpoint
+    if path == "/api/v1/auth/me":
+        return False
+    return True
+
+
+_AUTH_EXEMPT_PATHS: frozenset[str] = frozenset(
+    {
+        "/api/v1/auth/login/local",
+        "/api/v1/auth/logout",
+        "/api/v1/auth/register",
+        "/api/v1/auth/initialize",
+    }
+)
+
+
+def is_auth_endpoint(request: Request) -> bool:
+    """Check if the request is to an auth endpoint.
+
+    Auth endpoints don't need CSRF validation on first call (no token).
+    """
+    return request.url.path.rstrip("/") in _AUTH_EXEMPT_PATHS
+
+
+class CSRFMiddleware(BaseHTTPMiddleware):
+    """Middleware that implements CSRF protection using Double Submit Cookie pattern."""
+
+    def __init__(self, app: ASGIApp) -> None:
+        super().__init__(app)
+
+    async def dispatch(self, request: Request, call_next: Callable) -> Response:
+        _is_auth = is_auth_endpoint(request)
+
+        if should_check_csrf(request) and not _is_auth:
+            cookie_token = request.cookies.get(CSRF_COOKIE_NAME)
+            header_token = request.headers.get(CSRF_HEADER_NAME)
+
+            if not cookie_token or not header_token:
+                return JSONResponse(
+                    status_code=403,
+                    content={"detail": "CSRF token missing. Include X-CSRF-Token header."},
+                )
+
+            if not secrets.compare_digest(cookie_token, header_token):
+                return JSONResponse(
+                    status_code=403,
+                    content={"detail": "CSRF token mismatch."},
+                )
+
+        response = await call_next(request)
+
+        # For auth endpoints that set up session, also set CSRF cookie
+        if _is_auth and request.method == "POST":
+            # Generate a new CSRF token for the session
+            csrf_token = generate_csrf_token()
+            is_https = is_secure_request(request)
+            response.set_cookie(
+                key=CSRF_COOKIE_NAME,
+                value=csrf_token,
+                httponly=False,  # Must be JS-readable for Double Submit Cookie pattern
+                secure=is_https,
+                samesite="strict",
+            )
+
+        return response
+
+
+def get_csrf_token(request: Request) -> str | None:
+    """Get the CSRF token from the current request's cookies.
+
+    This is useful for server-side rendering where you need to embed
+    token in forms or headers.
+    """
+    return request.cookies.get(CSRF_COOKIE_NAME)
diff --git a/backend/app/gateway/deps.py b/backend/app/gateway/deps.py
index 115868331..73b1ffe18 100644
--- a/backend/app/gateway/deps.py
+++ b/backend/app/gateway/deps.py
@@ -10,10 +10,30 @@ from __future__ import annotations
 
 from collections.abc import AsyncGenerator
 from contextlib import AsyncExitStack, asynccontextmanager
+from typing import TYPE_CHECKING
 
 from fastapi import FastAPI, HTTPException, Request
 
-from deerflow.runtime import RunManager, StreamBridge
+from deerflow.config.app_config import AppConfig
+from deerflow.runtime import RunContext, RunManager
+
+if TYPE_CHECKING:
+    from app.gateway.auth.local_provider import LocalAuthProvider
+    from app.gateway.auth.repositories.sqlite import SQLiteUserRepository
+    from deerflow.persistence.thread_meta.base import ThreadMetaStore
+
+
+def get_config(request: Request) -> AppConfig:
+    """FastAPI dependency returning the app-scoped ``AppConfig``.
+
+    Reads from ``request.app.state.config`` which is set at startup
+    (``app.py`` lifespan) and swapped on config reload (``routers/mcp.py``,
+    ``routers/skills.py``).
+    """
+    cfg = getattr(request.app.state, "config", None)
+    if cfg is None:
+        raise HTTPException(status_code=503, detail="Configuration not available")
+    return cfg
 
 
 @asynccontextmanager
@@ -25,15 +45,54 @@ async def langgraph_runtime(app: FastAPI) -> AsyncGenerator[None, None]:
         async with langgraph_runtime(app):
             yield
     """
-    from deerflow.agents.checkpointer.async_provider import make_checkpointer
+    from deerflow.persistence.engine import close_engine, get_session_factory, init_engine_from_config
     from deerflow.runtime import make_store, make_stream_bridge
+    from deerflow.runtime.checkpointer.async_provider import make_checkpointer
+    from deerflow.runtime.events.store import make_run_event_store
 
     async with AsyncExitStack() as stack:
-        app.state.stream_bridge = await stack.enter_async_context(make_stream_bridge())
-        app.state.checkpointer = await stack.enter_async_context(make_checkpointer())
-        app.state.store = await stack.enter_async_context(make_store())
-        app.state.run_manager = RunManager()
-        yield
+        # app.state.config is populated earlier in lifespan(); thread it
+        # explicitly into every provider below.
+        config = app.state.config
+
+        app.state.stream_bridge = await stack.enter_async_context(make_stream_bridge(config))
+
+        # Initialize persistence engine BEFORE checkpointer so that
+        # auto-create-database logic runs first (postgres backend).
+        await init_engine_from_config(config.database)
+
+        app.state.checkpointer = await stack.enter_async_context(make_checkpointer(config))
+        app.state.store = await stack.enter_async_context(make_store(config))
+
+        # Initialize repositories — one get_session_factory() call for all.
+        sf = get_session_factory()
+        if sf is not None:
+            from deerflow.persistence.feedback import FeedbackRepository
+            from deerflow.persistence.run import RunRepository
+
+            app.state.run_store = RunRepository(sf)
+            app.state.feedback_repo = FeedbackRepository(sf)
+        else:
+            from deerflow.runtime.runs.store.memory import MemoryRunStore
+
+            app.state.run_store = MemoryRunStore()
+            app.state.feedback_repo = None
+
+        from deerflow.persistence.thread_meta import make_thread_store
+
+        app.state.thread_store = make_thread_store(sf, app.state.store)
+
+        # Run event store (has its own factory with config-driven backend selection)
+        run_events_config = getattr(config, "run_events", None)
+        app.state.run_event_store = make_run_event_store(run_events_config)
+
+        # RunManager with store backing for persistence
+        app.state.run_manager = RunManager(store=app.state.run_store)
+
+        try:
+            yield
+        finally:
+            await close_engine()
 
 
 # ---------------------------------------------------------------------------
@@ -41,30 +100,148 @@ async def langgraph_runtime(app: FastAPI) -> AsyncGenerator[None, None]:
 # ---------------------------------------------------------------------------
 
 
-def get_stream_bridge(request: Request) -> StreamBridge:
-    """Return the global :class:`StreamBridge`, or 503."""
-    bridge = getattr(request.app.state, "stream_bridge", None)
-    if bridge is None:
-        raise HTTPException(status_code=503, detail="Stream bridge not available")
-    return bridge
+def _require(attr: str, label: str):
+    """Create a FastAPI dependency that returns ``app.state.<attr>`` or 503."""
+
+    def dep(request: Request):
+        val = getattr(request.app.state, attr, None)
+        if val is None:
+            raise HTTPException(status_code=503, detail=f"{label} not available")
+        return val
+
+    dep.__name__ = dep.__qualname__ = f"get_{attr}"
+    return dep
 
 
-def get_run_manager(request: Request) -> RunManager:
-    """Return the global :class:`RunManager`, or 503."""
-    mgr = getattr(request.app.state, "run_manager", None)
-    if mgr is None:
-        raise HTTPException(status_code=503, detail="Run manager not available")
-    return mgr
-
-
-def get_checkpointer(request: Request):
-    """Return the global checkpointer, or 503."""
-    cp = getattr(request.app.state, "checkpointer", None)
-    if cp is None:
-        raise HTTPException(status_code=503, detail="Checkpointer not available")
-    return cp
+get_stream_bridge = _require("stream_bridge", "Stream bridge")
+get_run_manager = _require("run_manager", "Run manager")
+get_checkpointer = _require("checkpointer", "Checkpointer")
+get_run_event_store = _require("run_event_store", "Run event store")
+get_feedback_repo = _require("feedback_repo", "Feedback")
+get_run_store = _require("run_store", "Run store")
 
 
 def get_store(request: Request):
     """Return the global store (may be ``None`` if not configured)."""
     return getattr(request.app.state, "store", None)
+
+
+def get_thread_store(request: Request) -> ThreadMetaStore:
+    """Return the thread metadata store (SQL or memory-backed)."""
+    val = getattr(request.app.state, "thread_store", None)
+    if val is None:
+        raise HTTPException(status_code=503, detail="Thread metadata store not available")
+    return val
+
+
+def get_run_context(request: Request) -> RunContext:
+    """Build a :class:`RunContext` from ``app.state`` singletons.
+
+    Returns a *base* context with infrastructure dependencies.  Callers that
+    need per-run fields (e.g. ``follow_up_to_run_id``) should use
+    ``dataclasses.replace(ctx, follow_up_to_run_id=...)`` before passing it
+    to :func:`run_agent`.
+    """
+    config = get_config(request)
+    return RunContext(
+        checkpointer=get_checkpointer(request),
+        store=get_store(request),
+        event_store=get_run_event_store(request),
+        run_events_config=getattr(config, "run_events", None),
+        thread_store=get_thread_store(request),
+        app_config=config,
+    )
+
+
+
+# ---------------------------------------------------------------------------
+# Auth helpers (used by authz.py and auth middleware)
+# ---------------------------------------------------------------------------
+
+# Cached singletons to avoid repeated instantiation per request
+_cached_local_provider: LocalAuthProvider | None = None
+_cached_repo: SQLiteUserRepository | None = None
+
+
+def get_local_provider() -> LocalAuthProvider:
+    """Get or create the cached LocalAuthProvider singleton.
+
+    Must be called after ``init_engine_from_config()`` — the shared
+    session factory is required to construct the user repository.
+    """
+    global _cached_local_provider, _cached_repo
+    if _cached_repo is None:
+        from app.gateway.auth.repositories.sqlite import SQLiteUserRepository
+        from deerflow.persistence.engine import get_session_factory
+
+        sf = get_session_factory()
+        if sf is None:
+            raise RuntimeError("get_local_provider() called before init_engine_from_config(); cannot access users table")
+        _cached_repo = SQLiteUserRepository(sf)
+    if _cached_local_provider is None:
+        from app.gateway.auth.local_provider import LocalAuthProvider
+
+        _cached_local_provider = LocalAuthProvider(repository=_cached_repo)
+    return _cached_local_provider
+
+
+async def get_current_user_from_request(request: Request):
+    """Get the current authenticated user from the request cookie.
+
+    Raises HTTPException 401 if not authenticated.
+    """
+    from app.gateway.auth import decode_token
+    from app.gateway.auth.errors import AuthErrorCode, AuthErrorResponse, TokenError, token_error_to_code
+
+    access_token = request.cookies.get("access_token")
+    if not access_token:
+        raise HTTPException(
+            status_code=401,
+            detail=AuthErrorResponse(code=AuthErrorCode.NOT_AUTHENTICATED, message="Not authenticated").model_dump(),
+        )
+
+    payload = decode_token(access_token)
+    if isinstance(payload, TokenError):
+        raise HTTPException(
+            status_code=401,
+            detail=AuthErrorResponse(code=token_error_to_code(payload), message=f"Token error: {payload.value}").model_dump(),
+        )
+
+    provider = get_local_provider()
+    user = await provider.get_user(payload.sub)
+    if user is None:
+        raise HTTPException(
+            status_code=401,
+            detail=AuthErrorResponse(code=AuthErrorCode.USER_NOT_FOUND, message="User not found").model_dump(),
+        )
+
+    # Token version mismatch → password was changed, token is stale
+    if user.token_version != payload.ver:
+        raise HTTPException(
+            status_code=401,
+            detail=AuthErrorResponse(code=AuthErrorCode.TOKEN_INVALID, message="Token revoked (password changed)").model_dump(),
+        )
+
+    return user
+
+
+async def get_optional_user_from_request(request: Request):
+    """Get optional authenticated user from request.
+
+    Returns None if not authenticated.
+    """
+    try:
+        return await get_current_user_from_request(request)
+    except HTTPException:
+        return None
+
+
+async def get_current_user(request: Request) -> str | None:
+    """Extract user_id from request cookie, or None if not authenticated.
+
+    Thin adapter that returns the string id for callers that only need
+    identification (e.g., ``feedback.py``). Full-user callers should use
+    ``get_current_user_from_request`` or ``get_optional_user_from_request``.
+    """
+    user = await get_optional_user_from_request(request)
+    return str(user.id) if user else None
diff --git a/backend/app/gateway/langgraph_auth.py b/backend/app/gateway/langgraph_auth.py
new file mode 100644
index 000000000..06074b9b8
--- /dev/null
+++ b/backend/app/gateway/langgraph_auth.py
@@ -0,0 +1,106 @@
+"""LangGraph Server auth handler — shares JWT logic with Gateway.
+
+Loaded by LangGraph Server via langgraph.json ``auth.path``.
+Reuses the same ``decode_token`` / ``get_auth_config`` as Gateway,
+so both modes validate tokens with the same secret and rules.
+
+Two layers:
+  1. @auth.authenticate — validates JWT cookie, extracts user_id,
+     and enforces CSRF on state-changing methods (POST/PUT/DELETE/PATCH)
+  2. @auth.on — returns metadata filter so each user only sees own threads
+"""
+
+import secrets
+
+from langgraph_sdk import Auth
+
+from app.gateway.auth.errors import TokenError
+from app.gateway.auth.jwt import decode_token
+from app.gateway.deps import get_local_provider
+
+auth = Auth()
+
+# Methods that require CSRF validation (state-changing per RFC 7231).
+_CSRF_METHODS = frozenset({"POST", "PUT", "DELETE", "PATCH"})
+
+
+def _check_csrf(request) -> None:
+    """Enforce Double Submit Cookie CSRF check for state-changing requests.
+
+    Mirrors Gateway's CSRFMiddleware logic so that LangGraph routes
+    proxied directly by nginx have the same CSRF protection.
+    """
+    method = getattr(request, "method", "") or ""
+    if method.upper() not in _CSRF_METHODS:
+        return
+
+    cookie_token = request.cookies.get("csrf_token")
+    header_token = request.headers.get("x-csrf-token")
+
+    if not cookie_token or not header_token:
+        raise Auth.exceptions.HTTPException(
+            status_code=403,
+            detail="CSRF token missing. Include X-CSRF-Token header.",
+        )
+
+    if not secrets.compare_digest(cookie_token, header_token):
+        raise Auth.exceptions.HTTPException(
+            status_code=403,
+            detail="CSRF token mismatch.",
+        )
+
+
+@auth.authenticate
+async def authenticate(request):
+    """Validate the session cookie, decode JWT, and check token_version.
+
+    Same validation chain as Gateway's get_current_user_from_request:
+      cookie → decode JWT → DB lookup → token_version match
+    Also enforces CSRF on state-changing methods.
+    """
+    # CSRF check before authentication so forged cross-site requests
+    # are rejected early, even if the cookie carries a valid JWT.
+    _check_csrf(request)
+
+    token = request.cookies.get("access_token")
+    if not token:
+        raise Auth.exceptions.HTTPException(
+            status_code=401,
+            detail="Not authenticated",
+        )
+
+    payload = decode_token(token)
+    if isinstance(payload, TokenError):
+        raise Auth.exceptions.HTTPException(
+            status_code=401,
+            detail=f"Token error: {payload.value}",
+        )
+
+    user = await get_local_provider().get_user(payload.sub)
+    if user is None:
+        raise Auth.exceptions.HTTPException(
+            status_code=401,
+            detail="User not found",
+        )
+    if user.token_version != payload.ver:
+        raise Auth.exceptions.HTTPException(
+            status_code=401,
+            detail="Token revoked (password changed)",
+        )
+
+    return payload.sub
+
+
+@auth.on
+async def add_owner_filter(ctx: Auth.types.AuthContext, value: dict):
+    """Inject user_id metadata on writes; filter by user_id on reads.
+
+    Gateway stores thread ownership as ``metadata.user_id``.
+    This handler ensures LangGraph Server enforces the same isolation.
+    """
+    # On create/update: stamp user_id into metadata
+    metadata = value.setdefault("metadata", {})
+    metadata["user_id"] = ctx.user.identity
+
+    # Return filter dict — LangGraph applies it to search/read/delete
+    return {"user_id": ctx.user.identity}
diff --git a/backend/app/gateway/path_utils.py b/backend/app/gateway/path_utils.py
index 4869c9404..ded348c78 100644
--- a/backend/app/gateway/path_utils.py
+++ b/backend/app/gateway/path_utils.py
@@ -5,6 +5,7 @@ from pathlib import Path
 from fastapi import HTTPException
 
 from deerflow.config.paths import get_paths
+from deerflow.runtime.user_context import get_effective_user_id
 
 
 def resolve_thread_virtual_path(thread_id: str, virtual_path: str) -> Path:
@@ -22,7 +23,7 @@ def resolve_thread_virtual_path(thread_id: str, virtual_path: str) -> Path:
         HTTPException: If the path is invalid or outside allowed directories.
     """
     try:
-        return get_paths().resolve_virtual_path(thread_id, virtual_path)
+        return get_paths().resolve_virtual_path(thread_id, virtual_path, user_id=get_effective_user_id())
     except ValueError as e:
         status = 403 if "traversal" in str(e) else 400
         raise HTTPException(status_code=status, detail=str(e))
diff --git a/backend/app/gateway/routers/agents.py b/backend/app/gateway/routers/agents.py
index ff4476893..3b1fcb733 100644
--- a/backend/app/gateway/routers/agents.py
+++ b/backend/app/gateway/routers/agents.py
@@ -5,11 +5,12 @@ import re
 import shutil
 
 import yaml
-from fastapi import APIRouter, HTTPException
+from fastapi import APIRouter, Depends, HTTPException
 from pydantic import BaseModel, Field
 
-from deerflow.config.agents_api_config import get_agents_api_config
+from app.gateway.deps import get_config
 from deerflow.config.agents_config import AgentConfig, list_custom_agents, load_agent_config, load_agent_soul
+from deerflow.config.app_config import AppConfig
 from deerflow.config.paths import get_paths
 
 logger = logging.getLogger(__name__)
@@ -77,9 +78,9 @@ def _normalize_agent_name(name: str) -> str:
     return name.lower()
 
 
-def _require_agents_api_enabled() -> None:
+def _require_agents_api_enabled(app_config: AppConfig) -> None:
     """Reject access unless the custom-agent management API is explicitly enabled."""
-    if not get_agents_api_config().enabled:
+    if not app_config.agents_api.enabled:
         raise HTTPException(
             status_code=403,
             detail=("Custom-agent management API is disabled. Set agents_api.enabled=true to expose agent and user-profile routes over HTTP."),
@@ -108,13 +109,13 @@ def _agent_config_to_response(agent_cfg: AgentConfig, include_soul: bool = False
     summary="List Custom Agents",
     description="List all custom agents available in the agents directory, including their soul content.",
 )
-async def list_agents() -> AgentsListResponse:
+async def list_agents(app_config: AppConfig = Depends(get_config)) -> AgentsListResponse:
     """List all custom agents.
 
     Returns:
         List of all custom agents with their metadata and soul content.
     """
-    _require_agents_api_enabled()
+    _require_agents_api_enabled(app_config)
 
     try:
         agents = list_custom_agents()
@@ -141,7 +142,7 @@ async def check_agent_name(name: str) -> dict:
     Raises:
         HTTPException: 422 if the name is invalid.
     """
-    _require_agents_api_enabled()
+    _require_agents_api_enabled(app_config)
     _validate_agent_name(name)
     normalized = _normalize_agent_name(name)
     available = not get_paths().agent_dir(normalized).exists()
@@ -154,7 +155,7 @@ async def check_agent_name(name: str) -> dict:
     summary="Get Custom Agent",
     description="Retrieve details and SOUL.md content for a specific custom agent.",
 )
-async def get_agent(name: str) -> AgentResponse:
+async def get_agent(name: str, app_config: AppConfig = Depends(get_config)) -> AgentResponse:
     """Get a specific custom agent by name.
 
     Args:
@@ -166,7 +167,7 @@ async def get_agent(name: str) -> AgentResponse:
     Raises:
         HTTPException: 404 if agent not found.
     """
-    _require_agents_api_enabled()
+    _require_agents_api_enabled(app_config)
     _validate_agent_name(name)
     name = _normalize_agent_name(name)
 
@@ -187,7 +188,7 @@ async def get_agent(name: str) -> AgentResponse:
     summary="Create Custom Agent",
     description="Create a new custom agent with its config and SOUL.md.",
 )
-async def create_agent_endpoint(request: AgentCreateRequest) -> AgentResponse:
+async def create_agent_endpoint(request: AgentCreateRequest, app_config: AppConfig = Depends(get_config)) -> AgentResponse:
     """Create a new custom agent.
 
     Args:
@@ -199,7 +200,7 @@ async def create_agent_endpoint(request: AgentCreateRequest) -> AgentResponse:
     Raises:
         HTTPException: 409 if agent already exists, 422 if name is invalid.
     """
-    _require_agents_api_enabled()
+    _require_agents_api_enabled(app_config)
     _validate_agent_name(request.name)
     normalized_name = _normalize_agent_name(request.name)
 
@@ -251,7 +252,7 @@ async def create_agent_endpoint(request: AgentCreateRequest) -> AgentResponse:
     summary="Update Custom Agent",
     description="Update an existing custom agent's config and/or SOUL.md.",
 )
-async def update_agent(name: str, request: AgentUpdateRequest) -> AgentResponse:
+async def update_agent(name: str, request: AgentUpdateRequest, app_config: AppConfig = Depends(get_config)) -> AgentResponse:
     """Update an existing custom agent.
 
     Args:
@@ -264,7 +265,7 @@ async def update_agent(name: str, request: AgentUpdateRequest) -> AgentResponse:
     Raises:
         HTTPException: 404 if agent not found.
     """
-    _require_agents_api_enabled()
+    _require_agents_api_enabled(app_config)
     _validate_agent_name(name)
     name = _normalize_agent_name(name)
 
@@ -342,13 +343,13 @@ class UserProfileUpdateRequest(BaseModel):
     summary="Get User Profile",
     description="Read the global USER.md file that is injected into all custom agents.",
 )
-async def get_user_profile() -> UserProfileResponse:
+async def get_user_profile(app_config: AppConfig = Depends(get_config)) -> UserProfileResponse:
     """Return the current USER.md content.
 
     Returns:
         UserProfileResponse with content=None if USER.md does not exist yet.
     """
-    _require_agents_api_enabled()
+    _require_agents_api_enabled(app_config)
 
     try:
         user_md_path = get_paths().user_md_file
@@ -367,7 +368,7 @@ async def get_user_profile() -> UserProfileResponse:
     summary="Update User Profile",
     description="Write the global USER.md file that is injected into all custom agents.",
 )
-async def update_user_profile(request: UserProfileUpdateRequest) -> UserProfileResponse:
+async def update_user_profile(request: UserProfileUpdateRequest, app_config: AppConfig = Depends(get_config)) -> UserProfileResponse:
     """Create or overwrite the global USER.md.
 
     Args:
@@ -376,7 +377,7 @@ async def update_user_profile(request: UserProfileUpdateRequest) -> UserProfileR
     Returns:
         UserProfileResponse with the saved content.
     """
-    _require_agents_api_enabled()
+    _require_agents_api_enabled(app_config)
 
     try:
         paths = get_paths()
@@ -395,7 +396,7 @@ async def update_user_profile(request: UserProfileUpdateRequest) -> UserProfileR
     summary="Delete Custom Agent",
     description="Delete a custom agent and all its files (config, SOUL.md, memory).",
 )
-async def delete_agent(name: str) -> None:
+async def delete_agent(name: str, app_config: AppConfig = Depends(get_config)) -> None:
     """Delete a custom agent.
 
     Args:
@@ -404,7 +405,7 @@ async def delete_agent(name: str) -> None:
     Raises:
         HTTPException: 404 if agent not found.
     """
-    _require_agents_api_enabled()
+    _require_agents_api_enabled(app_config)
     _validate_agent_name(name)
     name = _normalize_agent_name(name)
 
diff --git a/backend/app/gateway/routers/artifacts.py b/backend/app/gateway/routers/artifacts.py
index a58fd5c0b..78ea5fa00 100644
--- a/backend/app/gateway/routers/artifacts.py
+++ b/backend/app/gateway/routers/artifacts.py
@@ -7,6 +7,7 @@ from urllib.parse import quote
 from fastapi import APIRouter, HTTPException, Request
 from fastapi.responses import FileResponse, PlainTextResponse, Response
 
+from app.gateway.authz import require_permission
 from app.gateway.path_utils import resolve_thread_virtual_path
 
 logger = logging.getLogger(__name__)
@@ -81,6 +82,7 @@ def _extract_file_from_skill_archive(zip_path: Path, internal_path: str) -> byte
     summary="Get Artifact File",
     description="Retrieve an artifact file generated by the AI agent. Text and binary files can be viewed inline, while active web content is always downloaded.",
 )
+@require_permission("threads", "read", owner_check=True)
 async def get_artifact(thread_id: str, path: str, request: Request, download: bool = False) -> Response:
     """Get an artifact file by its path.
 
diff --git a/backend/app/gateway/routers/auth.py b/backend/app/gateway/routers/auth.py
new file mode 100644
index 000000000..44b996331
--- /dev/null
+++ b/backend/app/gateway/routers/auth.py
@@ -0,0 +1,459 @@
+"""Authentication endpoints."""
+
+import logging
+import os
+import time
+from ipaddress import ip_address, ip_network
+
+from fastapi import APIRouter, Depends, HTTPException, Request, Response, status
+from fastapi.security import OAuth2PasswordRequestForm
+from pydantic import BaseModel, EmailStr, Field, field_validator
+
+from app.gateway.auth import (
+    UserResponse,
+    create_access_token,
+)
+from app.gateway.auth.config import get_auth_config
+from app.gateway.auth.errors import AuthErrorCode, AuthErrorResponse
+from app.gateway.csrf_middleware import is_secure_request
+from app.gateway.deps import get_current_user_from_request, get_local_provider
+
+logger = logging.getLogger(__name__)
+
+router = APIRouter(prefix="/api/v1/auth", tags=["auth"])
+
+
+# ── Request/Response Models ──────────────────────────────────────────────
+
+
+class LoginResponse(BaseModel):
+    """Response model for login — token only lives in HttpOnly cookie."""
+
+    expires_in: int  # seconds
+    needs_setup: bool = False
+
+
+# Top common-password blocklist. Drawn from the public SecLists "10k worst
+# passwords" set, lowercased + length>=8 only (shorter ones already fail
+# the min_length check). Kept tight on purpose: this is the **lower bound**
+# defense, not a full HIBP / passlib check, and runs in-process per request.
+_COMMON_PASSWORDS: frozenset[str] = frozenset(
+    {
+        "password",
+        "password1",
+        "password12",
+        "password123",
+        "password1234",
+        "12345678",
+        "123456789",
+        "1234567890",
+        "qwerty12",
+        "qwertyui",
+        "qwerty123",
+        "abc12345",
+        "abcd1234",
+        "iloveyou",
+        "letmein1",
+        "welcome1",
+        "welcome123",
+        "admin123",
+        "administrator",
+        "passw0rd",
+        "p@ssw0rd",
+        "monkey12",
+        "trustno1",
+        "sunshine",
+        "princess",
+        "football",
+        "baseball",
+        "superman",
+        "batman123",
+        "starwars",
+        "dragon123",
+        "master123",
+        "shadow12",
+        "michael1",
+        "jennifer",
+        "computer",
+    }
+)
+
+
+def _password_is_common(password: str) -> bool:
+    """Case-insensitive blocklist check.
+
+    Lowercases the input so trivial mutations like ``Password`` /
+    ``PASSWORD`` are also rejected. Does not normalize digit substitutions
+    (``p@ssw0rd`` is included as a literal entry instead) — keeping the
+    rule cheap and predictable.
+    """
+    return password.lower() in _COMMON_PASSWORDS
+
+
+def _validate_strong_password(value: str) -> str:
+    """Pydantic field-validator body shared by Register + ChangePassword.
+
+    Constraint = function, not type-level mixin. The two request models
+    have no "is-a" relationship; they only share the password-strength
+    rule. Lifting it into a free function lets each model bind it via
+    ``@field_validator(field_name)`` without inheritance gymnastics.
+    """
+    if _password_is_common(value):
+        raise ValueError("Password is too common; choose a stronger password.")
+    return value
+
+
+class RegisterRequest(BaseModel):
+    """Request model for user registration."""
+
+    email: EmailStr
+    password: str = Field(..., min_length=8)
+
+    _strong_password = field_validator("password")(classmethod(lambda cls, v: _validate_strong_password(v)))
+
+
+class ChangePasswordRequest(BaseModel):
+    """Request model for password change (also handles setup flow)."""
+
+    current_password: str
+    new_password: str = Field(..., min_length=8)
+    new_email: EmailStr | None = None
+
+    _strong_password = field_validator("new_password")(classmethod(lambda cls, v: _validate_strong_password(v)))
+
+
+class MessageResponse(BaseModel):
+    """Generic message response."""
+
+    message: str
+
+
+# ── Helpers ───────────────────────────────────────────────────────────────
+
+
+def _set_session_cookie(response: Response, token: str, request: Request) -> None:
+    """Set the access_token HttpOnly cookie on the response."""
+    config = get_auth_config()
+    is_https = is_secure_request(request)
+    response.set_cookie(
+        key="access_token",
+        value=token,
+        httponly=True,
+        secure=is_https,
+        samesite="lax",
+        max_age=config.token_expiry_days * 24 * 3600 if is_https else None,
+    )
+
+
+# ── Rate Limiting ────────────────────────────────────────────────────────
+# In-process dict — not shared across workers. Sufficient for single-worker deployments.
+
+_MAX_LOGIN_ATTEMPTS = 5
+_LOCKOUT_SECONDS = 300  # 5 minutes
+
+# ip → (fail_count, lock_until_timestamp)
+_login_attempts: dict[str, tuple[int, float]] = {}
+
+
+def _trusted_proxies() -> list:
+    """Parse ``AUTH_TRUSTED_PROXIES`` env var into a list of ip_network objects.
+
+    Comma-separated CIDR or single-IP entries. Empty / unset = no proxy is
+    trusted (direct mode). Invalid entries are skipped with a logger warning.
+    Read live so env-var overrides take effect immediately and tests can
+    ``monkeypatch.setenv`` without poking a module-level cache.
+    """
+    raw = os.getenv("AUTH_TRUSTED_PROXIES", "").strip()
+    if not raw:
+        return []
+    nets = []
+    for entry in raw.split(","):
+        entry = entry.strip()
+        if not entry:
+            continue
+        try:
+            nets.append(ip_network(entry, strict=False))
+        except ValueError:
+            logger.warning("AUTH_TRUSTED_PROXIES: ignoring invalid entry %r", entry)
+    return nets
+
+
+def _get_client_ip(request: Request) -> str:
+    """Extract the real client IP for rate limiting.
+
+    Trust model:
+
+    - The TCP peer (``request.client.host``) is always the baseline. It is
+      whatever the kernel reports as the connecting socket — unforgeable
+      by the client itself.
+    - ``X-Real-IP`` is **only** honored if the TCP peer is in the
+      ``AUTH_TRUSTED_PROXIES`` allowlist (set via env var, comma-separated
+      CIDR or single IPs). When set, the gateway is assumed to be behind a
+      reverse proxy (nginx, Cloudflare, ALB, …) that overwrites
+      ``X-Real-IP`` with the original client address.
+    - With no ``AUTH_TRUSTED_PROXIES`` set, ``X-Real-IP`` is silently
+      ignored — closing the bypass where any client could rotate the
+      header to dodge per-IP rate limits in dev / direct-gateway mode.
+
+    ``X-Forwarded-For`` is intentionally NOT used because it is naturally
+    client-controlled at the *first* hop and the trust chain is harder to
+    audit per-request.
+    """
+    peer_host = request.client.host if request.client else None
+
+    trusted = _trusted_proxies()
+    if trusted and peer_host:
+        try:
+            peer_ip = ip_address(peer_host)
+            if any(peer_ip in net for net in trusted):
+                real_ip = request.headers.get("x-real-ip", "").strip()
+                if real_ip:
+                    return real_ip
+        except ValueError:
+            # peer_host wasn't a parseable IP (e.g. "unknown") — fall through
+            pass
+
+    return peer_host or "unknown"
+
+
+def _check_rate_limit(ip: str) -> None:
+    """Raise 429 if the IP is currently locked out."""
+    record = _login_attempts.get(ip)
+    if record is None:
+        return
+    fail_count, lock_until = record
+    if fail_count >= _MAX_LOGIN_ATTEMPTS:
+        if time.time() < lock_until:
+            raise HTTPException(
+                status_code=429,
+                detail="Too many login attempts. Try again later.",
+            )
+        del _login_attempts[ip]
+
+
+_MAX_TRACKED_IPS = 10000
+
+
+def _record_login_failure(ip: str) -> None:
+    """Record a failed login attempt for the given IP."""
+    # Evict expired lockouts when dict grows too large
+    if len(_login_attempts) >= _MAX_TRACKED_IPS:
+        now = time.time()
+        expired = [k for k, (c, t) in _login_attempts.items() if c >= _MAX_LOGIN_ATTEMPTS and now >= t]
+        for k in expired:
+            del _login_attempts[k]
+        # If still too large, evict cheapest-to-lose half: below-threshold
+        # IPs (lock_until=0.0) sort first, then earliest-expiring lockouts.
+        if len(_login_attempts) >= _MAX_TRACKED_IPS:
+            by_time = sorted(_login_attempts.items(), key=lambda kv: kv[1][1])
+            for k, _ in by_time[: len(by_time) // 2]:
+                del _login_attempts[k]
+
+    record = _login_attempts.get(ip)
+    if record is None:
+        _login_attempts[ip] = (1, 0.0)
+    else:
+        new_count = record[0] + 1
+        lock_until = time.time() + _LOCKOUT_SECONDS if new_count >= _MAX_LOGIN_ATTEMPTS else 0.0
+        _login_attempts[ip] = (new_count, lock_until)
+
+
+def _record_login_success(ip: str) -> None:
+    """Clear failure counter for the given IP on successful login."""
+    _login_attempts.pop(ip, None)
+
+
+# ── Endpoints ─────────────────────────────────────────────────────────────
+
+
+@router.post("/login/local", response_model=LoginResponse)
+async def login_local(
+    request: Request,
+    response: Response,
+    form_data: OAuth2PasswordRequestForm = Depends(),
+):
+    """Local email/password login."""
+    client_ip = _get_client_ip(request)
+    _check_rate_limit(client_ip)
+
+    user = await get_local_provider().authenticate({"email": form_data.username, "password": form_data.password})
+
+    if user is None:
+        _record_login_failure(client_ip)
+        raise HTTPException(
+            status_code=status.HTTP_401_UNAUTHORIZED,
+            detail=AuthErrorResponse(code=AuthErrorCode.INVALID_CREDENTIALS, message="Incorrect email or password").model_dump(),
+        )
+
+    _record_login_success(client_ip)
+    token = create_access_token(str(user.id), token_version=user.token_version)
+    _set_session_cookie(response, token, request)
+
+    return LoginResponse(
+        expires_in=get_auth_config().token_expiry_days * 24 * 3600,
+        needs_setup=user.needs_setup,
+    )
+
+
+@router.post("/register", response_model=UserResponse, status_code=status.HTTP_201_CREATED)
+async def register(request: Request, response: Response, body: RegisterRequest):
+    """Register a new user account (always 'user' role).
+
+    Admin is auto-created on first boot. This endpoint creates regular users.
+    Auto-login by setting the session cookie.
+    """
+    try:
+        user = await get_local_provider().create_user(email=body.email, password=body.password, system_role="user")
+    except ValueError:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=AuthErrorResponse(code=AuthErrorCode.EMAIL_ALREADY_EXISTS, message="Email already registered").model_dump(),
+        )
+
+    token = create_access_token(str(user.id), token_version=user.token_version)
+    _set_session_cookie(response, token, request)
+
+    return UserResponse(id=str(user.id), email=user.email, system_role=user.system_role)
+
+
+@router.post("/logout", response_model=MessageResponse)
+async def logout(request: Request, response: Response):
+    """Logout current user by clearing the cookie."""
+    response.delete_cookie(key="access_token", secure=is_secure_request(request), samesite="lax")
+    return MessageResponse(message="Successfully logged out")
+
+
+@router.post("/change-password", response_model=MessageResponse)
+async def change_password(request: Request, response: Response, body: ChangePasswordRequest):
+    """Change password for the currently authenticated user.
+
+    Also handles the first-boot setup flow:
+    - If new_email is provided, updates email (checks uniqueness)
+    - If user.needs_setup is True and new_email is given, clears needs_setup
+    - Always increments token_version to invalidate old sessions
+    - Re-issues session cookie with new token_version
+    """
+    from app.gateway.auth.password import hash_password_async, verify_password_async
+
+    user = await get_current_user_from_request(request)
+
+    if user.password_hash is None:
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=AuthErrorResponse(code=AuthErrorCode.INVALID_CREDENTIALS, message="OAuth users cannot change password").model_dump())
+
+    if not await verify_password_async(body.current_password, user.password_hash):
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=AuthErrorResponse(code=AuthErrorCode.INVALID_CREDENTIALS, message="Current password is incorrect").model_dump())
+
+    provider = get_local_provider()
+
+    # Update email if provided
+    if body.new_email is not None:
+        existing = await provider.get_user_by_email(body.new_email)
+        if existing and str(existing.id) != str(user.id):
+            raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=AuthErrorResponse(code=AuthErrorCode.EMAIL_ALREADY_EXISTS, message="Email already in use").model_dump())
+        user.email = body.new_email
+
+    # Update password + bump version
+    user.password_hash = await hash_password_async(body.new_password)
+    user.token_version += 1
+
+    # Clear setup flag if this is the setup flow
+    if user.needs_setup and body.new_email is not None:
+        user.needs_setup = False
+
+    await provider.update_user(user)
+
+    # Re-issue cookie with new token_version
+    token = create_access_token(str(user.id), token_version=user.token_version)
+    _set_session_cookie(response, token, request)
+
+    return MessageResponse(message="Password changed successfully")
+
+
+@router.get("/me", response_model=UserResponse)
+async def get_me(request: Request):
+    """Get current authenticated user info."""
+    user = await get_current_user_from_request(request)
+    return UserResponse(id=str(user.id), email=user.email, system_role=user.system_role, needs_setup=user.needs_setup)
+
+
+@router.get("/setup-status")
+async def setup_status():
+    """Check if an admin account exists. Returns needs_setup=True when no admin exists."""
+    admin_count = await get_local_provider().count_admin_users()
+    return {"needs_setup": admin_count == 0}
+
+
+class InitializeAdminRequest(BaseModel):
+    """Request model for first-boot admin account creation."""
+
+    email: EmailStr
+    password: str = Field(..., min_length=8)
+
+    _strong_password = field_validator("password")(classmethod(lambda cls, v: _validate_strong_password(v)))
+
+
+@router.post("/initialize", response_model=UserResponse, status_code=status.HTTP_201_CREATED)
+async def initialize_admin(request: Request, response: Response, body: InitializeAdminRequest):
+    """Create the first admin account on initial system setup.
+
+    Only callable when no admin exists. Returns 409 Conflict if an admin
+    already exists.
+
+    On success, the admin account is created with ``needs_setup=False`` and
+    the session cookie is set.
+    """
+    admin_count = await get_local_provider().count_admin_users()
+    if admin_count > 0:
+        raise HTTPException(
+            status_code=status.HTTP_409_CONFLICT,
+            detail=AuthErrorResponse(code=AuthErrorCode.SYSTEM_ALREADY_INITIALIZED, message="System already initialized").model_dump(),
+        )
+
+    try:
+        user = await get_local_provider().create_user(email=body.email, password=body.password, system_role="admin", needs_setup=False)
+    except ValueError:
+        # DB unique-constraint race: another concurrent request beat us.
+        raise HTTPException(
+            status_code=status.HTTP_409_CONFLICT,
+            detail=AuthErrorResponse(code=AuthErrorCode.SYSTEM_ALREADY_INITIALIZED, message="System already initialized").model_dump(),
+        )
+
+    token = create_access_token(str(user.id), token_version=user.token_version)
+    _set_session_cookie(response, token, request)
+
+    return UserResponse(id=str(user.id), email=user.email, system_role=user.system_role)
+
+
+# ── OAuth Endpoints (Future/Placeholder) ─────────────────────────────────
+
+
+@router.get("/oauth/{provider}")
+async def oauth_login(provider: str):
+    """Initiate OAuth login flow.
+
+    Redirects to the OAuth provider's authorization URL.
+    Currently a placeholder - requires OAuth provider implementation.
+    """
+    if provider not in ["github", "google"]:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail=f"Unsupported OAuth provider: {provider}",
+        )
+
+    raise HTTPException(
+        status_code=status.HTTP_501_NOT_IMPLEMENTED,
+        detail="OAuth login not yet implemented",
+    )
+
+
+@router.get("/callback/{provider}")
+async def oauth_callback(provider: str, code: str, state: str):
+    """OAuth callback endpoint.
+
+    Handles the OAuth provider's callback after user authorization.
+    Currently a placeholder.
+    """
+    raise HTTPException(
+        status_code=status.HTTP_501_NOT_IMPLEMENTED,
+        detail="OAuth callback not yet implemented",
+    )
diff --git a/backend/app/gateway/routers/feedback.py b/backend/app/gateway/routers/feedback.py
new file mode 100644
index 000000000..ca5c1d406
--- /dev/null
+++ b/backend/app/gateway/routers/feedback.py
@@ -0,0 +1,188 @@
+"""Feedback endpoints — create, list, stats, delete.
+
+Allows users to submit thumbs-up/down feedback on runs,
+optionally scoped to a specific message.
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+from fastapi import APIRouter, HTTPException, Request
+from pydantic import BaseModel, Field
+
+from app.gateway.authz import require_permission
+from app.gateway.deps import get_current_user, get_feedback_repo, get_run_store
+
+logger = logging.getLogger(__name__)
+router = APIRouter(prefix="/api/threads", tags=["feedback"])
+
+
+# ---------------------------------------------------------------------------
+# Request / response models
+# ---------------------------------------------------------------------------
+
+
+class FeedbackCreateRequest(BaseModel):
+    rating: int = Field(..., description="Feedback rating: +1 (positive) or -1 (negative)")
+    comment: str | None = Field(default=None, description="Optional text feedback")
+    message_id: str | None = Field(default=None, description="Optional: scope feedback to a specific message")
+
+
+class FeedbackUpsertRequest(BaseModel):
+    rating: int = Field(..., description="Feedback rating: +1 (positive) or -1 (negative)")
+    comment: str | None = Field(default=None, description="Optional text feedback")
+
+
+class FeedbackResponse(BaseModel):
+    feedback_id: str
+    run_id: str
+    thread_id: str
+    user_id: str | None = None
+    message_id: str | None = None
+    rating: int
+    comment: str | None = None
+    created_at: str = ""
+
+
+class FeedbackStatsResponse(BaseModel):
+    run_id: str
+    total: int = 0
+    positive: int = 0
+    negative: int = 0
+
+
+# ---------------------------------------------------------------------------
+# Endpoints
+# ---------------------------------------------------------------------------
+
+
+@router.put("/{thread_id}/runs/{run_id}/feedback", response_model=FeedbackResponse)
+@require_permission("threads", "write", owner_check=True, require_existing=True)
+async def upsert_feedback(
+    thread_id: str,
+    run_id: str,
+    body: FeedbackUpsertRequest,
+    request: Request,
+) -> dict[str, Any]:
+    """Create or update feedback for a run (idempotent)."""
+    if body.rating not in (1, -1):
+        raise HTTPException(status_code=400, detail="rating must be +1 or -1")
+
+    user_id = await get_current_user(request)
+
+    run_store = get_run_store(request)
+    run = await run_store.get(run_id)
+    if run is None:
+        raise HTTPException(status_code=404, detail=f"Run {run_id} not found")
+    if run.get("thread_id") != thread_id:
+        raise HTTPException(status_code=404, detail=f"Run {run_id} not found in thread {thread_id}")
+
+    feedback_repo = get_feedback_repo(request)
+    return await feedback_repo.upsert(
+        run_id=run_id,
+        thread_id=thread_id,
+        rating=body.rating,
+        user_id=user_id,
+        comment=body.comment,
+    )
+
+
+@router.delete("/{thread_id}/runs/{run_id}/feedback")
+@require_permission("threads", "delete", owner_check=True, require_existing=True)
+async def delete_run_feedback(
+    thread_id: str,
+    run_id: str,
+    request: Request,
+) -> dict[str, bool]:
+    """Delete the current user's feedback for a run."""
+    user_id = await get_current_user(request)
+    feedback_repo = get_feedback_repo(request)
+    deleted = await feedback_repo.delete_by_run(
+        thread_id=thread_id,
+        run_id=run_id,
+        user_id=user_id,
+    )
+    if not deleted:
+        raise HTTPException(status_code=404, detail="No feedback found for this run")
+    return {"success": True}
+
+
+@router.post("/{thread_id}/runs/{run_id}/feedback", response_model=FeedbackResponse)
+@require_permission("threads", "write", owner_check=True, require_existing=True)
+async def create_feedback(
+    thread_id: str,
+    run_id: str,
+    body: FeedbackCreateRequest,
+    request: Request,
+) -> dict[str, Any]:
+    """Submit feedback (thumbs-up/down) for a run."""
+    if body.rating not in (1, -1):
+        raise HTTPException(status_code=400, detail="rating must be +1 or -1")
+
+    user_id = await get_current_user(request)
+
+    # Validate run exists and belongs to thread
+    run_store = get_run_store(request)
+    run = await run_store.get(run_id)
+    if run is None:
+        raise HTTPException(status_code=404, detail=f"Run {run_id} not found")
+    if run.get("thread_id") != thread_id:
+        raise HTTPException(status_code=404, detail=f"Run {run_id} not found in thread {thread_id}")
+
+    feedback_repo = get_feedback_repo(request)
+    return await feedback_repo.create(
+        run_id=run_id,
+        thread_id=thread_id,
+        rating=body.rating,
+        user_id=user_id,
+        message_id=body.message_id,
+        comment=body.comment,
+    )
+
+
+@router.get("/{thread_id}/runs/{run_id}/feedback", response_model=list[FeedbackResponse])
+@require_permission("threads", "read", owner_check=True)
+async def list_feedback(
+    thread_id: str,
+    run_id: str,
+    request: Request,
+) -> list[dict[str, Any]]:
+    """List all feedback for a run."""
+    feedback_repo = get_feedback_repo(request)
+    return await feedback_repo.list_by_run(thread_id, run_id)
+
+
+@router.get("/{thread_id}/runs/{run_id}/feedback/stats", response_model=FeedbackStatsResponse)
+@require_permission("threads", "read", owner_check=True)
+async def feedback_stats(
+    thread_id: str,
+    run_id: str,
+    request: Request,
+) -> dict[str, Any]:
+    """Get aggregated feedback stats (positive/negative counts) for a run."""
+    feedback_repo = get_feedback_repo(request)
+    return await feedback_repo.aggregate_by_run(thread_id, run_id)
+
+
+@router.delete("/{thread_id}/runs/{run_id}/feedback/{feedback_id}")
+@require_permission("threads", "delete", owner_check=True, require_existing=True)
+async def delete_feedback(
+    thread_id: str,
+    run_id: str,
+    feedback_id: str,
+    request: Request,
+) -> dict[str, bool]:
+    """Delete a feedback record."""
+    feedback_repo = get_feedback_repo(request)
+    # Verify feedback belongs to the specified thread/run before deleting
+    existing = await feedback_repo.get(feedback_id)
+    if existing is None:
+        raise HTTPException(status_code=404, detail=f"Feedback {feedback_id} not found")
+    if existing.get("thread_id") != thread_id or existing.get("run_id") != run_id:
+        raise HTTPException(status_code=404, detail=f"Feedback {feedback_id} not found in run {run_id}")
+    deleted = await feedback_repo.delete(feedback_id)
+    if not deleted:
+        raise HTTPException(status_code=404, detail=f"Feedback {feedback_id} not found")
+    return {"success": True}
diff --git a/backend/app/gateway/routers/mcp.py b/backend/app/gateway/routers/mcp.py
index 386fc13c6..3d39879e4 100644
--- a/backend/app/gateway/routers/mcp.py
+++ b/backend/app/gateway/routers/mcp.py
@@ -3,10 +3,12 @@ import logging
 from pathlib import Path
 from typing import Literal
 
-from fastapi import APIRouter, HTTPException
+from fastapi import APIRouter, Depends, HTTPException, Request
 from pydantic import BaseModel, Field
 
-from deerflow.config.extensions_config import ExtensionsConfig, get_extensions_config, reload_extensions_config
+from app.gateway.deps import get_config
+from deerflow.config.app_config import AppConfig
+from deerflow.config.extensions_config import ExtensionsConfig
 
 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/api", tags=["mcp"])
@@ -69,7 +71,7 @@ class McpConfigUpdateRequest(BaseModel):
     summary="Get MCP Configuration",
     description="Retrieve the current Model Context Protocol (MCP) server configurations.",
 )
-async def get_mcp_configuration() -> McpConfigResponse:
+async def get_mcp_configuration(config: AppConfig = Depends(get_config)) -> McpConfigResponse:
     """Get the current MCP configuration.
 
     Returns:
@@ -90,9 +92,9 @@ async def get_mcp_configuration() -> McpConfigResponse:
         }
         ```
     """
-    config = get_extensions_config()
+    ext = config.extensions
 
-    return McpConfigResponse(mcp_servers={name: McpServerConfigResponse(**server.model_dump()) for name, server in config.mcp_servers.items()})
+    return McpConfigResponse(mcp_servers={name: McpServerConfigResponse(**server.model_dump()) for name, server in ext.mcp_servers.items()})
 
 
 @router.put(
@@ -101,7 +103,11 @@ async def get_mcp_configuration() -> McpConfigResponse:
     summary="Update MCP Configuration",
     description="Update Model Context Protocol (MCP) server configurations and save to file.",
 )
-async def update_mcp_configuration(request: McpConfigUpdateRequest) -> McpConfigResponse:
+async def update_mcp_configuration(
+    request: McpConfigUpdateRequest,
+    http_request: Request,
+    config: AppConfig = Depends(get_config),
+) -> McpConfigResponse:
     """Update the MCP configuration.
 
     This will:
@@ -142,13 +148,13 @@ async def update_mcp_configuration(request: McpConfigUpdateRequest) -> McpConfig
             config_path = Path.cwd().parent / "extensions_config.json"
             logger.info(f"No existing extensions config found. Creating new config at: {config_path}")
 
-        # Load current config to preserve skills configuration
-        current_config = get_extensions_config()
+        # Use injected config to preserve skills configuration
+        current_ext = config.extensions
 
         # Convert request to dict format for JSON serialization
         config_data = {
             "mcpServers": {name: server.model_dump() for name, server in request.mcp_servers.items()},
-            "skills": {name: {"enabled": skill.enabled} for name, skill in current_config.skills.items()},
+            "skills": {name: {"enabled": skill.enabled} for name, skill in current_ext.skills.items()},
         }
 
         # Write the configuration to file
@@ -160,9 +166,11 @@ async def update_mcp_configuration(request: McpConfigUpdateRequest) -> McpConfig
         # NOTE: No need to reload/reset cache here - LangGraph Server (separate process)
         # will detect config file changes via mtime and reinitialize MCP tools automatically
 
-        # Reload the configuration and update the global cache
-        reloaded_config = reload_extensions_config()
-        return McpConfigResponse(mcp_servers={name: McpServerConfigResponse(**server.model_dump()) for name, server in reloaded_config.mcp_servers.items()})
+        # Reload the configuration and swap ``app.state.config`` so subsequent
+        # ``Depends(get_config)`` calls see the refreshed value.
+        reloaded = AppConfig.from_file()
+        http_request.app.state.config = reloaded
+        return McpConfigResponse(mcp_servers={name: McpServerConfigResponse(**server.model_dump()) for name, server in reloaded.extensions.mcp_servers.items()})
 
     except Exception as e:
         logger.error(f"Failed to update MCP configuration: {e}", exc_info=True)
diff --git a/backend/app/gateway/routers/memory.py b/backend/app/gateway/routers/memory.py
index 6ee546924..9284bb736 100644
--- a/backend/app/gateway/routers/memory.py
+++ b/backend/app/gateway/routers/memory.py
@@ -1,8 +1,9 @@
 """Memory API router for retrieving and managing global memory data."""
 
-from fastapi import APIRouter, HTTPException
+from fastapi import APIRouter, Depends, HTTPException
 from pydantic import BaseModel, Field
 
+from app.gateway.deps import get_config
 from deerflow.agents.memory.updater import (
     clear_memory_data,
     create_memory_fact,
@@ -12,7 +13,8 @@ from deerflow.agents.memory.updater import (
     reload_memory_data,
     update_memory_fact,
 )
-from deerflow.config.memory_config import get_memory_config
+from deerflow.config.app_config import AppConfig
+from deerflow.runtime.user_context import get_effective_user_id
 
 router = APIRouter(prefix="/api", tags=["memory"])
 
@@ -113,7 +115,7 @@ class MemoryStatusResponse(BaseModel):
     summary="Get Memory Data",
     description="Retrieve the current global memory data including user context, history, and facts.",
 )
-async def get_memory() -> MemoryResponse:
+async def get_memory(app_config: AppConfig = Depends(get_config)) -> MemoryResponse:
     """Get the current global memory data.
 
     Returns:
@@ -147,7 +149,7 @@ async def get_memory() -> MemoryResponse:
         }
         ```
     """
-    memory_data = get_memory_data()
+    memory_data = get_memory_data(app_config.memory, user_id=get_effective_user_id())
     return MemoryResponse(**memory_data)
 
 
@@ -158,7 +160,7 @@ async def get_memory() -> MemoryResponse:
     summary="Reload Memory Data",
     description="Reload memory data from the storage file, refreshing the in-memory cache.",
 )
-async def reload_memory() -> MemoryResponse:
+async def reload_memory(app_config: AppConfig = Depends(get_config)) -> MemoryResponse:
     """Reload memory data from file.
 
     This forces a reload of the memory data from the storage file,
@@ -167,7 +169,7 @@ async def reload_memory() -> MemoryResponse:
     Returns:
         The reloaded memory data.
     """
-    memory_data = reload_memory_data()
+    memory_data = reload_memory_data(app_config.memory, user_id=get_effective_user_id())
     return MemoryResponse(**memory_data)
 
 
@@ -178,10 +180,10 @@ async def reload_memory() -> MemoryResponse:
     summary="Clear All Memory Data",
     description="Delete all saved memory data and reset the memory structure to an empty state.",
 )
-async def clear_memory() -> MemoryResponse:
+async def clear_memory(app_config: AppConfig = Depends(get_config)) -> MemoryResponse:
     """Clear all persisted memory data."""
     try:
-        memory_data = clear_memory_data()
+        memory_data = clear_memory_data(app_config.memory, user_id=get_effective_user_id())
     except OSError as exc:
         raise HTTPException(status_code=500, detail="Failed to clear memory data.") from exc
 
@@ -195,13 +197,15 @@ async def clear_memory() -> MemoryResponse:
     summary="Create Memory Fact",
     description="Create a single saved memory fact manually.",
 )
-async def create_memory_fact_endpoint(request: FactCreateRequest) -> MemoryResponse:
+async def create_memory_fact_endpoint(request: FactCreateRequest, app_config: AppConfig = Depends(get_config)) -> MemoryResponse:
     """Create a single fact manually."""
     try:
         memory_data = create_memory_fact(
+            app_config.memory,
             content=request.content,
             category=request.category,
             confidence=request.confidence,
+            user_id=get_effective_user_id(),
         )
     except ValueError as exc:
         raise _map_memory_fact_value_error(exc) from exc
@@ -218,10 +222,10 @@ async def create_memory_fact_endpoint(request: FactCreateRequest) -> MemoryRespo
     summary="Delete Memory Fact",
     description="Delete a single saved memory fact by its fact id.",
 )
-async def delete_memory_fact_endpoint(fact_id: str) -> MemoryResponse:
+async def delete_memory_fact_endpoint(fact_id: str, app_config: AppConfig = Depends(get_config)) -> MemoryResponse:
     """Delete a single fact from memory by fact id."""
     try:
-        memory_data = delete_memory_fact(fact_id)
+        memory_data = delete_memory_fact(app_config.memory, fact_id, user_id=get_effective_user_id())
     except KeyError as exc:
         raise HTTPException(status_code=404, detail=f"Memory fact '{fact_id}' not found.") from exc
     except OSError as exc:
@@ -237,14 +241,16 @@ async def delete_memory_fact_endpoint(fact_id: str) -> MemoryResponse:
     summary="Patch Memory Fact",
     description="Partially update a single saved memory fact by its fact id while preserving omitted fields.",
 )
-async def update_memory_fact_endpoint(fact_id: str, request: FactPatchRequest) -> MemoryResponse:
+async def update_memory_fact_endpoint(fact_id: str, request: FactPatchRequest, app_config: AppConfig = Depends(get_config)) -> MemoryResponse:
     """Partially update a single fact manually."""
     try:
         memory_data = update_memory_fact(
+            app_config.memory,
             fact_id=fact_id,
             content=request.content,
             category=request.category,
             confidence=request.confidence,
+            user_id=get_effective_user_id(),
         )
     except ValueError as exc:
         raise _map_memory_fact_value_error(exc) from exc
@@ -263,9 +269,9 @@ async def update_memory_fact_endpoint(fact_id: str, request: FactPatchRequest) -
     summary="Export Memory Data",
     description="Export the current global memory data as JSON for backup or transfer.",
 )
-async def export_memory() -> MemoryResponse:
+async def export_memory(app_config: AppConfig = Depends(get_config)) -> MemoryResponse:
     """Export the current memory data."""
-    memory_data = get_memory_data()
+    memory_data = get_memory_data(app_config.memory, user_id=get_effective_user_id())
     return MemoryResponse(**memory_data)
 
 
@@ -276,10 +282,10 @@ async def export_memory() -> MemoryResponse:
     summary="Import Memory Data",
     description="Import and overwrite the current global memory data from a JSON payload.",
 )
-async def import_memory(request: MemoryResponse) -> MemoryResponse:
+async def import_memory(request: MemoryResponse, app_config: AppConfig = Depends(get_config)) -> MemoryResponse:
     """Import and persist memory data."""
     try:
-        memory_data = import_memory_data(request.model_dump())
+        memory_data = import_memory_data(app_config.memory, request.model_dump(), user_id=get_effective_user_id())
     except OSError as exc:
         raise HTTPException(status_code=500, detail="Failed to import memory data.") from exc
 
@@ -292,7 +298,9 @@ async def import_memory(request: MemoryResponse) -> MemoryResponse:
     summary="Get Memory Configuration",
     description="Retrieve the current memory system configuration.",
 )
-async def get_memory_config_endpoint() -> MemoryConfigResponse:
+async def get_memory_config_endpoint(
+    app_config: AppConfig = Depends(get_config),
+) -> MemoryConfigResponse:
     """Get the memory system configuration.
 
     Returns:
@@ -311,7 +319,7 @@ async def get_memory_config_endpoint() -> MemoryConfigResponse:
         }
         ```
     """
-    config = get_memory_config()
+    config = app_config.memory
     return MemoryConfigResponse(
         enabled=config.enabled,
         storage_path=config.storage_path,
@@ -330,14 +338,16 @@ async def get_memory_config_endpoint() -> MemoryConfigResponse:
     summary="Get Memory Status",
     description="Retrieve both memory configuration and current data in a single request.",
 )
-async def get_memory_status() -> MemoryStatusResponse:
+async def get_memory_status(
+    app_config: AppConfig = Depends(get_config),
+) -> MemoryStatusResponse:
     """Get the memory system status including configuration and data.
 
     Returns:
         Combined memory configuration and current data.
     """
-    config = get_memory_config()
-    memory_data = get_memory_data()
+    config = app_config.memory
+    memory_data = get_memory_data(config, user_id=get_effective_user_id())
 
     return MemoryStatusResponse(
         config=MemoryConfigResponse(
diff --git a/backend/app/gateway/routers/models.py b/backend/app/gateway/routers/models.py
index 11a87a872..a36ece927 100644
--- a/backend/app/gateway/routers/models.py
+++ b/backend/app/gateway/routers/models.py
@@ -1,7 +1,8 @@
-from fastapi import APIRouter, HTTPException
+from fastapi import APIRouter, Depends, HTTPException
 from pydantic import BaseModel, Field
 
-from deerflow.config import get_app_config
+from app.gateway.deps import get_config
+from deerflow.config.app_config import AppConfig
 
 router = APIRouter(prefix="/api", tags=["models"])
 
@@ -36,7 +37,7 @@ class ModelsListResponse(BaseModel):
     summary="List All Models",
     description="Retrieve a list of all available AI models configured in the system.",
 )
-async def list_models() -> ModelsListResponse:
+async def list_models(config: AppConfig = Depends(get_config)) -> ModelsListResponse:
     """List all available models from configuration.
 
     Returns model information suitable for frontend display,
@@ -72,7 +73,6 @@ async def list_models() -> ModelsListResponse:
         }
         ```
     """
-    config = get_app_config()
     models = [
         ModelResponse(
             name=model.name,
@@ -96,7 +96,7 @@ async def list_models() -> ModelsListResponse:
     summary="Get Model Details",
     description="Retrieve detailed information about a specific AI model by its name.",
 )
-async def get_model(model_name: str) -> ModelResponse:
+async def get_model(model_name: str, config: AppConfig = Depends(get_config)) -> ModelResponse:
     """Get a specific model by name.
 
     Args:
@@ -118,7 +118,6 @@ async def get_model(model_name: str) -> ModelResponse:
         }
         ```
     """
-    config = get_app_config()
     model = config.get_model_config(model_name)
     if model is None:
         raise HTTPException(status_code=404, detail=f"Model '{model_name}' not found")
diff --git a/backend/app/gateway/routers/runs.py b/backend/app/gateway/routers/runs.py
index 7d17488fc..70e2abb63 100644
--- a/backend/app/gateway/routers/runs.py
+++ b/backend/app/gateway/routers/runs.py
@@ -11,10 +11,11 @@ import asyncio
 import logging
 import uuid
 
-from fastapi import APIRouter, Request
+from fastapi import APIRouter, HTTPException, Query, Request
 from fastapi.responses import StreamingResponse
 
-from app.gateway.deps import get_checkpointer, get_run_manager, get_stream_bridge
+from app.gateway.authz import require_permission
+from app.gateway.deps import get_checkpointer, get_feedback_repo, get_run_event_store, get_run_manager, get_run_store, get_stream_bridge
 from app.gateway.routers.thread_runs import RunCreateRequest
 from app.gateway.services import sse_consumer, start_run
 from deerflow.runtime import serialize_channel_values
@@ -85,3 +86,57 @@ async def stateless_wait(body: RunCreateRequest, request: Request) -> dict:
         logger.exception("Failed to fetch final state for run %s", record.run_id)
 
     return {"status": record.status.value, "error": record.error}
+
+
+# ---------------------------------------------------------------------------
+# Run-scoped read endpoints
+# ---------------------------------------------------------------------------
+
+
+async def _resolve_run(run_id: str, request: Request) -> dict:
+    """Fetch run by run_id with user ownership check. Raises 404 if not found."""
+    run_store = get_run_store(request)
+    record = await run_store.get(run_id)  # user_id=AUTO filters by contextvar
+    if record is None:
+        raise HTTPException(status_code=404, detail=f"Run {run_id} not found")
+    return record
+
+
+@router.get("/{run_id}/messages")
+@require_permission("runs", "read")
+async def run_messages(
+    run_id: str,
+    request: Request,
+    limit: int = Query(default=50, le=200, ge=1),
+    before_seq: int | None = Query(default=None),
+    after_seq: int | None = Query(default=None),
+) -> dict:
+    """Return paginated messages for a run (cursor-based).
+
+    Pagination:
+    - after_seq: messages with seq > after_seq (forward)
+    - before_seq: messages with seq < before_seq (backward)
+    - neither: latest messages
+
+    Response: { data: [...], has_more: bool }
+    """
+    run = await _resolve_run(run_id, request)
+    event_store = get_run_event_store(request)
+    rows = await event_store.list_messages_by_run(
+        run["thread_id"], run_id,
+        limit=limit + 1,
+        before_seq=before_seq,
+        after_seq=after_seq,
+    )
+    has_more = len(rows) > limit
+    data = rows[:limit] if has_more else rows
+    return {"data": data, "has_more": has_more}
+
+
+@router.get("/{run_id}/feedback")
+@require_permission("runs", "read")
+async def run_feedback(run_id: str, request: Request) -> list[dict]:
+    """Return all feedback for a run."""
+    run = await _resolve_run(run_id, request)
+    feedback_repo = get_feedback_repo(request)
+    return await feedback_repo.list_by_run(run["thread_id"], run_id)
diff --git a/backend/app/gateway/routers/skills.py b/backend/app/gateway/routers/skills.py
index 5fac32d41..f4fb1b445 100644
--- a/backend/app/gateway/routers/skills.py
+++ b/backend/app/gateway/routers/skills.py
@@ -4,12 +4,14 @@ import logging
 import shutil
 from pathlib import Path
 
-from fastapi import APIRouter, HTTPException
+from fastapi import APIRouter, Depends, HTTPException, Request
 from pydantic import BaseModel, Field
 
+from app.gateway.deps import get_config
 from app.gateway.path_utils import resolve_thread_virtual_path
 from deerflow.agents.lead_agent.prompt import refresh_skills_system_prompt_cache_async
-from deerflow.config.extensions_config import ExtensionsConfig, SkillStateConfig, get_extensions_config, reload_extensions_config
+from deerflow.config.app_config import AppConfig
+from deerflow.config.extensions_config import ExtensionsConfig
 from deerflow.skills import Skill, load_skills
 from deerflow.skills.installer import SkillAlreadyExistsError, install_skill_from_archive
 from deerflow.skills.manager import (
@@ -101,9 +103,9 @@ def _skill_to_response(skill: Skill) -> SkillResponse:
     summary="List All Skills",
     description="Retrieve a list of all available skills from both public and custom directories.",
 )
-async def list_skills() -> SkillsListResponse:
+async def list_skills(app_config: AppConfig = Depends(get_config)) -> SkillsListResponse:
     try:
-        skills = load_skills(enabled_only=False)
+        skills = load_skills(app_config, enabled_only=False)
         return SkillsListResponse(skills=[_skill_to_response(skill) for skill in skills])
     except Exception as e:
         logger.error(f"Failed to load skills: {e}", exc_info=True)
@@ -116,11 +118,11 @@ async def list_skills() -> SkillsListResponse:
     summary="Install Skill",
     description="Install a skill from a .skill file (ZIP archive) located in the thread's user-data directory.",
 )
-async def install_skill(request: SkillInstallRequest) -> SkillInstallResponse:
+async def install_skill(request: SkillInstallRequest, app_config: AppConfig = Depends(get_config)) -> SkillInstallResponse:
     try:
         skill_file_path = resolve_thread_virtual_path(request.thread_id, request.path)
         result = install_skill_from_archive(skill_file_path)
-        await refresh_skills_system_prompt_cache_async()
+        await refresh_skills_system_prompt_cache_async(app_config)
         return SkillInstallResponse(**result)
     except FileNotFoundError as e:
         raise HTTPException(status_code=404, detail=str(e))
@@ -136,9 +138,9 @@ async def install_skill(request: SkillInstallRequest) -> SkillInstallResponse:
 
 
 @router.get("/skills/custom", response_model=SkillsListResponse, summary="List Custom Skills")
-async def list_custom_skills() -> SkillsListResponse:
+async def list_custom_skills(app_config: AppConfig = Depends(get_config)) -> SkillsListResponse:
     try:
-        skills = [skill for skill in load_skills(enabled_only=False) if skill.category == "custom"]
+        skills = [skill for skill in load_skills(app_config, enabled_only=False) if skill.category == "custom"]
         return SkillsListResponse(skills=[_skill_to_response(skill) for skill in skills])
     except Exception as e:
         logger.error("Failed to list custom skills: %s", e, exc_info=True)
@@ -146,13 +148,13 @@ async def list_custom_skills() -> SkillsListResponse:
 
 
 @router.get("/skills/custom/{skill_name}", response_model=CustomSkillContentResponse, summary="Get Custom Skill Content")
-async def get_custom_skill(skill_name: str) -> CustomSkillContentResponse:
+async def get_custom_skill(skill_name: str, app_config: AppConfig = Depends(get_config)) -> CustomSkillContentResponse:
     try:
-        skills = load_skills(enabled_only=False)
+        skills = load_skills(app_config, enabled_only=False)
         skill = next((s for s in skills if s.name == skill_name and s.category == "custom"), None)
         if skill is None:
             raise HTTPException(status_code=404, detail=f"Custom skill '{skill_name}' not found")
-        return CustomSkillContentResponse(**_skill_to_response(skill).model_dump(), content=read_custom_skill_content(skill_name))
+        return CustomSkillContentResponse(**_skill_to_response(skill).model_dump(), content=read_custom_skill_content(skill_name, app_config))
     except HTTPException:
         raise
     except Exception as e:
@@ -161,14 +163,18 @@ async def get_custom_skill(skill_name: str) -> CustomSkillContentResponse:
 
 
 @router.put("/skills/custom/{skill_name}", response_model=CustomSkillContentResponse, summary="Edit Custom Skill")
-async def update_custom_skill(skill_name: str, request: CustomSkillUpdateRequest) -> CustomSkillContentResponse:
+async def update_custom_skill(
+    skill_name: str,
+    request: CustomSkillUpdateRequest,
+    app_config: AppConfig = Depends(get_config),
+) -> CustomSkillContentResponse:
     try:
-        ensure_custom_skill_is_editable(skill_name)
+        ensure_custom_skill_is_editable(skill_name, app_config)
         validate_skill_markdown_content(skill_name, request.content)
-        scan = await scan_skill_content(request.content, executable=False, location=f"{skill_name}/SKILL.md")
+        scan = await scan_skill_content(app_config, request.content, executable=False, location=f"{skill_name}/SKILL.md")
         if scan.decision == "block":
             raise HTTPException(status_code=400, detail=f"Security scan blocked the edit: {scan.reason}")
-        skill_file = get_custom_skill_dir(skill_name) / "SKILL.md"
+        skill_file = get_custom_skill_dir(skill_name, app_config) / "SKILL.md"
         prev_content = skill_file.read_text(encoding="utf-8")
         atomic_write(skill_file, request.content)
         append_history(
@@ -182,9 +188,10 @@ async def update_custom_skill(skill_name: str, request: CustomSkillUpdateRequest
                 "new_content": request.content,
                 "scanner": {"decision": scan.decision, "reason": scan.reason},
             },
+            app_config,
         )
-        await refresh_skills_system_prompt_cache_async()
-        return await get_custom_skill(skill_name)
+        await refresh_skills_system_prompt_cache_async(app_config)
+        return await get_custom_skill(skill_name, app_config)
     except HTTPException:
         raise
     except FileNotFoundError as e:
@@ -197,11 +204,11 @@ async def update_custom_skill(skill_name: str, request: CustomSkillUpdateRequest
 
 
 @router.delete("/skills/custom/{skill_name}", summary="Delete Custom Skill")
-async def delete_custom_skill(skill_name: str) -> dict[str, bool]:
+async def delete_custom_skill(skill_name: str, app_config: AppConfig = Depends(get_config)) -> dict[str, bool]:
     try:
-        ensure_custom_skill_is_editable(skill_name)
-        skill_dir = get_custom_skill_dir(skill_name)
-        prev_content = read_custom_skill_content(skill_name)
+        ensure_custom_skill_is_editable(skill_name, app_config)
+        skill_dir = get_custom_skill_dir(skill_name, app_config)
+        prev_content = read_custom_skill_content(skill_name, app_config)
         try:
             append_history(
                 skill_name,
@@ -214,13 +221,14 @@ async def delete_custom_skill(skill_name: str) -> dict[str, bool]:
                     "new_content": None,
                     "scanner": {"decision": "allow", "reason": "Deletion requested."},
                 },
+                app_config,
             )
         except OSError as e:
             if not isinstance(e, PermissionError) and e.errno not in {errno.EACCES, errno.EPERM, errno.EROFS}:
                 raise
             logger.warning("Skipping delete history write for custom skill %s due to readonly/permission failure; continuing with skill directory removal: %s", skill_name, e)
         shutil.rmtree(skill_dir)
-        await refresh_skills_system_prompt_cache_async()
+        await refresh_skills_system_prompt_cache_async(app_config)
         return {"success": True}
     except FileNotFoundError as e:
         raise HTTPException(status_code=404, detail=str(e))
@@ -232,11 +240,11 @@ async def delete_custom_skill(skill_name: str) -> dict[str, bool]:
 
 
 @router.get("/skills/custom/{skill_name}/history", response_model=CustomSkillHistoryResponse, summary="Get Custom Skill History")
-async def get_custom_skill_history(skill_name: str) -> CustomSkillHistoryResponse:
+async def get_custom_skill_history(skill_name: str, app_config: AppConfig = Depends(get_config)) -> CustomSkillHistoryResponse:
     try:
-        if not custom_skill_exists(skill_name) and not get_skill_history_file(skill_name).exists():
+        if not custom_skill_exists(skill_name, app_config) and not get_skill_history_file(skill_name, app_config).exists():
             raise HTTPException(status_code=404, detail=f"Custom skill '{skill_name}' not found")
-        return CustomSkillHistoryResponse(history=read_history(skill_name))
+        return CustomSkillHistoryResponse(history=read_history(skill_name, app_config))
     except HTTPException:
         raise
     except Exception as e:
@@ -245,11 +253,15 @@ async def get_custom_skill_history(skill_name: str) -> CustomSkillHistoryRespons
 
 
 @router.post("/skills/custom/{skill_name}/rollback", response_model=CustomSkillContentResponse, summary="Rollback Custom Skill")
-async def rollback_custom_skill(skill_name: str, request: SkillRollbackRequest) -> CustomSkillContentResponse:
+async def rollback_custom_skill(
+    skill_name: str,
+    request: SkillRollbackRequest,
+    app_config: AppConfig = Depends(get_config),
+) -> CustomSkillContentResponse:
     try:
-        if not custom_skill_exists(skill_name) and not get_skill_history_file(skill_name).exists():
+        if not custom_skill_exists(skill_name, app_config) and not get_skill_history_file(skill_name, app_config).exists():
             raise HTTPException(status_code=404, detail=f"Custom skill '{skill_name}' not found")
-        history = read_history(skill_name)
+        history = read_history(skill_name, app_config)
         if not history:
             raise HTTPException(status_code=400, detail=f"Custom skill '{skill_name}' has no history")
         record = history[request.history_index]
@@ -257,8 +269,8 @@ async def rollback_custom_skill(skill_name: str, request: SkillRollbackRequest)
         if target_content is None:
             raise HTTPException(status_code=400, detail="Selected history entry has no previous content to roll back to")
         validate_skill_markdown_content(skill_name, target_content)
-        scan = await scan_skill_content(target_content, executable=False, location=f"{skill_name}/SKILL.md")
-        skill_file = get_custom_skill_file(skill_name)
+        scan = await scan_skill_content(app_config, target_content, executable=False, location=f"{skill_name}/SKILL.md")
+        skill_file = get_custom_skill_file(skill_name, app_config)
         current_content = skill_file.read_text(encoding="utf-8") if skill_file.exists() else None
         history_entry = {
             "action": "rollback",
@@ -271,12 +283,12 @@ async def rollback_custom_skill(skill_name: str, request: SkillRollbackRequest)
             "scanner": {"decision": scan.decision, "reason": scan.reason},
         }
         if scan.decision == "block":
-            append_history(skill_name, history_entry)
+            append_history(skill_name, history_entry, app_config)
             raise HTTPException(status_code=400, detail=f"Rollback blocked by security scanner: {scan.reason}")
         atomic_write(skill_file, target_content)
-        append_history(skill_name, history_entry)
-        await refresh_skills_system_prompt_cache_async()
-        return await get_custom_skill(skill_name)
+        append_history(skill_name, history_entry, app_config)
+        await refresh_skills_system_prompt_cache_async(app_config)
+        return await get_custom_skill(skill_name, app_config)
     except HTTPException:
         raise
     except IndexError:
@@ -296,9 +308,9 @@ async def rollback_custom_skill(skill_name: str, request: SkillRollbackRequest)
     summary="Get Skill Details",
     description="Retrieve detailed information about a specific skill by its name.",
 )
-async def get_skill(skill_name: str) -> SkillResponse:
+async def get_skill(skill_name: str, app_config: AppConfig = Depends(get_config)) -> SkillResponse:
     try:
-        skills = load_skills(enabled_only=False)
+        skills = load_skills(app_config, enabled_only=False)
         skill = next((s for s in skills if s.name == skill_name), None)
 
         if skill is None:
@@ -318,9 +330,14 @@ async def get_skill(skill_name: str) -> SkillResponse:
     summary="Update Skill",
     description="Update a skill's enabled status by modifying the extensions_config.json file.",
 )
-async def update_skill(skill_name: str, request: SkillUpdateRequest) -> SkillResponse:
+async def update_skill(
+    skill_name: str,
+    request: SkillUpdateRequest,
+    http_request: Request,
+    app_config: AppConfig = Depends(get_config),
+) -> SkillResponse:
     try:
-        skills = load_skills(enabled_only=False)
+        skills = load_skills(app_config, enabled_only=False)
         skill = next((s for s in skills if s.name == skill_name), None)
 
         if skill is None:
@@ -331,22 +348,29 @@ async def update_skill(skill_name: str, request: SkillUpdateRequest) -> SkillRes
             config_path = Path.cwd().parent / "extensions_config.json"
             logger.info(f"No existing extensions config found. Creating new config at: {config_path}")
 
-        extensions_config = get_extensions_config()
-        extensions_config.skills[skill_name] = SkillStateConfig(enabled=request.enabled)
+        # Do not mutate the frozen AppConfig in place. Compose the new skills
+        # state in a fresh dict, write to disk, and reload AppConfig below so
+        # every subsequent Depends(get_config) sees the refreshed snapshot.
+        ext = app_config.extensions
+        updated_skills = {name: {"enabled": skill_config.enabled} for name, skill_config in ext.skills.items()}
+        updated_skills[skill_name] = {"enabled": request.enabled}
 
         config_data = {
-            "mcpServers": {name: server.model_dump() for name, server in extensions_config.mcp_servers.items()},
-            "skills": {name: {"enabled": skill_config.enabled} for name, skill_config in extensions_config.skills.items()},
+            "mcpServers": {name: server.model_dump() for name, server in ext.mcp_servers.items()},
+            "skills": updated_skills,
         }
 
         with open(config_path, "w", encoding="utf-8") as f:
             json.dump(config_data, f, indent=2)
 
         logger.info(f"Skills configuration updated and saved to: {config_path}")
-        reload_extensions_config()
-        await refresh_skills_system_prompt_cache_async()
+        # Reload AppConfig and swap ``app.state.config`` so subsequent
+        # ``Depends(get_config)`` sees the refreshed value.
+        reloaded = AppConfig.from_file()
+        http_request.app.state.config = reloaded
+        await refresh_skills_system_prompt_cache_async(reloaded)
 
-        skills = load_skills(enabled_only=False)
+        skills = load_skills(reloaded, enabled_only=False)
         updated_skill = next((s for s in skills if s.name == skill_name), None)
 
         if updated_skill is None:
diff --git a/backend/app/gateway/routers/suggestions.py b/backend/app/gateway/routers/suggestions.py
index bfda01491..1c0a75371 100644
--- a/backend/app/gateway/routers/suggestions.py
+++ b/backend/app/gateway/routers/suggestions.py
@@ -1,10 +1,13 @@
 import json
 import logging
 
-from fastapi import APIRouter
+from fastapi import APIRouter, Depends, Request
 from langchain_core.messages import HumanMessage, SystemMessage
 from pydantic import BaseModel, Field
 
+from app.gateway.authz import require_permission
+from app.gateway.deps import get_config
+from deerflow.config.app_config import AppConfig
 from deerflow.models import create_chat_model
 
 logger = logging.getLogger(__name__)
@@ -98,12 +101,13 @@ def _format_conversation(messages: list[SuggestionMessage]) -> str:
     summary="Generate Follow-up Questions",
     description="Generate short follow-up questions a user might ask next, based on recent conversation context.",
 )
-async def generate_suggestions(thread_id: str, request: SuggestionsRequest) -> SuggestionsResponse:
-    if not request.messages:
+@require_permission("threads", "read", owner_check=True)
+async def generate_suggestions(thread_id: str, body: SuggestionsRequest, request: Request, app_config: AppConfig = Depends(get_config)) -> SuggestionsResponse:
+    if not body.messages:
         return SuggestionsResponse(suggestions=[])
 
-    n = request.n
-    conversation = _format_conversation(request.messages)
+    n = body.n
+    conversation = _format_conversation(body.messages)
     if not conversation:
         return SuggestionsResponse(suggestions=[])
 
@@ -120,7 +124,7 @@ async def generate_suggestions(thread_id: str, request: SuggestionsRequest) -> S
     user_content = f"Conversation Context:\n{conversation}\n\nGenerate {n} follow-up questions"
 
     try:
-        model = create_chat_model(name=request.model_name, thinking_enabled=False)
+        model = create_chat_model(name=body.model_name, thinking_enabled=False, app_config=app_config)
         response = await model.ainvoke([SystemMessage(content=system_instruction), HumanMessage(content=user_content)], config={"run_name": "suggest_agent"})
         raw = _extract_response_text(response.content)
         suggestions = _parse_json_string_list(raw) or []
diff --git a/backend/app/gateway/routers/thread_runs.py b/backend/app/gateway/routers/thread_runs.py
index 105fc9ca6..e21375ab9 100644
--- a/backend/app/gateway/routers/thread_runs.py
+++ b/backend/app/gateway/routers/thread_runs.py
@@ -19,7 +19,8 @@ from fastapi import APIRouter, HTTPException, Query, Request
 from fastapi.responses import Response, StreamingResponse
 from pydantic import BaseModel, Field
 
-from app.gateway.deps import get_checkpointer, get_run_manager, get_stream_bridge
+from app.gateway.authz import require_permission
+from app.gateway.deps import get_checkpointer, get_current_user, get_feedback_repo, get_run_event_store, get_run_manager, get_run_store, get_stream_bridge
 from app.gateway.services import sse_consumer, start_run
 from deerflow.runtime import RunRecord, serialize_channel_values
 
@@ -53,6 +54,7 @@ class RunCreateRequest(BaseModel):
     after_seconds: float | None = Field(default=None, description="Delayed execution")
     if_not_exists: Literal["reject", "create"] = Field(default="create", description="Thread creation policy")
     feedback_keys: list[str] | None = Field(default=None, description="LangSmith feedback keys")
+    follow_up_to_run_id: str | None = Field(default=None, description="Run ID this message follows up on. Auto-detected from latest successful run if not provided.")
 
 
 class RunResponse(BaseModel):
@@ -92,6 +94,7 @@ def _record_to_response(record: RunRecord) -> RunResponse:
 
 
 @router.post("/{thread_id}/runs", response_model=RunResponse)
+@require_permission("runs", "create", owner_check=True, require_existing=True)
 async def create_run(thread_id: str, body: RunCreateRequest, request: Request) -> RunResponse:
     """Create a background run (returns immediately)."""
     record = await start_run(body, thread_id, request)
@@ -99,6 +102,7 @@ async def create_run(thread_id: str, body: RunCreateRequest, request: Request) -
 
 
 @router.post("/{thread_id}/runs/stream")
+@require_permission("runs", "create", owner_check=True, require_existing=True)
 async def stream_run(thread_id: str, body: RunCreateRequest, request: Request) -> StreamingResponse:
     """Create a run and stream events via SSE.
 
@@ -126,6 +130,7 @@ async def stream_run(thread_id: str, body: RunCreateRequest, request: Request) -
 
 
 @router.post("/{thread_id}/runs/wait", response_model=dict)
+@require_permission("runs", "create", owner_check=True, require_existing=True)
 async def wait_run(thread_id: str, body: RunCreateRequest, request: Request) -> dict:
     """Create a run and block until it completes, returning the final state."""
     record = await start_run(body, thread_id, request)
@@ -151,6 +156,7 @@ async def wait_run(thread_id: str, body: RunCreateRequest, request: Request) ->
 
 
 @router.get("/{thread_id}/runs", response_model=list[RunResponse])
+@require_permission("runs", "read", owner_check=True)
 async def list_runs(thread_id: str, request: Request) -> list[RunResponse]:
     """List all runs for a thread."""
     run_mgr = get_run_manager(request)
@@ -159,6 +165,7 @@ async def list_runs(thread_id: str, request: Request) -> list[RunResponse]:
 
 
 @router.get("/{thread_id}/runs/{run_id}", response_model=RunResponse)
+@require_permission("runs", "read", owner_check=True)
 async def get_run(thread_id: str, run_id: str, request: Request) -> RunResponse:
     """Get details of a specific run."""
     run_mgr = get_run_manager(request)
@@ -169,6 +176,7 @@ async def get_run(thread_id: str, run_id: str, request: Request) -> RunResponse:
 
 
 @router.post("/{thread_id}/runs/{run_id}/cancel")
+@require_permission("runs", "cancel", owner_check=True, require_existing=True)
 async def cancel_run(
     thread_id: str,
     run_id: str,
@@ -206,6 +214,7 @@ async def cancel_run(
 
 
 @router.get("/{thread_id}/runs/{run_id}/join")
+@require_permission("runs", "read", owner_check=True)
 async def join_run(thread_id: str, run_id: str, request: Request) -> StreamingResponse:
     """Join an existing run's SSE stream."""
     bridge = get_stream_bridge(request)
@@ -226,6 +235,7 @@ async def join_run(thread_id: str, run_id: str, request: Request) -> StreamingRe
 
 
 @router.api_route("/{thread_id}/runs/{run_id}/stream", methods=["GET", "POST"], response_model=None)
+@require_permission("runs", "read", owner_check=True)
 async def stream_existing_run(
     thread_id: str,
     run_id: str,
@@ -265,3 +275,99 @@ async def stream_existing_run(
             "X-Accel-Buffering": "no",
         },
     )
+
+
+# ---------------------------------------------------------------------------
+# Messages / Events / Token usage endpoints
+# ---------------------------------------------------------------------------
+
+
+@router.get("/{thread_id}/messages")
+@require_permission("runs", "read", owner_check=True)
+async def list_thread_messages(
+    thread_id: str,
+    request: Request,
+    limit: int = Query(default=50, le=200),
+    before_seq: int | None = Query(default=None),
+    after_seq: int | None = Query(default=None),
+) -> list[dict]:
+    """Return displayable messages for a thread (across all runs), with feedback attached."""
+    event_store = get_run_event_store(request)
+    messages = await event_store.list_messages(thread_id, limit=limit, before_seq=before_seq, after_seq=after_seq)
+
+    # Attach feedback to the last AI message of each run
+    feedback_repo = get_feedback_repo(request)
+    user_id = await get_current_user(request)
+    feedback_map = await feedback_repo.list_by_thread_grouped(thread_id, user_id=user_id)
+
+    # Find the last ai_message per run_id
+    last_ai_per_run: dict[str, int] = {}  # run_id -> index in messages list
+    for i, msg in enumerate(messages):
+        if msg.get("event_type") == "ai_message":
+            last_ai_per_run[msg["run_id"]] = i
+
+    # Attach feedback field
+    last_ai_indices = set(last_ai_per_run.values())
+    for i, msg in enumerate(messages):
+        if i in last_ai_indices:
+            run_id = msg["run_id"]
+            fb = feedback_map.get(run_id)
+            msg["feedback"] = {
+                "feedback_id": fb["feedback_id"],
+                "rating": fb["rating"],
+                "comment": fb.get("comment"),
+            } if fb else None
+        else:
+            msg["feedback"] = None
+
+    return messages
+
+
+@router.get("/{thread_id}/runs/{run_id}/messages")
+@require_permission("runs", "read", owner_check=True)
+async def list_run_messages(
+    thread_id: str,
+    run_id: str,
+    request: Request,
+    limit: int = Query(default=50, le=200, ge=1),
+    before_seq: int | None = Query(default=None),
+    after_seq: int | None = Query(default=None),
+) -> dict:
+    """Return paginated messages for a specific run.
+
+    Response: { data: [...], has_more: bool }
+    """
+    event_store = get_run_event_store(request)
+    rows = await event_store.list_messages_by_run(
+        thread_id, run_id,
+        limit=limit + 1,
+        before_seq=before_seq,
+        after_seq=after_seq,
+    )
+    has_more = len(rows) > limit
+    data = rows[:limit] if has_more else rows
+    return {"data": data, "has_more": has_more}
+
+
+@router.get("/{thread_id}/runs/{run_id}/events")
+@require_permission("runs", "read", owner_check=True)
+async def list_run_events(
+    thread_id: str,
+    run_id: str,
+    request: Request,
+    event_types: str | None = Query(default=None),
+    limit: int = Query(default=500, le=2000),
+) -> list[dict]:
+    """Return the full event stream for a run (debug/audit)."""
+    event_store = get_run_event_store(request)
+    types = event_types.split(",") if event_types else None
+    return await event_store.list_events(thread_id, run_id, event_types=types, limit=limit)
+
+
+@router.get("/{thread_id}/token-usage")
+@require_permission("threads", "read", owner_check=True)
+async def thread_token_usage(thread_id: str, request: Request) -> dict:
+    """Thread-level token usage aggregation."""
+    run_store = get_run_store(request)
+    agg = await run_store.aggregate_tokens_by_thread(thread_id)
+    return {"thread_id": thread_id, **agg}
diff --git a/backend/app/gateway/routers/threads.py b/backend/app/gateway/routers/threads.py
index 808604980..c7bfa69b6 100644
--- a/backend/app/gateway/routers/threads.py
+++ b/backend/app/gateway/routers/threads.py
@@ -13,28 +13,41 @@ matching the LangGraph Platform wire format expected by the
 from __future__ import annotations
 
 import logging
+import re
 import time
 import uuid
 from typing import Any
 
 from fastapi import APIRouter, HTTPException, Request
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, field_validator
 
-from app.gateway.deps import get_checkpointer, get_store
+from app.gateway.authz import require_permission
+from app.gateway.deps import get_checkpointer
+from app.gateway.utils import sanitize_log_param
 from deerflow.config.paths import Paths, get_paths
 from deerflow.runtime import serialize_channel_values
-
-# ---------------------------------------------------------------------------
-# Store namespace
-# ---------------------------------------------------------------------------
-
-THREADS_NS: tuple[str, ...] = ("threads",)
-"""Namespace used by the Store for thread metadata records."""
+from deerflow.runtime.user_context import get_effective_user_id
 
 logger = logging.getLogger(__name__)
 router = APIRouter(prefix="/api/threads", tags=["threads"])
 
 
+# Metadata keys that the server controls; clients are not allowed to set
+# them. Pydantic ``@field_validator("metadata")`` strips them on every
+# inbound model below so a malicious client cannot reflect a forged
+# owner identity through the API surface. Defense-in-depth — the
+# row-level invariant is still ``threads_meta.user_id`` populated from
+# the auth contextvar; this list closes the metadata-blob echo gap.
+_SERVER_RESERVED_METADATA_KEYS: frozenset[str] = frozenset({"owner_id", "user_id"})
+
+
+def _strip_reserved_metadata(metadata: dict[str, Any] | None) -> dict[str, Any]:
+    """Return ``metadata`` with server-controlled keys removed."""
+    if not metadata:
+        return metadata or {}
+    return {k: v for k, v in metadata.items() if k not in _SERVER_RESERVED_METADATA_KEYS}
+
+
 # ---------------------------------------------------------------------------
 # Response / request models
 # ---------------------------------------------------------------------------
@@ -63,8 +76,11 @@ class ThreadCreateRequest(BaseModel):
     """Request body for creating a thread."""
 
     thread_id: str | None = Field(default=None, description="Optional thread ID (auto-generated if omitted)")
+    assistant_id: str | None = Field(default=None, description="Associate thread with an assistant")
     metadata: dict[str, Any] = Field(default_factory=dict, description="Initial metadata")
 
+    _strip_reserved = field_validator("metadata")(classmethod(lambda cls, v: _strip_reserved_metadata(v)))
+
 
 class ThreadSearchRequest(BaseModel):
     """Request body for searching threads."""
@@ -93,6 +109,8 @@ class ThreadPatchRequest(BaseModel):
 
     metadata: dict[str, Any] = Field(default_factory=dict, description="Metadata to merge")
 
+    _strip_reserved = field_validator("metadata")(classmethod(lambda cls, v: _strip_reserved_metadata(v)))
+
 
 class ThreadStateUpdateRequest(BaseModel):
     """Request body for updating thread state (human-in-the-loop resume)."""
@@ -126,70 +144,25 @@ class ThreadHistoryRequest(BaseModel):
 # ---------------------------------------------------------------------------
 
 
-def _delete_thread_data(thread_id: str, paths: Paths | None = None) -> ThreadDeleteResponse:
+def _delete_thread_data(thread_id: str, paths: Paths | None = None, *, user_id: str | None = None) -> ThreadDeleteResponse:
     """Delete local persisted filesystem data for a thread."""
     path_manager = paths or get_paths()
     try:
-        path_manager.delete_thread_dir(thread_id)
+        path_manager.delete_thread_dir(thread_id, user_id=user_id)
     except ValueError as exc:
         raise HTTPException(status_code=422, detail=str(exc)) from exc
     except FileNotFoundError:
         # Not critical — thread data may not exist on disk
-        logger.debug("No local thread data to delete for %s", thread_id)
+        logger.debug("No local thread data to delete for %s", sanitize_log_param(thread_id))
         return ThreadDeleteResponse(success=True, message=f"No local data for {thread_id}")
     except Exception as exc:
-        logger.exception("Failed to delete thread data for %s", thread_id)
+        logger.exception("Failed to delete thread data for %s", sanitize_log_param(thread_id))
         raise HTTPException(status_code=500, detail="Failed to delete local thread data.") from exc
 
-    logger.info("Deleted local thread data for %s", thread_id)
+    logger.info("Deleted local thread data for %s", sanitize_log_param(thread_id))
     return ThreadDeleteResponse(success=True, message=f"Deleted local thread data for {thread_id}")
 
 
-async def _store_get(store, thread_id: str) -> dict | None:
-    """Fetch a thread record from the Store; returns ``None`` if absent."""
-    item = await store.aget(THREADS_NS, thread_id)
-    return item.value if item is not None else None
-
-
-async def _store_put(store, record: dict) -> None:
-    """Write a thread record to the Store."""
-    await store.aput(THREADS_NS, record["thread_id"], record)
-
-
-async def _store_upsert(store, thread_id: str, *, metadata: dict | None = None, values: dict | None = None) -> None:
-    """Create or refresh a thread record in the Store.
-
-    On creation the record is written with ``status="idle"``.  On update only
-    ``updated_at`` (and optionally ``metadata`` / ``values``) are changed so
-    that existing fields are preserved.
-
-    ``values`` carries the agent-state snapshot exposed to the frontend
-    (currently just ``{"title": "..."}``).
-    """
-    now = time.time()
-    existing = await _store_get(store, thread_id)
-    if existing is None:
-        await _store_put(
-            store,
-            {
-                "thread_id": thread_id,
-                "status": "idle",
-                "created_at": now,
-                "updated_at": now,
-                "metadata": metadata or {},
-                "values": values or {},
-            },
-        )
-    else:
-        val = dict(existing)
-        val["updated_at"] = now
-        if metadata:
-            val.setdefault("metadata", {}).update(metadata)
-        if values:
-            val.setdefault("values", {}).update(values)
-        await _store_put(store, val)
-
-
 def _derive_thread_status(checkpoint_tuple) -> str:
     """Derive thread status from checkpoint metadata."""
     if checkpoint_tuple is None:
@@ -215,22 +188,18 @@ def _derive_thread_status(checkpoint_tuple) -> str:
 
 
 @router.delete("/{thread_id}", response_model=ThreadDeleteResponse)
+@require_permission("threads", "delete", owner_check=True, require_existing=True)
 async def delete_thread_data(thread_id: str, request: Request) -> ThreadDeleteResponse:
     """Delete local persisted filesystem data for a thread.
 
     Cleans DeerFlow-managed thread directories, removes checkpoint data,
-    and removes the thread record from the Store.
+    and removes the thread_meta row from the configured ThreadMetaStore
+    (sqlite or memory).
     """
-    # Clean local filesystem
-    response = _delete_thread_data(thread_id)
+    from app.gateway.deps import get_thread_store
 
-    # Remove from Store (best-effort)
-    store = get_store(request)
-    if store is not None:
-        try:
-            await store.adelete(THREADS_NS, thread_id)
-        except Exception:
-            logger.debug("Could not delete store record for thread %s (not critical)", thread_id)
+    # Clean local filesystem
+    response = _delete_thread_data(thread_id, user_id=get_effective_user_id())
 
     # Remove checkpoints (best-effort)
     checkpointer = getattr(request.app.state, "checkpointer", None)
@@ -239,7 +208,15 @@ async def delete_thread_data(thread_id: str, request: Request) -> ThreadDeleteRe
             if hasattr(checkpointer, "adelete_thread"):
                 await checkpointer.adelete_thread(thread_id)
         except Exception:
-            logger.debug("Could not delete checkpoints for thread %s (not critical)", thread_id)
+            logger.debug("Could not delete checkpoints for thread %s (not critical)", sanitize_log_param(thread_id))
+
+    # Remove thread_meta row (best-effort) — required for sqlite backend
+    # so the deleted thread no longer appears in /threads/search.
+    try:
+        thread_store = get_thread_store(request)
+        await thread_store.delete(thread_id)
+    except Exception:
+        logger.debug("Could not delete thread_meta for %s (not critical)", sanitize_log_param(thread_id))
 
     return response
 
@@ -248,43 +225,40 @@ async def delete_thread_data(thread_id: str, request: Request) -> ThreadDeleteRe
 async def create_thread(body: ThreadCreateRequest, request: Request) -> ThreadResponse:
     """Create a new thread.
 
-    The thread record is written to the Store (for fast listing) and an
-    empty checkpoint is written to the checkpointer (for state reads).
+    Writes a thread_meta record (so the thread appears in /threads/search)
+    and an empty checkpoint (so state endpoints work immediately).
     Idempotent: returns the existing record when ``thread_id`` already exists.
     """
-    store = get_store(request)
+    from app.gateway.deps import get_thread_store
+
     checkpointer = get_checkpointer(request)
+    thread_store = get_thread_store(request)
     thread_id = body.thread_id or str(uuid.uuid4())
     now = time.time()
+    # ``body.metadata`` is already stripped of server-reserved keys by
+    # ``ThreadCreateRequest._strip_reserved`` — see the model definition.
 
-    # Idempotency: return existing record from Store when already present
-    if store is not None:
-        existing_record = await _store_get(store, thread_id)
-        if existing_record is not None:
-            return ThreadResponse(
-                thread_id=thread_id,
-                status=existing_record.get("status", "idle"),
-                created_at=str(existing_record.get("created_at", "")),
-                updated_at=str(existing_record.get("updated_at", "")),
-                metadata=existing_record.get("metadata", {}),
-            )
+    # Idempotency: return existing record when already present
+    existing_record = await thread_store.get(thread_id)
+    if existing_record is not None:
+        return ThreadResponse(
+            thread_id=thread_id,
+            status=existing_record.get("status", "idle"),
+            created_at=str(existing_record.get("created_at", "")),
+            updated_at=str(existing_record.get("updated_at", "")),
+            metadata=existing_record.get("metadata", {}),
+        )
 
-    # Write thread record to Store
-    if store is not None:
-        try:
-            await _store_put(
-                store,
-                {
-                    "thread_id": thread_id,
-                    "status": "idle",
-                    "created_at": now,
-                    "updated_at": now,
-                    "metadata": body.metadata,
-                },
-            )
-        except Exception:
-            logger.exception("Failed to write thread %s to store", thread_id)
-            raise HTTPException(status_code=500, detail="Failed to create thread")
+    # Write thread_meta so the thread appears in /threads/search immediately
+    try:
+        await thread_store.create(
+            thread_id,
+            assistant_id=getattr(body, "assistant_id", None),
+            metadata=body.metadata,
+        )
+    except Exception:
+        logger.exception("Failed to write thread_meta for %s", sanitize_log_param(thread_id))
+        raise HTTPException(status_code=500, detail="Failed to create thread")
 
     # Write an empty checkpoint so state endpoints work immediately
     config = {"configurable": {"thread_id": thread_id, "checkpoint_ns": ""}}
@@ -301,10 +275,10 @@ async def create_thread(body: ThreadCreateRequest, request: Request) -> ThreadRe
         }
         await checkpointer.aput(config, empty_checkpoint(), ckpt_metadata, {})
     except Exception:
-        logger.exception("Failed to create checkpoint for thread %s", thread_id)
+        logger.exception("Failed to create checkpoint for thread %s", sanitize_log_param(thread_id))
         raise HTTPException(status_code=500, detail="Failed to create thread")
 
-    logger.info("Thread created: %s", thread_id)
+    logger.info("Thread created: %s", sanitize_log_param(thread_id))
     return ThreadResponse(
         thread_id=thread_id,
         status="idle",
@@ -318,166 +292,91 @@ async def create_thread(body: ThreadCreateRequest, request: Request) -> ThreadRe
 async def search_threads(body: ThreadSearchRequest, request: Request) -> list[ThreadResponse]:
     """Search and list threads.
 
-    Two-phase approach:
-
-    **Phase 1 — Store (fast path, O(threads))**: returns threads that were
-    created or run through this Gateway.  Store records are tiny metadata
-    dicts so fetching all of them at once is cheap.
-
-    **Phase 2 — Checkpointer supplement (lazy migration)**: threads that
-    were created directly by LangGraph Server (and therefore absent from the
-    Store) are discovered here by iterating the shared checkpointer.  Any
-    newly found thread is immediately written to the Store so that the next
-    search skips Phase 2 for that thread — the Store converges to a full
-    index over time without a one-shot migration job.
+    Delegates to the configured ThreadMetaStore implementation
+    (SQL-backed for sqlite/postgres, Store-backed for memory mode).
     """
-    store = get_store(request)
-    checkpointer = get_checkpointer(request)
+    from app.gateway.deps import get_thread_store
 
-    # -----------------------------------------------------------------------
-    # Phase 1: Store
-    # -----------------------------------------------------------------------
-    merged: dict[str, ThreadResponse] = {}
-
-    if store is not None:
-        try:
-            items = await store.asearch(THREADS_NS, limit=10_000)
-        except Exception:
-            logger.warning("Store search failed — falling back to checkpointer only", exc_info=True)
-            items = []
-
-        for item in items:
-            val = item.value
-            merged[val["thread_id"]] = ThreadResponse(
-                thread_id=val["thread_id"],
-                status=val.get("status", "idle"),
-                created_at=str(val.get("created_at", "")),
-                updated_at=str(val.get("updated_at", "")),
-                metadata=val.get("metadata", {}),
-                values=val.get("values", {}),
-            )
-
-    # -----------------------------------------------------------------------
-    # Phase 2: Checkpointer supplement
-    # Discovers threads not yet in the Store (e.g. created by LangGraph
-    # Server) and lazily migrates them so future searches skip this phase.
-    # -----------------------------------------------------------------------
-    try:
-        async for checkpoint_tuple in checkpointer.alist(None):
-            cfg = getattr(checkpoint_tuple, "config", {})
-            thread_id = cfg.get("configurable", {}).get("thread_id")
-            if not thread_id or thread_id in merged:
-                continue
-
-            # Skip sub-graph checkpoints (checkpoint_ns is non-empty for those)
-            if cfg.get("configurable", {}).get("checkpoint_ns", ""):
-                continue
-
-            ckpt_meta = getattr(checkpoint_tuple, "metadata", {}) or {}
-            # Strip LangGraph internal keys from the user-visible metadata dict
-            user_meta = {k: v for k, v in ckpt_meta.items() if k not in ("created_at", "updated_at", "step", "source", "writes", "parents")}
-
-            # Extract state values (title) from the checkpoint's channel_values
-            checkpoint_data = getattr(checkpoint_tuple, "checkpoint", {}) or {}
-            channel_values = checkpoint_data.get("channel_values", {})
-            ckpt_values = {}
-            if title := channel_values.get("title"):
-                ckpt_values["title"] = title
-
-            thread_resp = ThreadResponse(
-                thread_id=thread_id,
-                status=_derive_thread_status(checkpoint_tuple),
-                created_at=str(ckpt_meta.get("created_at", "")),
-                updated_at=str(ckpt_meta.get("updated_at", ckpt_meta.get("created_at", ""))),
-                metadata=user_meta,
-                values=ckpt_values,
-            )
-            merged[thread_id] = thread_resp
-
-            # Lazy migration — write to Store so the next search finds it there
-            if store is not None:
-                try:
-                    await _store_upsert(store, thread_id, metadata=user_meta, values=ckpt_values or None)
-                except Exception:
-                    logger.debug("Failed to migrate thread %s to store (non-fatal)", thread_id)
-    except Exception:
-        logger.exception("Checkpointer scan failed during thread search")
-        # Don't raise — return whatever was collected from Store + partial scan
-
-    # -----------------------------------------------------------------------
-    # Phase 3: Filter → sort → paginate
-    # -----------------------------------------------------------------------
-    results = list(merged.values())
-
-    if body.metadata:
-        results = [r for r in results if all(r.metadata.get(k) == v for k, v in body.metadata.items())]
-
-    if body.status:
-        results = [r for r in results if r.status == body.status]
-
-    results.sort(key=lambda r: r.updated_at, reverse=True)
-    return results[body.offset : body.offset + body.limit]
+    repo = get_thread_store(request)
+    rows = await repo.search(
+        metadata=body.metadata or None,
+        status=body.status,
+        limit=body.limit,
+        offset=body.offset,
+    )
+    return [
+        ThreadResponse(
+            thread_id=r["thread_id"],
+            status=r.get("status", "idle"),
+            created_at=r.get("created_at", ""),
+            updated_at=r.get("updated_at", ""),
+            metadata=r.get("metadata", {}),
+            values={"title": r["display_name"]} if r.get("display_name") else {},
+            interrupts={},
+        )
+        for r in rows
+    ]
 
 
 @router.patch("/{thread_id}", response_model=ThreadResponse)
+@require_permission("threads", "write", owner_check=True, require_existing=True)
 async def patch_thread(thread_id: str, body: ThreadPatchRequest, request: Request) -> ThreadResponse:
     """Merge metadata into a thread record."""
-    store = get_store(request)
-    if store is None:
-        raise HTTPException(status_code=503, detail="Store not available")
+    from app.gateway.deps import get_thread_store
 
-    record = await _store_get(store, thread_id)
+    thread_store = get_thread_store(request)
+    record = await thread_store.get(thread_id)
     if record is None:
         raise HTTPException(status_code=404, detail=f"Thread {thread_id} not found")
 
-    now = time.time()
-    updated = dict(record)
-    updated.setdefault("metadata", {}).update(body.metadata)
-    updated["updated_at"] = now
-
+    # ``body.metadata`` already stripped by ``ThreadPatchRequest._strip_reserved``.
     try:
-        await _store_put(store, updated)
+        await thread_store.update_metadata(thread_id, body.metadata)
     except Exception:
-        logger.exception("Failed to patch thread %s", thread_id)
+        logger.exception("Failed to patch thread %s", sanitize_log_param(thread_id))
         raise HTTPException(status_code=500, detail="Failed to update thread")
 
+    # Re-read to get the merged metadata + refreshed updated_at
+    record = await thread_store.get(thread_id) or record
     return ThreadResponse(
         thread_id=thread_id,
-        status=updated.get("status", "idle"),
-        created_at=str(updated.get("created_at", "")),
-        updated_at=str(now),
-        metadata=updated.get("metadata", {}),
+        status=record.get("status", "idle"),
+        created_at=str(record.get("created_at", "")),
+        updated_at=str(record.get("updated_at", "")),
+        metadata=record.get("metadata", {}),
     )
 
 
 @router.get("/{thread_id}", response_model=ThreadResponse)
+@require_permission("threads", "read", owner_check=True)
 async def get_thread(thread_id: str, request: Request) -> ThreadResponse:
     """Get thread info.
 
-    Reads metadata from the Store and derives the accurate execution
-    status from the checkpointer.  Falls back to the checkpointer alone
-    for threads that pre-date Store adoption (backward compat).
+    Reads metadata from the ThreadMetaStore and derives the accurate
+    execution status from the checkpointer.  Falls back to the checkpointer
+    alone for threads that pre-date ThreadMetaStore adoption (backward compat).
     """
-    store = get_store(request)
+    from app.gateway.deps import get_thread_store
+
+    thread_store = get_thread_store(request)
     checkpointer = get_checkpointer(request)
 
-    record: dict | None = None
-    if store is not None:
-        record = await _store_get(store, thread_id)
+    record: dict | None = await thread_store.get(thread_id)
 
     # Derive accurate status from the checkpointer
     config = {"configurable": {"thread_id": thread_id, "checkpoint_ns": ""}}
     try:
         checkpoint_tuple = await checkpointer.aget_tuple(config)
     except Exception:
-        logger.exception("Failed to get checkpoint for thread %s", thread_id)
+        logger.exception("Failed to get checkpoint for thread %s", sanitize_log_param(thread_id))
         raise HTTPException(status_code=500, detail="Failed to get thread")
 
     if record is None and checkpoint_tuple is None:
         raise HTTPException(status_code=404, detail=f"Thread {thread_id} not found")
 
-    # If the thread exists in the checkpointer but not the store (e.g. legacy
-    # data), synthesize a minimal store record from the checkpoint metadata.
+    # If the thread exists in the checkpointer but not in thread_meta (e.g.
+    # legacy data created before thread_meta adoption), synthesize a minimal
+    # record from the checkpoint metadata.
     if record is None and checkpoint_tuple is not None:
         ckpt_meta = getattr(checkpoint_tuple, "metadata", {}) or {}
         record = {
@@ -505,7 +404,9 @@ async def get_thread(thread_id: str, request: Request) -> ThreadResponse:
     )
 
 
+# ---------------------------------------------------------------------------
 @router.get("/{thread_id}/state", response_model=ThreadStateResponse)
+@require_permission("threads", "read", owner_check=True)
 async def get_thread_state(thread_id: str, request: Request) -> ThreadStateResponse:
     """Get the latest state snapshot for a thread.
 
@@ -518,7 +419,7 @@ async def get_thread_state(thread_id: str, request: Request) -> ThreadStateRespo
     try:
         checkpoint_tuple = await checkpointer.aget_tuple(config)
     except Exception:
-        logger.exception("Failed to get state for thread %s", thread_id)
+        logger.exception("Failed to get state for thread %s", sanitize_log_param(thread_id))
         raise HTTPException(status_code=500, detail="Failed to get thread state")
 
     if checkpoint_tuple is None:
@@ -542,8 +443,10 @@ async def get_thread_state(thread_id: str, request: Request) -> ThreadStateRespo
     next_tasks = [t.name for t in tasks_raw if hasattr(t, "name")]
     tasks = [{"id": getattr(t, "id", ""), "name": getattr(t, "name", "")} for t in tasks_raw]
 
+    values = serialize_channel_values(channel_values)
+
     return ThreadStateResponse(
-        values=serialize_channel_values(channel_values),
+        values=values,
         next=next_tasks,
         metadata=metadata,
         checkpoint={"id": checkpoint_id, "ts": str(metadata.get("created_at", ""))},
@@ -555,15 +458,19 @@ async def get_thread_state(thread_id: str, request: Request) -> ThreadStateRespo
 
 
 @router.post("/{thread_id}/state", response_model=ThreadStateResponse)
+@require_permission("threads", "write", owner_check=True, require_existing=True)
 async def update_thread_state(thread_id: str, body: ThreadStateUpdateRequest, request: Request) -> ThreadStateResponse:
     """Update thread state (e.g. for human-in-the-loop resume or title rename).
 
     Writes a new checkpoint that merges *body.values* into the latest
-    channel values, then syncs any updated ``title`` field back to the Store
-    so that ``/threads/search`` reflects the change immediately.
+    channel values, then syncs any updated ``title`` field through the
+    ThreadMetaStore abstraction so that ``/threads/search`` reflects the
+    change immediately in both sqlite and memory backends.
     """
+    from app.gateway.deps import get_thread_store
+
     checkpointer = get_checkpointer(request)
-    store = get_store(request)
+    thread_store = get_thread_store(request)
 
     # checkpoint_ns must be present in the config for aput — default to ""
     # (the root graph namespace).  checkpoint_id is optional; omitting it
@@ -580,7 +487,7 @@ async def update_thread_state(thread_id: str, body: ThreadStateUpdateRequest, re
     try:
         checkpoint_tuple = await checkpointer.aget_tuple(read_config)
     except Exception:
-        logger.exception("Failed to get state for thread %s", thread_id)
+        logger.exception("Failed to get state for thread %s", sanitize_log_param(thread_id))
         raise HTTPException(status_code=500, detail="Failed to get thread state")
 
     if checkpoint_tuple is None:
@@ -614,19 +521,22 @@ async def update_thread_state(thread_id: str, body: ThreadStateUpdateRequest, re
     try:
         new_config = await checkpointer.aput(write_config, checkpoint, metadata, {})
     except Exception:
-        logger.exception("Failed to update state for thread %s", thread_id)
+        logger.exception("Failed to update state for thread %s", sanitize_log_param(thread_id))
         raise HTTPException(status_code=500, detail="Failed to update thread state")
 
     new_checkpoint_id: str | None = None
     if isinstance(new_config, dict):
         new_checkpoint_id = new_config.get("configurable", {}).get("checkpoint_id")
 
-    # Sync title changes to the Store so /threads/search reflects them immediately.
-    if store is not None and body.values and "title" in body.values:
-        try:
-            await _store_upsert(store, thread_id, values={"title": body.values["title"]})
-        except Exception:
-            logger.debug("Failed to sync title to store for thread %s (non-fatal)", thread_id)
+    # Sync title changes through the ThreadMetaStore abstraction so /threads/search
+    # reflects them immediately in both sqlite and memory backends.
+    if body.values and "title" in body.values:
+        new_title = body.values["title"]
+        if new_title:  # Skip empty strings and None
+            try:
+                await thread_store.update_display_name(thread_id, new_title)
+            except Exception:
+                logger.debug("Failed to sync title to thread_meta for %s (non-fatal)", sanitize_log_param(thread_id))
 
     return ThreadStateResponse(
         values=serialize_channel_values(channel_values),
@@ -638,8 +548,16 @@ async def update_thread_state(thread_id: str, body: ThreadStateUpdateRequest, re
 
 
 @router.post("/{thread_id}/history", response_model=list[HistoryEntry])
+@require_permission("threads", "read", owner_check=True)
 async def get_thread_history(thread_id: str, body: ThreadHistoryRequest, request: Request) -> list[HistoryEntry]:
-    """Get checkpoint history for a thread."""
+    """Get checkpoint history for a thread.
+
+    Messages are read from the checkpointer's channel values (the
+    authoritative source) and serialized via
+    :func:`~deerflow.runtime.serialization.serialize_channel_values`.
+    Only the latest (first) checkpoint carries the ``messages`` key to
+    avoid duplicating them across every entry.
+    """
     checkpointer = get_checkpointer(request)
 
     config: dict[str, Any] = {"configurable": {"thread_id": thread_id}}
@@ -647,6 +565,7 @@ async def get_thread_history(thread_id: str, body: ThreadHistoryRequest, request
         config["configurable"]["checkpoint_id"] = body.before
 
     entries: list[HistoryEntry] = []
+    is_latest_checkpoint = True
     try:
         async for checkpoint_tuple in checkpointer.alist(config, limit=body.limit):
             ckpt_config = getattr(checkpoint_tuple, "config", {})
@@ -661,22 +580,42 @@ async def get_thread_history(thread_id: str, body: ThreadHistoryRequest, request
 
             channel_values = checkpoint.get("channel_values", {})
 
+            # Build values from checkpoint channel_values
+            values: dict[str, Any] = {}
+            if title := channel_values.get("title"):
+                values["title"] = title
+            if thread_data := channel_values.get("thread_data"):
+                values["thread_data"] = thread_data
+
+            # Attach messages only to the latest checkpoint entry.
+            if is_latest_checkpoint:
+                messages = channel_values.get("messages")
+                if messages:
+                    values["messages"] = serialize_channel_values({"messages": messages}).get("messages", [])
+            is_latest_checkpoint = False
+
             # Derive next tasks
             tasks_raw = getattr(checkpoint_tuple, "tasks", []) or []
             next_tasks = [t.name for t in tasks_raw if hasattr(t, "name")]
 
+            # Strip LangGraph internal keys from metadata
+            user_meta = {k: v for k, v in metadata.items() if k not in ("created_at", "updated_at", "step", "source", "writes", "parents")}
+            # Keep step for ordering context
+            if "step" in metadata:
+                user_meta["step"] = metadata["step"]
+
             entries.append(
                 HistoryEntry(
                     checkpoint_id=checkpoint_id,
                     parent_checkpoint_id=parent_id,
-                    metadata=metadata,
-                    values=serialize_channel_values(channel_values),
+                    metadata=user_meta,
+                    values=values,
                     created_at=str(metadata.get("created_at", "")),
                     next=next_tasks,
                 )
             )
     except Exception:
-        logger.exception("Failed to get history for thread %s", thread_id)
+        logger.exception("Failed to get history for thread %s", sanitize_log_param(thread_id))
         raise HTTPException(status_code=500, detail="Failed to get thread history")
 
     return entries
diff --git a/backend/app/gateway/routers/uploads.py b/backend/app/gateway/routers/uploads.py
index 6f8be52a1..c74ebf2d5 100644
--- a/backend/app/gateway/routers/uploads.py
+++ b/backend/app/gateway/routers/uploads.py
@@ -4,11 +4,14 @@ import logging
 import os
 import stat
 
-from fastapi import APIRouter, File, HTTPException, UploadFile
+from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile
 from pydantic import BaseModel
 
-from deerflow.config.app_config import get_app_config
+from app.gateway.authz import require_permission
+from app.gateway.deps import get_config
+from deerflow.config.app_config import AppConfig
 from deerflow.config.paths import get_paths
+from deerflow.runtime.user_context import get_effective_user_id
 from deerflow.sandbox.sandbox_provider import SandboxProvider, get_sandbox_provider
 from deerflow.uploads.manager import (
     PathTraversalError,
@@ -58,23 +61,22 @@ def _uses_thread_data_mounts(sandbox_provider: SandboxProvider) -> bool:
     return bool(getattr(sandbox_provider, "uses_thread_data_mounts", False))
 
 
-def _get_uploads_config_value(key: str, default: object) -> object:
+def _get_uploads_config_value(app_config: AppConfig, key: str, default: object) -> object:
     """Read a value from the uploads config, supporting dict and attribute access."""
-    cfg = get_app_config()
-    uploads_cfg = getattr(cfg, "uploads", None)
+    uploads_cfg = getattr(app_config, "uploads", None)
     if isinstance(uploads_cfg, dict):
         return uploads_cfg.get(key, default)
     return getattr(uploads_cfg, key, default)
 
 
-def _auto_convert_documents_enabled() -> bool:
+def _auto_convert_documents_enabled(app_config: AppConfig) -> bool:
     """Return whether automatic host-side document conversion is enabled.
 
     The secure default is disabled unless an operator explicitly opts in via
     uploads.auto_convert_documents in config.yaml.
     """
     try:
-        raw = _get_uploads_config_value("auto_convert_documents", False)
+        raw = _get_uploads_config_value(app_config, "auto_convert_documents", False)
         if isinstance(raw, str):
             return raw.strip().lower() in {"1", "true", "yes", "on"}
         return bool(raw)
@@ -83,9 +85,12 @@ def _auto_convert_documents_enabled() -> bool:
 
 
 @router.post("", response_model=UploadResponse)
+@require_permission("threads", "write", owner_check=True, require_existing=True)
 async def upload_files(
     thread_id: str,
+    request: Request,
     files: list[UploadFile] = File(...),
+    app_config: AppConfig = Depends(get_config),
 ) -> UploadResponse:
     """Upload multiple files to a thread's uploads directory."""
     if not files:
@@ -95,16 +100,16 @@ async def upload_files(
         uploads_dir = ensure_uploads_dir(thread_id)
     except ValueError as e:
         raise HTTPException(status_code=400, detail=str(e))
-    sandbox_uploads = get_paths().sandbox_uploads_dir(thread_id)
+    sandbox_uploads = get_paths().sandbox_uploads_dir(thread_id, user_id=get_effective_user_id())
     uploaded_files = []
 
-    sandbox_provider = get_sandbox_provider()
+    sandbox_provider = get_sandbox_provider(app_config)
     sync_to_sandbox = not _uses_thread_data_mounts(sandbox_provider)
     sandbox = None
     if sync_to_sandbox:
         sandbox_id = sandbox_provider.acquire(thread_id)
         sandbox = sandbox_provider.get(sandbox_id)
-    auto_convert_documents = _auto_convert_documents_enabled()
+    auto_convert_documents = _auto_convert_documents_enabled(app_config)
 
     for file in files:
         if not file.filename:
@@ -166,7 +171,8 @@ async def upload_files(
 
 
 @router.get("/list", response_model=dict)
-async def list_uploaded_files(thread_id: str) -> dict:
+@require_permission("threads", "read", owner_check=True)
+async def list_uploaded_files(thread_id: str, request: Request) -> dict:
     """List all files in a thread's uploads directory."""
     try:
         uploads_dir = get_uploads_dir(thread_id)
@@ -176,7 +182,7 @@ async def list_uploaded_files(thread_id: str) -> dict:
     enrich_file_listing(result, thread_id)
 
     # Gateway additionally includes the sandbox-relative path.
-    sandbox_uploads = get_paths().sandbox_uploads_dir(thread_id)
+    sandbox_uploads = get_paths().sandbox_uploads_dir(thread_id, user_id=get_effective_user_id())
     for f in result["files"]:
         f["path"] = str(sandbox_uploads / f["filename"])
 
@@ -184,7 +190,8 @@ async def list_uploaded_files(thread_id: str) -> dict:
 
 
 @router.delete("/{filename}")
-async def delete_uploaded_file(thread_id: str, filename: str) -> dict:
+@require_permission("threads", "delete", owner_check=True, require_existing=True)
+async def delete_uploaded_file(thread_id: str, filename: str, request: Request) -> dict:
     """Delete a file from a thread's uploads directory."""
     try:
         uploads_dir = get_uploads_dir(thread_id)
diff --git a/backend/app/gateway/services.py b/backend/app/gateway/services.py
index 3b3c40a27..45877e036 100644
--- a/backend/app/gateway/services.py
+++ b/backend/app/gateway/services.py
@@ -8,6 +8,7 @@ frames, and consuming stream bridge events.  Router modules
 from __future__ import annotations
 
 import asyncio
+import dataclasses
 import json
 import logging
 import re
@@ -18,7 +19,8 @@ from typing import Any
 from fastapi import HTTPException, Request
 from langchain_core.messages import HumanMessage
 
-from app.gateway.deps import get_checkpointer, get_run_manager, get_store, get_stream_bridge
+from app.gateway.deps import get_run_context, get_run_manager, get_run_store, get_stream_bridge
+from app.gateway.utils import sanitize_log_param
 from deerflow.runtime import (
     END_SENTINEL,
     HEARTBEAT_SENTINEL,
@@ -188,71 +190,6 @@ def build_run_config(
 # ---------------------------------------------------------------------------
 
 
-async def _upsert_thread_in_store(store, thread_id: str, metadata: dict | None) -> None:
-    """Create or refresh the thread record in the Store.
-
-    Called from :func:`start_run` so that threads created via the stateless
-    ``/runs/stream`` endpoint (which never calls ``POST /threads``) still
-    appear in ``/threads/search`` results.
-    """
-    # Deferred import to avoid circular import with the threads router module.
-    from app.gateway.routers.threads import _store_upsert
-
-    try:
-        await _store_upsert(store, thread_id, metadata=metadata)
-    except Exception:
-        logger.warning("Failed to upsert thread %s in store (non-fatal)", thread_id)
-
-
-async def _sync_thread_title_after_run(
-    run_task: asyncio.Task,
-    thread_id: str,
-    checkpointer: Any,
-    store: Any,
-) -> None:
-    """Wait for *run_task* to finish, then persist the generated title to the Store.
-
-    TitleMiddleware writes the generated title to the LangGraph agent state
-    (checkpointer) but the Gateway's Store record is not updated automatically.
-    This coroutine closes that gap by reading the final checkpoint after the
-    run completes and syncing ``values.title`` into the Store record so that
-    subsequent ``/threads/search`` responses include the correct title.
-
-    Runs as a fire-and-forget :func:`asyncio.create_task`; failures are
-    logged at DEBUG level and never propagate.
-    """
-    # Wait for the background run task to complete (any outcome).
-    # asyncio.wait does not propagate task exceptions — it just returns
-    # when the task is done, cancelled, or failed.
-    await asyncio.wait({run_task})
-
-    # Deferred import to avoid circular import with the threads router module.
-    from app.gateway.routers.threads import _store_get, _store_put
-
-    try:
-        ckpt_config = {"configurable": {"thread_id": thread_id, "checkpoint_ns": ""}}
-        ckpt_tuple = await checkpointer.aget_tuple(ckpt_config)
-        if ckpt_tuple is None:
-            return
-
-        channel_values = ckpt_tuple.checkpoint.get("channel_values", {})
-        title = channel_values.get("title")
-        if not title:
-            return
-
-        existing = await _store_get(store, thread_id)
-        if existing is None:
-            return
-
-        updated = dict(existing)
-        updated.setdefault("values", {})["title"] = title
-        updated["updated_at"] = time.time()
-        await _store_put(store, updated)
-        logger.debug("Synced title %r for thread %s", title, thread_id)
-    except Exception:
-        logger.debug("Failed to sync title for thread %s (non-fatal)", thread_id, exc_info=True)
-
-
 async def start_run(
     body: Any,
     thread_id: str,
@@ -272,11 +209,25 @@ async def start_run(
     """
     bridge = get_stream_bridge(request)
     run_mgr = get_run_manager(request)
-    checkpointer = get_checkpointer(request)
-    store = get_store(request)
+    run_ctx = get_run_context(request)
 
     disconnect = DisconnectMode.cancel if body.on_disconnect == "cancel" else DisconnectMode.continue_
 
+    # Resolve follow_up_to_run_id: explicit from request, or auto-detect from latest successful run
+    follow_up_to_run_id = getattr(body, "follow_up_to_run_id", None)
+    if follow_up_to_run_id is None:
+        run_store = get_run_store(request)
+        try:
+            recent_runs = await run_store.list_by_thread(thread_id, limit=1)
+            if recent_runs and recent_runs[0].get("status") == "success":
+                follow_up_to_run_id = recent_runs[0]["run_id"]
+        except Exception:
+            pass  # Don't block run creation
+
+    # Enrich base context with per-run field
+    if follow_up_to_run_id:
+        run_ctx = dataclasses.replace(run_ctx, follow_up_to_run_id=follow_up_to_run_id)
+
     try:
         record = await run_mgr.create_or_reject(
             thread_id,
@@ -285,17 +236,28 @@ async def start_run(
             metadata=body.metadata or {},
             kwargs={"input": body.input, "config": body.config},
             multitask_strategy=body.multitask_strategy,
+            follow_up_to_run_id=follow_up_to_run_id,
         )
     except ConflictError as exc:
         raise HTTPException(status_code=409, detail=str(exc)) from exc
     except UnsupportedStrategyError as exc:
         raise HTTPException(status_code=501, detail=str(exc)) from exc
 
-    # Ensure the thread is visible in /threads/search, even for threads that
-    # were never explicitly created via POST /threads (e.g. stateless runs).
-    store = get_store(request)
-    if store is not None:
-        await _upsert_thread_in_store(store, thread_id, body.metadata)
+    # Upsert thread metadata so the thread appears in /threads/search,
+    # even for threads that were never explicitly created via POST /threads
+    # (e.g. stateless runs).
+    try:
+        existing = await run_ctx.thread_store.get(thread_id)
+        if existing is None:
+            await run_ctx.thread_store.create(
+                thread_id,
+                assistant_id=body.assistant_id,
+                metadata=body.metadata,
+            )
+        else:
+            await run_ctx.thread_store.update_status(thread_id, "running")
+    except Exception:
+        logger.warning("Failed to upsert thread_meta for %s (non-fatal)", sanitize_log_param(thread_id))
 
     agent_factory = resolve_agent_factory(body.assistant_id)
     graph_input = normalize_input(body.input)
@@ -330,8 +292,7 @@ async def start_run(
             bridge,
             run_mgr,
             record,
-            checkpointer=checkpointer,
-            store=store,
+            ctx=run_ctx,
             agent_factory=agent_factory,
             graph_input=graph_input,
             config=config,
@@ -343,11 +304,9 @@ async def start_run(
     )
     record.task = task
 
-    # After the run completes, sync the title generated by TitleMiddleware from
-    # the checkpointer into the Store record so that /threads/search returns the
-    # correct title instead of an empty values dict.
-    if store is not None:
-        asyncio.create_task(_sync_thread_title_after_run(task, thread_id, checkpointer, store))
+    # Title sync is handled by worker.py's finally block which reads the
+    # title from the checkpoint and calls thread_store.update_display_name
+    # after the run completes.
 
     return record
 
diff --git a/backend/app/gateway/utils.py b/backend/app/gateway/utils.py
new file mode 100644
index 000000000..8368d84fc
--- /dev/null
+++ b/backend/app/gateway/utils.py
@@ -0,0 +1,6 @@
+"""Shared utility helpers for the Gateway layer."""
+
+
+def sanitize_log_param(value: str) -> str:
+    """Strip control characters to prevent log injection."""
+    return value.replace("\n", "").replace("\r", "").replace("\x00", "")
diff --git a/backend/docs/AUTH_TEST_DOCKER_GAP.md b/backend/docs/AUTH_TEST_DOCKER_GAP.md
new file mode 100644
index 000000000..adf4916a3
--- /dev/null
+++ b/backend/docs/AUTH_TEST_DOCKER_GAP.md
@@ -0,0 +1,77 @@
+# Docker Test Gap (Section 七 7.4)
+
+This file documents the only **un-executed** test cases from
+`backend/docs/AUTH_TEST_PLAN.md` after the full release validation pass.
+
+## Why this gap exists
+
+The release validation environment (sg_dev: `10.251.229.92`) **does not have
+a Docker daemon installed**. The TC-DOCKER cases are container-runtime
+behavior tests that need an actual Docker engine to spin up
+`docker/docker-compose.yaml` services.
+
+```bash
+$ ssh sg_dev "which docker; docker --version"
+# (empty)
+# bash: docker: command not found
+```
+
+All other test plan sections were executed against either:
+- The local dev box (Mac, all services running locally), or
+- The deployed sg_dev instance (gateway + frontend + nginx via SSH tunnel)
+
+## Cases not executed
+
+| Case | Title | What it covers | Why not run |
+|---|---|---|---|
+| TC-DOCKER-01 | `users.db` volume persistence | Verify the `DEER_FLOW_HOME` bind mount survives container restart | needs `docker compose up` |
+| TC-DOCKER-02 | Session persistence across container restart | `AUTH_JWT_SECRET` env var keeps cookies valid after `docker compose down && up` | needs `docker compose down/up` |
+| TC-DOCKER-03 | Per-worker rate limiter divergence | Confirms in-process `_login_attempts` dict doesn't share state across `gunicorn` workers (4 by default in the compose file); known limitation, documented | needs multi-worker container |
+| TC-DOCKER-04 | IM channels skip AuthMiddleware | Verify Feishu/Slack/Telegram dispatchers run in-container against `http://langgraph:2024` without going through nginx | needs `docker logs` |
+| TC-DOCKER-05 | Admin credentials surfacing | **Updated post-simplify** — was "log scrape", now "0600 credential file in `DEER_FLOW_HOME`". The file-based behavior is already validated by TC-1.1 + TC-UPG-13 on sg_dev (non-Docker), so the only Docker-specific gap is verifying the volume mount carries the file out to the host | needs container + host volume |
+| TC-DOCKER-06 | Gateway-mode Docker deploy | `./scripts/deploy.sh --gateway` produces a 3-container topology (no `langgraph` container); same auth flow as standard mode | needs `docker compose --profile gateway` |
+
+## Coverage already provided by non-Docker tests
+
+The **auth-relevant** behavior in each Docker case is already exercised by
+the test cases that ran on sg_dev or local:
+
+| Docker case | Auth behavior covered by |
+|---|---|
+| TC-DOCKER-01 (volume persistence) | TC-REENT-01 on sg_dev (admin row survives gateway restart) — same SQLite file, just no container layer between |
+| TC-DOCKER-02 (session persistence) | TC-API-02/03/06 (cookie roundtrip), plus TC-REENT-04 (multi-cookie) — JWT verification is process-state-free, container restart is equivalent to `pkill uvicorn && uv run uvicorn` |
+| TC-DOCKER-03 (per-worker rate limit) | TC-GW-04 + TC-REENT-09 (single-worker rate limit + 5min expiry). The cross-worker divergence is an architectural property of the in-memory dict; no auth code path differs |
+| TC-DOCKER-04 (IM channels skip auth) | Code-level only: `app/channels/manager.py` uses `langgraph_sdk` directly with no cookie handling. The langgraph_auth handler is bypassed by going through SDK, not HTTP |
+| TC-DOCKER-05 (credential surfacing) | TC-1.1 on sg_dev (file at `~/deer-flow/backend/.deer-flow/admin_initial_credentials.txt`, mode 0600, password 22 chars) — the only Docker-unique step is whether the bind mount projects this path onto the host, which is a `docker compose` config check, not a runtime behavior change |
+| TC-DOCKER-06 (gateway-mode container) | Section 七 7.2 covered by TC-GW-01..05 + Section 二 (gateway-mode auth flow on sg_dev) — same Gateway code, container is just a packaging change |
+
+## Reproduction steps when Docker becomes available
+
+Anyone with `docker` + `docker compose` installed can reproduce the gap by
+running the test plan section verbatim. Pre-flight:
+
+```bash
+# Required on the host
+docker --version           # >=24.x
+docker compose version     # plugin >=2.x
+
+# Required env var (otherwise sessions reset on every container restart)
+echo "AUTH_JWT_SECRET=$(python3 -c 'import secrets; print(secrets.token_urlsafe(32))')" \
+  >> .env
+
+# Optional: pin DEER_FLOW_HOME to a stable host path
+echo "DEER_FLOW_HOME=$HOME/deer-flow-data" >> .env
+```
+
+Then run TC-DOCKER-01..06 from the test plan as written.
+
+## Decision log
+
+- **Not blocking the release.** The auth-relevant behavior in every Docker
+  case has an already-validated equivalent on bare metal. The gap is purely
+  about *container packaging* details (bind mounts, multi-worker, log
+  collection), not about whether the auth code paths work.
+- **TC-DOCKER-05 was updated in place** in `AUTH_TEST_PLAN.md` to reflect
+  the post-simplify reality (credentials file → 0600 file, no log leak).
+  The old "grep 'Password:' in docker logs" expectation would have failed
+  silently and given a false sense of coverage.
diff --git a/backend/docs/AUTH_TEST_PLAN.md b/backend/docs/AUTH_TEST_PLAN.md
new file mode 100644
index 000000000..15b20494a
--- /dev/null
+++ b/backend/docs/AUTH_TEST_PLAN.md
@@ -0,0 +1,1801 @@
+# Auth 模块测试计划
+
+## 测试矩阵
+
+| 模式 | 启动命令 | Auth 层 | 端口 |
+|------|---------|---------|------|
+| 标准模式 | `make dev` | Gateway AuthMiddleware + LangGraph auth | 2026 (nginx) |
+| Gateway 模式 | `make dev-pro` | Gateway AuthMiddleware（全量） | 2026 (nginx) |
+| 直连 Gateway | `cd backend && make gateway` | Gateway AuthMiddleware | 8001 |
+| 直连 LangGraph | `cd backend && make dev` | LangGraph auth | 2024 |
+
+每种模式下都需执行以下测试。
+
+---
+
+## 一、环境准备
+
+### 1.1 首次启动（干净数据库）
+
+```bash
+# 清除已有数据
+rm -f backend/.deer-flow/users.db
+
+# 选择模式启动
+make dev          # 标准模式
+# 或
+make dev-pro      # Gateway 模式
+```
+
+**验证点：**
+- [ ] 控制台输出 admin 邮箱和随机密码
+- [ ] 密码格式为 `secrets.token_urlsafe(16)` 的 22 字符字符串
+- [ ] 邮箱为 `admin@deerflow.dev`
+- [ ] 提示 `Change it after login: Settings -> Account`
+
+### 1.2 非首次启动
+
+```bash
+# 不清除数据库，直接启动
+make dev
+```
+
+**验证点：**
+- [ ] 控制台不输出密码
+- [ ] 如果 admin 仍 `needs_setup=True`，控制台有 warning 提示
+
+### 1.3 环境变量配置
+
+| 变量 | 验证 |
+|------|------|
+| `AUTH_JWT_SECRET` 未设 | 启动时 warning，自动生成临时密钥 |
+| `AUTH_JWT_SECRET` 已设 | 无 warning，重启后 session 保持 |
+
+---
+
+## 二、接口流程测试
+
+> 以下用 `BASE=http://localhost:2026` 为例。标准模式和 Gateway 模式都用此地址。
+> 直连测试替换为对应端口。
+>
+> **CSRF token 提取**：多处用到从 cookie jar 提取 CSRF token，统一使用：
+> ```bash
+> CSRF=$(python3 -c "
+> import http.cookiejar
+> cj = http.cookiejar.MozillaCookieJar('cookies.txt'); cj.load()
+> print(next(c.value for c in cj if c.name == 'csrf_token'))
+> ")
+> ```
+> 或简写（多数场景够用）：`CSRF=$(grep csrf_token cookies.txt | awk '{print $NF}')`
+
+### 2.1 注册 + 登录 + 会话
+
+#### TC-API-01: Setup 状态查询
+
+```bash
+curl -s $BASE/api/v1/auth/setup-status | jq .
+```
+
+**预期：** 返回 `{"needs_setup": false}`（admin 在启动时已自动创建，`count_users() > 0`）。仅在启动完成前的极短窗口内可能返回 `true`。
+
+#### TC-API-02: Admin 首次登录
+
+```bash
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@deerflow.dev&password=<控制台密码>" \
+  -c cookies.txt | jq .
+```
+
+**预期：**
+- 状态码 200
+- Body: `{"expires_in": 604800, "needs_setup": true}`
+- `cookies.txt` 包含 `access_token`（HttpOnly）和 `csrf_token`（非 HttpOnly）
+
+#### TC-API-03: 获取当前用户
+
+```bash
+curl -s $BASE/api/v1/auth/me -b cookies.txt | jq .
+```
+
+**预期：** `{"id": "...", "email": "admin@deerflow.dev", "system_role": "admin", "needs_setup": true}`
+
+#### TC-API-04: Setup 流程（改邮箱 + 改密码）
+
+```bash
+CSRF=$(grep csrf_token cookies.txt | awk '{print $NF}')
+curl -s -X POST $BASE/api/v1/auth/change-password \
+  -b cookies.txt \
+  -H "Content-Type: application/json" \
+  -H "X-CSRF-Token: $CSRF" \
+  -d '{"current_password":"<控制台密码>","new_password":"NewPass123!","new_email":"admin@example.com"}' | jq .
+```
+
+**预期：**
+- 状态码 200
+- `{"message": "Password changed successfully"}`
+- 再调 `/auth/me` 邮箱变为 `admin@example.com`，`needs_setup` 变为 `false`
+
+#### TC-API-05: 普通用户注册
+
+```bash
+curl -s -X POST $BASE/api/v1/auth/register \
+  -H "Content-Type: application/json" \
+  -d '{"email":"user1@example.com","password":"UserPass1!"}' \
+  -c user_cookies.txt | jq .
+```
+
+**预期：** 状态码 201，`system_role` 为 `"user"`，自动登录（cookie 已设）
+
+#### TC-API-06: 登出
+
+```bash
+curl -s -X POST $BASE/api/v1/auth/logout -b cookies.txt | jq .
+```
+
+**预期：** `{"message": "Successfully logged out"}`，后续用 cookies.txt 访问 `/auth/me` 返回 401
+
+### 2.2 多租户隔离
+
+#### TC-API-07: 用户 A 创建 Thread
+
+```bash
+# 以 user1 登录
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=user1@example.com&password=UserPass1!" \
+  -c user1.txt
+
+CSRF1=$(grep csrf_token user1.txt | awk '{print $NF}')
+
+# 创建 thread
+curl -s -X POST $BASE/api/threads \
+  -b user1.txt \
+  -H "Content-Type: application/json" \
+  -H "X-CSRF-Token: $CSRF1" \
+  -d '{"metadata":{}}' | jq .thread_id
+# 记录 THREAD_ID
+```
+
+#### TC-API-08: 用户 B 无法访问用户 A 的 Thread
+
+```bash
+# 注册并登录 user2
+curl -s -X POST $BASE/api/v1/auth/register \
+  -H "Content-Type: application/json" \
+  -d '{"email":"user2@example.com","password":"UserPass2!"}' \
+  -c user2.txt
+
+# 尝试访问 user1 的 thread
+curl -s $BASE/api/threads/$THREAD_ID -b user2.txt
+```
+
+**预期：** 状态码 404（不是 403，避免泄露 thread 存在性）
+
+#### TC-API-09: 用户 B 搜索 Thread 看不到用户 A 的
+
+```bash
+CSRF2=$(grep csrf_token user2.txt | awk '{print $NF}')
+curl -s -X POST $BASE/api/threads/search \
+  -b user2.txt \
+  -H "Content-Type: application/json" \
+  -H "X-CSRF-Token: $CSRF2" \
+  -d '{}' | jq length
+```
+
+**预期：** 返回 0 或仅包含 user2 自己的 thread
+
+### 2.3 标准模式 LangGraph Server 隔离
+
+> 仅在标准模式下测试。Gateway 模式不跑 LangGraph Server。
+
+#### TC-API-10: LangGraph 端点需要 cookie
+
+```bash
+# 不带 cookie 访问 LangGraph 接口
+curl -s -w "%{http_code}" $BASE/api/langgraph/threads
+```
+
+**预期：** 401
+
+#### TC-API-11: LangGraph 带 cookie 可访问
+
+```bash
+curl -s $BASE/api/langgraph/threads -b user1.txt | jq length
+```
+
+**预期：** 200，返回 user1 的 thread 列表
+
+#### TC-API-12: LangGraph 隔离 — 用户只看到自己的
+
+```bash
+# user2 查 LangGraph threads
+curl -s $BASE/api/langgraph/threads -b user2.txt | jq length
+```
+
+**预期：** 不包含 user1 的 thread
+
+### 2.4 Token 失效
+
+#### TC-API-13: 改密码后旧 token 立即失效
+
+```bash
+# 保存当前 cookie
+cp user1.txt user1_old.txt
+
+# 改密码
+CSRF1=$(grep csrf_token user1.txt | awk '{print $NF}')
+curl -s -X POST $BASE/api/v1/auth/change-password \
+  -b user1.txt \
+  -H "Content-Type: application/json" \
+  -H "X-CSRF-Token: $CSRF1" \
+  -d '{"current_password":"UserPass1!","new_password":"NewUserPass1!"}' \
+  -c user1.txt
+
+# 用旧 cookie 访问
+curl -s -w "%{http_code}" $BASE/api/v1/auth/me -b user1_old.txt
+```
+
+**预期：** 401（token_version 不匹配）
+
+#### TC-API-14: 改密码后新 cookie 可用
+
+```bash
+curl -s $BASE/api/v1/auth/me -b user1.txt | jq .email
+```
+
+**预期：** 200，返回用户信息
+
+### 2.5 错误响应格式
+
+#### TC-API-15: 结构化错误响应
+
+```bash
+# 错误密码登录
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@example.com&password=wrong" | jq .detail
+```
+
+**预期：**
+```json
+{"code": "invalid_credentials", "message": "Incorrect email or password"}
+```
+
+#### TC-API-16: 重复邮箱注册
+
+```bash
+curl -s -X POST $BASE/api/v1/auth/register \
+  -H "Content-Type: application/json" \
+  -d '{"email":"user1@example.com","password":"AnyPass123"}' -w "\n%{http_code}"
+```
+
+**预期：** 400，`{"code": "email_already_exists", ...}`
+
+---
+
+## 三、攻击测试
+
+### 3.1 暴力破解防护
+
+#### TC-ATK-01: IP 限速
+
+```bash
+# 连续 6 次错误密码
+for i in $(seq 1 6); do
+  echo "Attempt $i:"
+  curl -s -X POST $BASE/api/v1/auth/login/local \
+    -d "username=admin@example.com&password=wrong$i" -w " HTTP %{http_code}\n"
+done
+```
+
+**预期：** 前 5 次返回 401，第 6 次返回 429 `"Too many login attempts. Try again later."`
+
+#### TC-ATK-02: 限速后正确密码也被拒
+
+```bash
+# 紧接上一步
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@example.com&password=正确密码" -w " HTTP %{http_code}\n"
+```
+
+**预期：** 429（锁定 5 分钟）
+
+#### TC-ATK-03: 成功登录清除限速
+
+```bash
+# 等待锁定过期后（或重启服务），用正确密码登录
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@example.com&password=正确密码" -w " HTTP %{http_code}\n"
+```
+
+**预期：** 200，计数器重置
+
+### 3.2 CSRF 防护
+
+#### TC-ATK-04: 无 CSRF token 的 POST 请求
+
+```bash
+curl -s -X POST $BASE/api/threads \
+  -b user1.txt \
+  -H "Content-Type: application/json" \
+  -d '{"metadata":{}}' -w "\nHTTP %{http_code}"
+```
+
+**预期：** 403 `"CSRF token missing"`
+
+#### TC-ATK-05: 错误 CSRF token
+
+```bash
+curl -s -X POST $BASE/api/threads \
+  -b user1.txt \
+  -H "Content-Type: application/json" \
+  -H "X-CSRF-Token: fake-token" \
+  -d '{"metadata":{}}' -w "\nHTTP %{http_code}"
+```
+
+**预期：** 403 `"CSRF token mismatch"`
+
+### 3.3 Cookie 安全
+
+> HTTP 与 HTTPS 行为差异通过 `X-Forwarded-Proto: https` 模拟。
+> **注意：** 经 nginx 代理时，nginx 的 `proxy_set_header X-Forwarded-Proto $scheme` 会覆盖
+> 客户端发的值（`$scheme` = nginx 监听端口的 scheme），因此 HTTPS 模拟必须**直连 Gateway（端口 8001）**。
+> 每个 case 需在 **login** 和 **register** 两个端点各验证一次。
+
+#### TC-ATK-06: HTTP 模式 Cookie 属性
+
+```bash
+# 登录
+curl -s -D - -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@example.com&password=正确密码" 2>/dev/null | grep -i set-cookie
+```
+
+**预期：**
+- `access_token`: `HttpOnly; Path=/; SameSite=lax`，无 `Secure`，无 `Max-Age`
+- `csrf_token`: `Path=/; SameSite=strict`，无 `HttpOnly`（JS 需要读取），无 `Secure`
+
+```bash
+# 注册
+curl -s -D - -X POST $BASE/api/v1/auth/register \
+  -H "Content-Type: application/json" \
+  -d '{"email":"cookie-http@example.com","password":"CookieTest1!"}' 2>/dev/null | grep -i set-cookie
+```
+
+**预期：** 同上
+
+#### TC-ATK-07: HTTPS 模式 Cookie 属性
+
+> **必须直连 Gateway**（`GW=http://localhost:8001`），经 nginx 会被 `$scheme` 覆盖。
+
+```bash
+GW=http://localhost:8001
+
+# 登录（模拟 HTTPS）
+curl -s -D - -X POST $GW/api/v1/auth/login/local \
+  -H "X-Forwarded-Proto: https" \
+  -d "username=admin@example.com&password=正确密码" 2>/dev/null | grep -i set-cookie
+```
+
+**预期：**
+- `access_token`: `HttpOnly; Secure; Path=/; SameSite=lax; Max-Age=604800`
+- `csrf_token`: `Secure; Path=/; SameSite=strict`，无 `HttpOnly`
+
+```bash
+# 注册（模拟 HTTPS）
+curl -s -D - -X POST $GW/api/v1/auth/register \
+  -H "Content-Type: application/json" \
+  -H "X-Forwarded-Proto: https" \
+  -d '{"email":"cookie-https@example.com","password":"CookieTest1!"}' 2>/dev/null | grep -i set-cookie
+```
+
+**预期：** 同上
+
+#### TC-ATK-07a: HTTP/HTTPS 差异对比
+
+> 直连 Gateway 执行，避免 nginx 覆盖 `X-Forwarded-Proto`。
+
+```bash
+GW=http://localhost:8001
+
+for proto in "" "https"; do
+  HEADER=""
+  LABEL="HTTP"
+  if [ -n "$proto" ]; then
+    HEADER="-H X-Forwarded-Proto:$proto"
+    LABEL="HTTPS"
+  fi
+  echo "=== $LABEL ==="
+  EMAIL="compare-${LABEL,,}-$(date +%s)@example.com"
+  curl -s -D - -X POST $GW/api/v1/auth/register \
+    -H "Content-Type: application/json" $HEADER \
+    -d "{\"email\":\"$EMAIL\",\"password\":\"Compare1!\"}" 2>/dev/null | grep -i set-cookie | while read line; do
+    if echo "$line" | grep -q "access_token="; then
+      echo "  access_token:"
+      echo "    HttpOnly: $(echo "$line" | grep -qi httponly && echo YES || echo NO)"
+      echo "    Secure:   $(echo "$line" | grep -qi "secure" && echo "$line" | grep -v samesite | grep -qi secure && echo YES || echo NO)"
+      echo "    Max-Age:  $(echo "$line" | grep -oi "max-age=[0-9]*" || echo NONE)"
+      echo "    SameSite: $(echo "$line" | grep -oi "samesite=[a-z]*")"
+    fi
+    if echo "$line" | grep -q "csrf_token="; then
+      echo "  csrf_token:"
+      echo "    HttpOnly: $(echo "$line" | grep -qi httponly && echo YES || echo NO)"
+      echo "    Secure:   $(echo "$line" | grep -qi "secure" && echo "$line" | grep -v samesite | grep -qi secure && echo YES || echo NO)"
+      echo "    SameSite: $(echo "$line" | grep -oi "samesite=[a-z]*")"
+    fi
+  done
+done
+```
+
+**预期对比表：**
+
+| 属性 | HTTP access_token | HTTPS access_token | HTTP csrf_token | HTTPS csrf_token |
+|------|------|------|------|------|
+| HttpOnly | Yes | Yes | No | No |
+| Secure | No | **Yes** | No | **Yes** |
+| SameSite | Lax | Lax | Strict | Strict |
+| Max-Age | 无（session cookie） | **604800**（7天） | 无 | 无 |
+
+### 3.4 越权访问
+
+#### TC-ATK-08: 无 cookie 访问受保护接口
+
+```bash
+for path in /api/models /api/mcp/config /api/memory /api/skills \
+            /api/agents /api/channels; do
+  echo "$path: $(curl -s -w '%{http_code}' -o /dev/null $BASE$path)"
+done
+```
+
+**预期：** 全部 401
+
+#### TC-ATK-09: 伪造 JWT
+
+```bash
+# 用不同 secret 签名的 token
+FAKE_TOKEN=$(python3 -c "
+import jwt
+print(jwt.encode({'sub':'admin-id','ver':0,'exp':9999999999}, 'wrong-secret', algorithm='HS256'))
+")
+
+curl -s -w "%{http_code}" $BASE/api/v1/auth/me \
+  --cookie "access_token=$FAKE_TOKEN"
+```
+
+**预期：** 401（签名验证失败）
+
+#### TC-ATK-10: 过期 JWT
+
+```bash
+# 不依赖环境变量，直接用一个已过期的、随机 secret 签名的 token
+# 无论 secret 是否匹配，过期 token 都会被拒绝
+EXPIRED_TOKEN=$(python3 -c "
+import jwt, time
+print(jwt.encode({'sub':'x','ver':0,'exp':int(time.time())-100}, 'any-secret-32chars-placeholder!!', algorithm='HS256'))
+")
+
+curl -s -w "%{http_code}" -o /dev/null $BASE/api/v1/auth/me \
+  --cookie "access_token=$EXPIRED_TOKEN"
+```
+
+**预期：** 401（过期 or 签名不匹配，均被拒绝）
+
+### 3.5 密码安全
+
+#### TC-ATK-11: 密码长度不足
+
+```bash
+curl -s -X POST $BASE/api/v1/auth/register \
+  -H "Content-Type: application/json" \
+  -d '{"email":"short@example.com","password":"1234567"}' -w "\nHTTP %{http_code}"
+```
+
+**预期：** 422（Pydantic validation: min_length=8）
+
+#### TC-ATK-12: 密码不以明文存储
+
+```bash
+# 检查数据库
+sqlite3 backend/.deer-flow/users.db "SELECT email, password_hash FROM users LIMIT 3;"
+```
+
+**预期：** `password_hash` 以 `$2b$` 开头（bcrypt 格式）
+
+---
+
+## 四、UI 操作测试
+
+> 浏览器中操作，验证前后端联动。
+
+### 4.1 首次登录流程
+
+#### TC-UI-01: 访问首页跳转登录
+
+1. 打开 `http://localhost:2026/workspace`
+2. **预期：** 自动跳转到 `/login`
+
+#### TC-UI-02: Login 页面
+
+1. 输入 admin 邮箱和控制台密码
+2. 点击 Login
+3. **预期：** 跳转到 `/setup`（因为 `needs_setup=true`）
+
+#### TC-UI-03: Setup 页面
+
+1. 输入新邮箱、控制台密码（current）、新密码、确认密码
+2. 点击 Complete Setup
+3. **预期：** 跳转到 `/workspace`
+4. 刷新页面不跳回 `/setup`
+
+#### TC-UI-04: Setup 密码不匹配
+
+1. 新密码和确认密码不一致
+2. 点击 Complete Setup
+3. **预期：** 显示 "Passwords do not match" 错误
+
+### 4.2 日常使用
+
+#### TC-UI-05: 创建对话
+
+1. 在 workspace 发送一条消息
+2. **预期：** 左侧栏出现新 thread
+
+#### TC-UI-06: 对话持久化
+
+1. 创建对话后刷新页面
+2. **预期：** 对话列表和内容仍然存在
+
+#### TC-UI-07: 登出
+
+1. 点击头像 → Logout
+2. **预期：** 跳转到首页 `/`
+3. 直接访问 `/workspace` → 跳转到 `/login`
+
+### 4.3 多用户隔离
+
+#### TC-UI-08: 用户 A 看不到用户 B 的对话
+
+1. 用户 A 在浏览器 1 登录，创建一个对话并发消息
+2. 用户 B 在浏览器 2（或隐身窗口）注册并登录
+3. **预期：** 用户 B 的 workspace 左侧栏为空，看不到用户 A 的对话
+
+#### TC-UI-09: 直接 URL 访问他人 Thread
+
+1. 复制用户 A 的 thread URL
+2. 在用户 B 的浏览器中访问
+3. **预期：** 404 或空白页，不显示对话内容
+
+### 4.4 Session 管理
+
+#### TC-UI-10: Tab 切换 Session 检查
+
+1. 登录 workspace
+2. 切换到其他 tab 等待 60+ 秒
+3. 切回 workspace tab
+4. **预期：** 静默检查 session，页面正常（控制台无 401 刷屏）
+
+#### TC-UI-11: Session 过期后 Tab 切回
+
+1. 登录 workspace
+2. 在另一个 tab 改密码（使当前 session 失效）
+3. 切回 workspace tab
+4. **预期：** 自动跳转到 `/login`
+
+#### TC-UI-12: 改密码后 Settings 页面
+
+1. 进入 Settings → Account
+2. 修改密码
+3. **预期：** 成功提示，页面不需要重新登录（cookie 已自动更新）
+
+### 4.5 注册流程
+
+#### TC-UI-13: 从登录页跳转注册
+
+1. 在 `/login` 页面点击注册链接
+2. 输入邮箱和密码
+3. **预期：** 注册成功后自动跳转 `/workspace`
+
+#### TC-UI-14: 重复邮箱注册
+
+1. 用已注册的邮箱尝试注册
+2. **预期：** 显示 "Email already registered" 错误
+
+### 4.6 密码重置（CLI）
+
+#### TC-UI-15: reset_admin 后重新登录
+
+1. 执行 `cd backend && python -m app.gateway.auth.reset_admin`
+2. 使用新密码登录
+3. **预期：** 跳转到 `/setup` 页面（`needs_setup` 被重置为 true）
+4. 旧 session 已失效
+
+---
+
+## 五、升级测试
+
+> 模拟从无 auth 版本（main 分支）升级到 auth 版本（feat/rfc-001-auth-module）。
+
+### 5.1 准备旧版数据
+
+```bash
+# 1. 切到 main 分支，启动服务
+git stash && git checkout main
+make dev
+
+# 2. 创建一些对话数据（无 auth，直接访问）
+curl -s -X POST http://localhost:2026/api/langgraph/threads \
+  -H "Content-Type: application/json" \
+  -d '{"metadata":{"title":"old-thread-1"}}' | jq .thread_id
+
+curl -s -X POST http://localhost:2026/api/langgraph/threads \
+  -H "Content-Type: application/json" \
+  -d '{"metadata":{"title":"old-thread-2"}}' | jq .thread_id
+
+# 3. 记录 thread 数量
+curl -s http://localhost:2026/api/langgraph/threads | jq length
+# 预期: 2+
+
+# 4. 停止服务
+make stop
+```
+
+### 5.2 升级并启动
+
+```bash
+# 5. 切到 auth 分支
+git checkout feat/rfc-001-auth-module && git stash pop
+make install
+make dev
+```
+
+#### TC-UPG-01: 首次启动创建 admin
+
+**预期：**
+- [ ] 控制台输出 admin 邮箱（`admin@deerflow.dev`）和随机密码
+- [ ] 无报错，正常启动
+
+#### TC-UPG-02: 旧 Thread 迁移到 admin
+
+```bash
+# 登录 admin
+curl -s -X POST http://localhost:2026/api/v1/auth/login/local \
+  -d "username=admin@deerflow.dev&password=<控制台密码>" \
+  -c cookies.txt
+
+# 查看 thread 列表
+CSRF=$(grep csrf_token cookies.txt | awk '{print $NF}')
+curl -s -X POST http://localhost:2026/api/threads/search \
+  -b cookies.txt \
+  -H "Content-Type: application/json" \
+  -H "X-CSRF-Token: $CSRF" \
+  -d '{}' | jq length
+```
+
+**预期：**
+- [ ] 返回的 thread 数量 ≥ 旧版创建的数量
+- [ ] 控制台日志有 `Migrated N orphaned thread(s) to admin`
+- [ ] 每个 thread 的 `metadata.owner_id` 都已被设为 admin 的 ID
+
+#### TC-UPG-03: 旧 Thread 内容完整
+
+```bash
+# 检查某个旧 thread 的内容
+curl -s http://localhost:2026/api/threads/<old-thread-id> \
+  -b cookies.txt | jq .metadata
+```
+
+**预期：**
+- [ ] `metadata.title` 保留原值（如 `old-thread-1`）
+- [ ] `metadata.owner_id` 已填充
+
+#### TC-UPG-04: 新用户看不到旧 Thread
+
+```bash
+# 注册新用户
+curl -s -X POST http://localhost:2026/api/v1/auth/register \
+  -H "Content-Type: application/json" \
+  -d '{"email":"newuser@example.com","password":"NewPass123!"}' \
+  -c newuser.txt
+
+CSRF2=$(grep csrf_token newuser.txt | awk '{print $NF}')
+curl -s -X POST http://localhost:2026/api/threads/search \
+  -b newuser.txt \
+  -H "Content-Type: application/json" \
+  -H "X-CSRF-Token: $CSRF2" \
+  -d '{}' | jq length
+```
+
+**预期：** 返回 0（旧 thread 属于 admin，新用户不可见）
+
+### 5.3 数据库 Schema 兼容
+
+#### TC-UPG-05: 无 users.db 时自动创建
+
+```bash
+ls -la backend/.deer-flow/users.db
+```
+
+**预期：** 文件存在，`sqlite3` 可查到 `users` 表含 `needs_setup`、`token_version` 列
+
+#### TC-UPG-06: users.db WAL 模式
+
+```bash
+sqlite3 backend/.deer-flow/users.db "PRAGMA journal_mode;"
+```
+
+**预期：** 返回 `wal`
+
+### 5.4 配置兼容
+
+#### TC-UPG-07: 无 AUTH_JWT_SECRET 的旧 .env 文件
+
+```bash
+# 确认 .env 中没有 AUTH_JWT_SECRET
+grep AUTH_JWT_SECRET backend/.env || echo "NOT SET"
+```
+
+**预期：**
+- [ ] 启动时 warning：`AUTH_JWT_SECRET is not set — using auto-generated ephemeral secret`
+- [ ] 服务正常可用
+- [ ] 重启后旧 session 失效（临时密钥变了）
+
+#### TC-UPG-08: 旧 config.yaml 无 auth 相关配置
+
+```bash
+# 检查 config.yaml 没有 auth 段
+grep -c "auth" config.yaml || echo "0"
+```
+
+**预期：** auth 模块不依赖 config.yaml（配置走环境变量），旧 config.yaml 不影响启动
+
+### 5.5 前端兼容
+
+#### TC-UPG-09: 旧前端缓存
+
+1. 用旧版前端的浏览器缓存访问升级后的服务
+2. **预期：** 被 AuthMiddleware 拦截返回 401（旧前端无 cookie），页面自然刷新后加载新前端
+
+#### TC-UPG-10: 书签 URL
+
+1. 用升级前保存的 workspace URL（如 `localhost:2026/workspace/chats/xxx`）直接访问
+2. **预期：** 跳转到 `/login`，登录后跳回原 URL（`?next=` 参数）
+
+### 5.6 降级回滚
+
+#### TC-UPG-11: 回退到 main 分支
+
+```bash
+make stop
+git checkout main
+make dev
+```
+
+**预期：**
+- [ ] 服务正常启动（忽略 `users.db`，无 auth 相关代码不报错）
+- [ ] 旧对话数据仍然可访问
+- [ ] `users.db` 文件残留但不影响运行
+
+#### TC-UPG-12: 再次升级到 auth 分支
+
+```bash
+make stop
+git checkout feat/rfc-001-auth-module
+make dev
+```
+
+**预期：**
+- [ ] 识别已有 `users.db`，不重新创建 admin
+- [ ] 旧的 admin 账号仍可登录（如果回退期间未删 `users.db`）
+
+### 5.7 休眠 Admin（初始密码未使用/未更改）
+
+> 首次启动生成 admin + 随机密码，但运维未登录、未改密码。
+> 密码只在首次启动的控制台闪过一次，后续启动不再显示。
+
+#### TC-UPG-13: 重启后自动重置密码并打印
+
+```bash
+# 首次启动，记录密码
+rm -f backend/.deer-flow/users.db
+make dev
+# 控制台输出密码 P0，不登录
+make stop
+
+# 隔了几天，再次启动
+make dev
+# 控制台输出新密码 P1
+```
+
+**预期：**
+- [ ] 控制台输出 `Admin account setup incomplete — password reset`
+- [ ] 输出新密码 P1（P0 已失效）
+- [ ] 用 P1 可以登录，P0 不可以
+- [ ] 登录后 `needs_setup=true`，跳转 `/setup`
+- [ ] `token_version` 递增（旧 session 如有也失效）
+
+#### TC-UPG-14: 密码丢失 — 无需 CLI，重启即可
+
+```bash
+# 忘记了控制台密码 → 直接重启服务
+make stop && make dev
+# 控制台自动输出新密码
+```
+
+**预期：**
+- [ ] 无需 `reset_admin`，重启服务即可拿到新密码
+- [ ] `reset_admin` CLI 仍然可用作手动备选方案
+
+#### TC-UPG-15: 休眠 admin 期间普通用户注册
+
+```bash
+# admin 存在但从未登录，普通用户先注册
+curl -s -X POST $BASE/api/v1/auth/register \
+  -H "Content-Type: application/json" \
+  -d '{"email":"earlybird@example.com","password":"EarlyPass1!"}' \
+  -c early.txt -w "\nHTTP %{http_code}"
+```
+
+**预期：**
+- [ ] 注册成功（201），角色为 `user`
+- [ ] 无法提权为 admin
+- [ ] 普通用户的数据与 admin 隔离
+
+#### TC-UPG-16: 休眠 admin 不影响后续操作
+
+```bash
+# 普通用户正常创建 thread、发消息
+CSRF=$(grep csrf_token early.txt | awk '{print $NF}')
+curl -s -X POST $BASE/api/threads \
+  -b early.txt \
+  -H "Content-Type: application/json" \
+  -H "X-CSRF-Token: $CSRF" \
+  -d '{"metadata":{}}' | jq .thread_id
+```
+
+**预期：** 正常创建，不受休眠 admin 影响
+
+#### TC-UPG-17: 休眠 admin 最终完成 Setup
+
+```bash
+# 运维终于登录
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@deerflow.dev&password=<P0或P1>" \
+  -c admin.txt | jq .needs_setup
+# 预期: true
+
+# 完成 setup
+CSRF=$(grep csrf_token admin.txt | awk '{print $NF}')
+curl -s -X POST $BASE/api/v1/auth/change-password \
+  -b admin.txt \
+  -H "Content-Type: application/json" \
+  -H "X-CSRF-Token: $CSRF" \
+  -d '{"current_password":"<密码>","new_password":"AdminFinal1!","new_email":"admin@real.com"}' \
+  -c admin.txt
+
+# 验证
+curl -s $BASE/api/v1/auth/me -b admin.txt | jq '{email, needs_setup}'
+```
+
+**预期：**
+- [ ] `email` 变为 `admin@real.com`
+- [ ] `needs_setup` 变为 `false`
+- [ ] 后续重启控制台不再有 warning
+
+#### TC-UPG-18: 长期未用后 JWT 密钥轮换
+
+```bash
+# 场景：admin 未登录期间，运维更换了 AUTH_JWT_SECRET
+# 1. 首次启动用自动生成的临时密钥
+# 2. 某天运维在 .env 设置了固定密钥
+echo "AUTH_JWT_SECRET=$(python3 -c 'import secrets; print(secrets.token_urlsafe(32))')" >> .env
+make stop && make dev
+```
+
+**预期：**
+- [ ] 服务正常启动
+- [ ] 旧密码仍可登录（密码存在 DB，与 JWT 密钥无关）
+- [ ] 旧的 JWT token 失效（密钥变了签名不匹配）— 但因为从未登录过也没有旧 token
+
+---
+
+## 六、可重入测试
+
+> 验证 auth 模块在重复操作、并发、中断恢复等场景下行为正确，无竞态条件。
+
+### 6.1 启动可重入
+
+#### TC-REENT-01: 连续重启不重复创建 admin
+
+```bash
+# 连续启动 3 次（daemon 模式，避免前台阻塞）
+for i in 1 2 3; do
+  make dev-daemon && sleep 10 && make stop
+done
+
+# 检查 admin 数量
+sqlite3 backend/.deer-flow/users.db \
+  "SELECT COUNT(*) FROM users WHERE system_role='admin';"
+```
+
+**预期：** 始终为 1。不会因重启创建多个 admin。
+
+#### TC-REENT-02: 多进程同时启动
+
+```bash
+# 模拟两个 gateway 进程同时启动（竞争 admin 创建）
+cd backend
+PYTHONPATH=. uv run python -c "
+import asyncio
+from app.gateway.app import create_app, _ensure_admin_user
+
+async def boot():
+    app = create_app()
+    # 模拟两个并发 ensure_admin
+    await asyncio.gather(
+        _ensure_admin_user(app),
+        _ensure_admin_user(app),
+    )
+
+asyncio.run(boot())
+" 2>&1 | grep -i "admin\|error\|duplicate"
+```
+
+**预期：**
+- [ ] 不报错（SQLite UNIQUE 约束捕获竞争，第二个静默跳过）
+- [ ] 最终只有 1 个 admin
+
+#### TC-REENT-03: Thread 迁移幂等
+
+```bash
+# 连续调用 _migrate_orphaned_threads 两次
+# 第二次应无 thread 需要迁移（已有 user_id）
+```
+
+**预期：** 第二次 `migrated = 0`，无副作用
+
+### 6.2 登录可重入
+
+#### TC-REENT-04: 重复登录获取新 cookie
+
+```bash
+# 同一用户连续登录 3 次
+for i in 1 2 3; do
+  curl -s -X POST $BASE/api/v1/auth/login/local \
+    -d "username=admin@example.com&password=正确密码" \
+    -c "cookies_$i.txt" -o /dev/null
+done
+
+# 三个 cookie 都有效
+for i in 1 2 3; do
+  echo "Cookie $i: $(curl -s -w '%{http_code}' -o /dev/null $BASE/api/v1/auth/me -b cookies_$i.txt)"
+done
+```
+
+**预期：** 三个 cookie 都返回 200（未改密码，token_version 相同，多 session 共存）
+
+#### TC-REENT-05: 登录-登出-登录
+
+```bash
+# 登录
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@example.com&password=正确密码" \
+  -c cookies.txt -o /dev/null
+
+# 登出
+curl -s -X POST $BASE/api/v1/auth/logout -b cookies.txt -o /dev/null
+
+# 再次登录
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@example.com&password=正确密码" \
+  -c cookies.txt
+
+curl -s -w "%{http_code}" $BASE/api/v1/auth/me -b cookies.txt
+```
+
+**预期：** 200。登出→再登录流程无状态残留。
+
+### 6.3 改密码可重入
+
+#### TC-REENT-06: 连续两次改密码
+
+```bash
+CSRF=$(grep csrf_token cookies.txt | awk '{print $NF}')
+
+# 第一次改密码
+curl -s -X POST $BASE/api/v1/auth/change-password \
+  -b cookies.txt \
+  -H "Content-Type: application/json" \
+  -H "X-CSRF-Token: $CSRF" \
+  -d '{"current_password":"Pass1","new_password":"Pass2"}' \
+  -c cookies.txt
+
+# 用新 cookie 的 CSRF 再改一次
+CSRF=$(grep csrf_token cookies.txt | awk '{print $NF}')
+curl -s -X POST $BASE/api/v1/auth/change-password \
+  -b cookies.txt \
+  -H "Content-Type: application/json" \
+  -H "X-CSRF-Token: $CSRF" \
+  -d '{"current_password":"Pass2","new_password":"Pass3"}' \
+  -c cookies.txt
+
+curl -s -w "%{http_code}" $BASE/api/v1/auth/me -b cookies.txt
+```
+
+**预期：**
+- [ ] 两次改密码都成功
+- [ ] 最终密码为 Pass3
+- [ ] `token_version` 递增两次（+2）
+- [ ] 最新 cookie 有效
+
+#### TC-REENT-07: 改密码后旧 cookie 全部失效
+
+```bash
+# 保存三个时间点的 cookie
+# t1: 初始登录 → cookies_t1.txt
+# t2: 第一次改密码后 → cookies_t2.txt
+# t3: 第二次改密码后 → cookies_t3.txt
+
+# 用 t1 和 t2 的 cookie 访问
+curl -s -w "%{http_code}" $BASE/api/v1/auth/me -b cookies_t1.txt  # 预期 401
+curl -s -w "%{http_code}" $BASE/api/v1/auth/me -b cookies_t2.txt  # 预期 401
+curl -s -w "%{http_code}" $BASE/api/v1/auth/me -b cookies_t3.txt  # 预期 200
+```
+
+**预期：** 只有最新的 cookie 有效，历史 cookie 因 token_version 不匹配全部 401
+
+### 6.4 注册可重入
+
+#### TC-REENT-08: 同一邮箱并发注册
+
+```bash
+# 并发发送两个相同邮箱的注册请求
+curl -s -X POST $BASE/api/v1/auth/register \
+  -H "Content-Type: application/json" \
+  -d '{"email":"race@example.com","password":"RacePass1!"}' &
+curl -s -X POST $BASE/api/v1/auth/register \
+  -H "Content-Type: application/json" \
+  -d '{"email":"race@example.com","password":"RacePass1!"}' &
+wait
+
+# 检查用户数
+sqlite3 backend/.deer-flow/users.db \
+  "SELECT COUNT(*) FROM users WHERE email='race@example.com';"
+```
+
+**预期：**
+- [ ] 一个成功（201），一个失败（400 `email_already_exists`）
+- [ ] 数据库中只有 1 条记录（UNIQUE 约束保护）
+
+### 6.5 Rate Limiter 可重入
+
+#### TC-REENT-09: 限速过期后重新计数
+
+```bash
+# 触发锁定（5 次错误）
+for i in $(seq 1 5); do
+  curl -s -o /dev/null -X POST $BASE/api/v1/auth/login/local \
+    -d "username=admin@example.com&password=wrong"
+done
+
+# 确认被锁定
+curl -s -w "%{http_code}" -o /dev/null -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@example.com&password=wrong"
+# 预期: 429
+
+# 等待锁定过期（5 分钟）或重启服务清除内存计数器
+make stop && make dev
+
+# 重新尝试 — 计数器应已重置
+curl -s -w "%{http_code}" -o /dev/null -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@example.com&password=wrong"
+# 预期: 401（不是 429）
+```
+
+**预期：** 锁定过期后恢复正常限速（从 0 开始计数），而非累积
+
+#### TC-REENT-10: 成功登录重置计数后再次失败
+
+```bash
+# 3 次失败
+for i in $(seq 1 3); do
+  curl -s -o /dev/null -X POST $BASE/api/v1/auth/login/local \
+    -d "username=admin@example.com&password=wrong"
+done
+
+# 1 次成功（重置计数）
+curl -s -o /dev/null -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@example.com&password=正确密码"
+
+# 再 4 次失败（从 0 重新计数，未达阈值 5）
+for i in $(seq 1 4); do
+  curl -s -w "attempt $i: %{http_code}\n" -o /dev/null -X POST $BASE/api/v1/auth/login/local \
+    -d "username=admin@example.com&password=wrong"
+done
+```
+
+**预期：** 4 次全部返回 401（未锁定），因为成功登录已重置计数器
+
+### 6.6 CSRF Token 可重入
+
+#### TC-REENT-11: 登录后多次 POST 使用同一 CSRF token
+
+```bash
+CSRF=$(grep csrf_token cookies.txt | awk '{print $NF}')
+
+# 同一 CSRF token 多次使用
+for i in 1 2 3; do
+  echo "Request $i: $(curl -s -w '%{http_code}' -o /dev/null \
+    -X POST $BASE/api/threads \
+    -b cookies.txt \
+    -H 'Content-Type: application/json' \
+    -H "X-CSRF-Token: $CSRF" \
+    -d '{"metadata":{}}')"
+done
+```
+
+**预期：** 三次都成功（CSRF token 是 Double Submit Cookie，不是一次性 nonce）
+
+### 6.7 Thread 操作可重入
+
+#### TC-REENT-12: 重复删除同一 Thread
+
+```bash
+CSRF=$(grep csrf_token cookies.txt | awk '{print $NF}')
+
+# 创建 thread
+TID=$(curl -s -X POST $BASE/api/threads \
+  -b cookies.txt \
+  -H "Content-Type: application/json" \
+  -H "X-CSRF-Token: $CSRF" \
+  -d '{"metadata":{}}' | jq -r .thread_id)
+
+# 第一次删除
+curl -s -w "%{http_code}" -X DELETE "$BASE/api/threads/$TID" \
+  -b cookies.txt -H "X-CSRF-Token: $CSRF"
+# 预期: 200
+
+# 第二次删除（幂等）
+curl -s -w "%{http_code}" -X DELETE "$BASE/api/threads/$TID" \
+  -b cookies.txt -H "X-CSRF-Token: $CSRF"
+```
+
+**预期：** 第二次返回 200 或 404，不报 500
+
+### 6.8 reset_admin 可重入
+
+#### TC-REENT-13: 连续两次 reset_admin
+
+```bash
+cd backend
+python -m app.gateway.auth.reset_admin
+# 记录密码 P1
+
+python -m app.gateway.auth.reset_admin
+# 记录密码 P2
+```
+
+**预期：**
+- [ ] P1 ≠ P2（每次生成新随机密码）
+- [ ] P1 不可用，只有 P2 有效
+- [ ] `token_version` 递增了 2
+- [ ] `needs_setup` 为 True
+
+### 6.9 Setup 流程可重入
+
+#### TC-REENT-14: 完成 Setup 后再访问 /setup 页面
+
+1. 完成 admin setup（改邮箱 + 改密码）
+2. 直接访问 `/setup`
+3. **预期：** 应跳转到 `/workspace`（`needs_setup` 已为 false，SSR guard 不会返回 `needs_setup` tag）
+
+#### TC-REENT-15: Setup 中途刷新页面
+
+1. 在 `/setup` 页面填写一半
+2. 刷新页面
+3. **预期：** 仍在 `/setup`（`needs_setup` 仍为 true），表单清空但不报错
+
+---
+
+## 七、模式差异测试
+
+> 以下用 `GW=http://localhost:8001` 表示直连 Gateway，`BASE=http://localhost:2026` 表示经 nginx。
+> Gateway 模式启动命令：`make dev-pro`（或 `./scripts/serve.sh --dev --gateway`）。
+
+### 7.1 标准模式独有
+
+> 启动命令：`make dev`（或 `./scripts/serve.sh --dev`）
+
+#### TC-MODE-01: LangGraph Server 独立运行，需 cookie
+
+```bash
+# 无 cookie 访问 LangGraph
+curl -s -w "%{http_code}" -o /dev/null $BASE/api/langgraph/threads/search
+# 预期: 403（LangGraph auth handler 拒绝）
+```
+
+#### TC-MODE-02: LangGraph auth 的 token_version 检查
+
+```bash
+# 登录拿 cookie
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@example.com&password=正确密码" -c cookies.txt
+
+# 改密码（bumps token_version）
+CSRF=$(grep csrf_token cookies.txt | awk '{print $NF}')
+curl -s -X POST $BASE/api/v1/auth/change-password \
+  -b cookies.txt -H "Content-Type: application/json" -H "X-CSRF-Token: $CSRF" \
+  -d '{"current_password":"正确密码","new_password":"NewPass1!"}' -c new_cookies.txt
+
+# 用旧 cookie 访问 LangGraph
+curl -s -w "%{http_code}" $BASE/api/langgraph/threads/search -b cookies.txt
+# 预期: 403（token_version 不匹配）
+
+# 用新 cookie 访问
+CSRF2=$(grep csrf_token new_cookies.txt | awk '{print $NF}')
+curl -s -w "%{http_code}" -X POST $BASE/api/langgraph/threads/search \
+  -b new_cookies.txt -H "Content-Type: application/json" -H "X-CSRF-Token: $CSRF2" -d '{}'
+# 预期: 200
+```
+
+#### TC-MODE-03: LangGraph auth 的 owner filter 隔离
+
+```bash
+# user1 创建 thread
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=user1@example.com&password=UserPass1!" -c u1.txt
+CSRF1=$(grep csrf_token u1.txt | awk '{print $NF}')
+TID=$(curl -s -X POST $BASE/api/langgraph/threads \
+  -b u1.txt -H "Content-Type: application/json" -H "X-CSRF-Token: $CSRF1" \
+  -d '{"metadata":{}}' | python3 -c "import sys,json; print(json.load(sys.stdin)['thread_id'])")
+
+# user2 搜索 — 应看不到 user1 的 thread
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=user2@example.com&password=UserPass2!" -c u2.txt
+CSRF2=$(grep csrf_token u2.txt | awk '{print $NF}')
+curl -s -X POST $BASE/api/langgraph/threads/search \
+  -b u2.txt -H "Content-Type: application/json" -H "X-CSRF-Token: $CSRF2" -d '{}' | python3 -c "
+import sys,json
+threads = json.load(sys.stdin)
+ids = [t['thread_id'] for t in threads]
+assert '$TID' not in ids, 'LEAK: user2 can see user1 thread'
+print('OK: user2 sees', len(threads), 'threads, none belong to user1')
+"
+```
+
+### 7.2 Gateway 模式独有
+
+> 启动命令：`make dev-pro`（或 `./scripts/serve.sh --dev --gateway`）
+> 无 LangGraph Server 进程，agent runtime 嵌入 Gateway。
+
+#### TC-MODE-04: 所有请求经 AuthMiddleware
+
+```bash
+# 确认 LangGraph Server 未运行
+curl -s -w "%{http_code}" -o /dev/null http://localhost:2024/ok
+# 预期: 000（连接被拒）
+
+# Gateway API 受保护
+curl -s -w "%{http_code}" -o /dev/null $BASE/api/models
+# 预期: 401
+
+# LangGraph 兼容路由（rewrite 到 Gateway）也受保护
+curl -s -w "%{http_code}" -o /dev/null -X POST $BASE/api/langgraph/threads/search \
+  -H "Content-Type: application/json" -d '{}'
+# 预期: 401
+```
+
+#### TC-MODE-05: Gateway 模式下完整 auth 流程
+
+```bash
+# 登录
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@example.com&password=正确密码" -c cookies.txt
+
+CSRF=$(grep csrf_token cookies.txt | awk '{print $NF}')
+
+# 创建 thread（走 Gateway 内嵌 runtime）
+curl -s -X POST $BASE/api/langgraph/threads \
+  -b cookies.txt -H "Content-Type: application/json" -H "X-CSRF-Token: $CSRF" \
+  -d '{"metadata":{}}' | python3 -c "import sys,json; print(json.load(sys.stdin)['thread_id'])"
+# 预期: 返回 thread_id
+
+# CSRF 保护（Gateway 模式下 CSRFMiddleware 直接覆盖所有路由）
+curl -s -w "%{http_code}" -o /dev/null -X POST $BASE/api/langgraph/threads \
+  -b cookies.txt -H "Content-Type: application/json" -d '{"metadata":{}}'
+# 预期: 403（CSRF token missing）
+```
+
+### 7.3 直连 Gateway（无 nginx）
+
+> 启动命令：`cd backend && make gateway`（端口 8001）
+> 不经过 nginx，直接测试 Gateway 的 auth 层。
+
+#### TC-GW-01: AuthMiddleware 保护所有非 public 路由
+
+```bash
+GW=http://localhost:8001
+
+for path in /api/models /api/mcp/config /api/memory /api/skills \
+            /api/v1/auth/me /api/v1/auth/change-password; do
+  echo "$path: $(curl -s -w '%{http_code}' -o /dev/null $GW$path)"
+done
+# 预期: 全部 401
+```
+
+#### TC-GW-02: Public 路由不需要 cookie
+
+```bash
+GW=http://localhost:8001
+
+for path in /health /api/v1/auth/setup-status /api/v1/auth/login/local /api/v1/auth/register; do
+  echo "$path: $(curl -s -w '%{http_code}' -o /dev/null $GW$path)"
+done
+# 预期: 200 或 405/422（方法不对但不是 401）
+```
+
+#### TC-GW-03: 直连 Gateway 注册 + 登录 + CSRF 完整流程
+
+```bash
+GW=http://localhost:8001
+
+# 注册
+curl -s -X POST $GW/api/v1/auth/register \
+  -H "Content-Type: application/json" \
+  -d '{"email":"gwtest@example.com","password":"GwTest123!"}' \
+  -c gw_cookies.txt -w "\nHTTP %{http_code}"
+# 预期: 201
+
+# 登录
+curl -s -X POST $GW/api/v1/auth/login/local \
+  -d "username=gwtest@example.com&password=GwTest123!" \
+  -c gw_cookies.txt -w "\nHTTP %{http_code}"
+# 预期: 200
+
+# GET（不需要 CSRF）
+curl -s -w "%{http_code}" $GW/api/models -b gw_cookies.txt
+# 预期: 200
+
+# POST 无 CSRF
+curl -s -w "%{http_code}" -o /dev/null -X POST $GW/api/memory/reload -b gw_cookies.txt
+# 预期: 403（CSRF token missing）
+
+# POST 有 CSRF
+CSRF=$(grep csrf_token gw_cookies.txt | awk '{print $NF}')
+curl -s -w "%{http_code}" -o /dev/null -X POST $GW/api/memory/reload \
+  -b gw_cookies.txt -H "X-CSRF-Token: $CSRF"
+# 预期: 200
+```
+
+#### TC-GW-04: 直连 Gateway 的 Rate Limiter
+
+```bash
+GW=http://localhost:8001
+
+# 直连时 request.client.host 是真实 IP（无 nginx 代理），不读 X-Real-IP
+for i in $(seq 1 6); do
+  echo -n "attempt $i: "
+  curl -s -w "%{http_code}\n" -o /dev/null -X POST $GW/api/v1/auth/login/local \
+    -d "username=admin@example.com&password=wrong"
+done
+# 预期: 前 5 次 401，第 6 次 429
+```
+
+#### TC-GW-05: 直连 Gateway 不受 X-Real-IP 欺骗
+
+```bash
+GW=http://localhost:8001
+
+# 直连时 client.host 不是 trusted proxy，X-Real-IP 被忽略
+for i in $(seq 1 6); do
+  echo -n "attempt $i (X-Real-IP spoofed): "
+  curl -s -w "%{http_code}\n" -o /dev/null -X POST $GW/api/v1/auth/login/local \
+    -H "X-Real-IP: 10.0.0.$i" \
+    -d "username=admin@example.com&password=wrong"
+done
+# 预期: 前 5 次 401，第 6 次 429（伪造的 X-Real-IP 无效，所有请求共享真实 IP 的桶）
+```
+
+### 7.4 Docker 部署
+
+> 启动命令：`./scripts/deploy.sh`（标准）或 `./scripts/deploy.sh --gateway`（Gateway 模式）
+> Docker Compose 文件：`docker/docker-compose.yaml`
+>
+> 前置条件：
+> - `.env` 中设置 `AUTH_JWT_SECRET`（否则每次容器重启 session 全部失效）
+> - `DEER_FLOW_HOME` 挂载到宿主机目录（持久化 `users.db`）
+
+#### TC-DOCKER-01: users.db 通过 volume 持久化
+
+```bash
+# 启动容器
+./scripts/deploy.sh
+
+# 等待启动完成
+sleep 15
+BASE=http://localhost:2026
+
+# 注册用户
+curl -s -X POST $BASE/api/v1/auth/register \
+  -H "Content-Type: application/json" \
+  -d '{"email":"docker-test@example.com","password":"DockerTest1!"}' -w "\nHTTP %{http_code}"
+
+# 检查宿主机上的 users.db
+ls -la ${DEER_FLOW_HOME:-backend/.deer-flow}/users.db
+sqlite3 ${DEER_FLOW_HOME:-backend/.deer-flow}/users.db \
+  "SELECT email FROM users WHERE email='docker-test@example.com';"
+```
+
+**预期：** users.db 在宿主机 `DEER_FLOW_HOME` 目录中，查询可见刚注册的用户。
+
+#### TC-DOCKER-02: 重启容器后 session 保持
+
+```bash
+# 登录拿 cookie
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=docker-test@example.com&password=DockerTest1!" \
+  -c docker_cookies.txt -o /dev/null
+
+# 验证 cookie 有效
+curl -s -w "%{http_code}" -o /dev/null $BASE/api/v1/auth/me -b docker_cookies.txt
+# 预期: 200
+
+# 重启容器（不删 volume）
+./scripts/deploy.sh down && ./scripts/deploy.sh
+sleep 15
+
+# 用旧 cookie 访问
+curl -s -w "%{http_code}" -o /dev/null $BASE/api/v1/auth/me -b docker_cookies.txt
+```
+
+**预期：**
+- 有 `AUTH_JWT_SECRET` → 200（session 保持）
+- 无 `AUTH_JWT_SECRET` → 401（每次启动生成新临时密钥，旧 JWT 签名失效）
+
+#### TC-DOCKER-03: 多 Worker 下 Rate Limiter 独立
+
+```bash
+# docker-compose.yaml 中 gateway 默认 4 workers
+# 每个 worker 有独立的 _login_attempts dict
+# 限速可能不精确（请求分散到不同 worker），但不会完全失效
+
+for i in $(seq 1 20); do
+  echo -n "attempt $i: "
+  curl -s -w "%{http_code}\n" -o /dev/null -X POST $BASE/api/v1/auth/login/local \
+    -d "username=docker-test@example.com&password=wrong"
+done
+```
+
+**预期：** 在某个点开始返回 429（每个 worker 独立计数，阈值可能在 5~20 之间触发，取决于负载均衡分布）。
+
+**已知限制：** In-process rate limiter 不跨 worker 共享。生产环境如需精确限速，需要 Redis 等外部存储。
+
+#### TC-DOCKER-04: IM 渠道不经过 auth
+
+```bash
+# IM 渠道（Feishu/Slack/Telegram）在 gateway 容器内部通过 LangGraph SDK 通信
+# 不走 nginx，不经过 AuthMiddleware
+
+# 验证方式：检查 gateway 日志中 channel manager 的请求不包含 auth 错误
+docker logs deer-flow-gateway 2>&1 | grep -E "ChannelManager|channel" | head -10
+```
+
+**预期：** 无 auth 相关错误。渠道通过 `langgraph-sdk` 直连 LangGraph Server（`http://langgraph:2024`），不走 auth 层。
+
+#### TC-DOCKER-05: admin 密码写入 0600 凭证文件（不再走日志）
+
+```bash
+# 凭证文件写在挂载到宿主机的 DEER_FLOW_HOME 下
+ls -la ${DEER_FLOW_HOME:-backend/.deer-flow}/admin_initial_credentials.txt
+# 预期文件权限: -rw------- (0600)
+
+cat ${DEER_FLOW_HOME:-backend/.deer-flow}/admin_initial_credentials.txt
+# 预期内容: email + password 行
+
+# 容器日志只输出文件路径，不输出密码本身
+docker logs deer-flow-gateway 2>&1 | grep -E "Credentials written to|Admin account"
+# 预期看到: "Credentials written to: /...../admin_initial_credentials.txt (mode 0600)"
+
+# 反向验证: 日志里 NEVER 出现明文密码
+docker logs deer-flow-gateway 2>&1 | grep -iE "Password: .{15,}" && echo "FAIL: leaked" || echo "OK: not leaked"
+```
+
+**预期：**
+- 凭证文件存在于 `DEER_FLOW_HOME` 下，权限 `0600`
+- 容器日志输出**路径**（不是密码本身），符合 CodeQL `py/clear-text-logging-sensitive-data` 规则
+- `grep "Password:"` 在日志中**应当无匹配**（旧行为已废弃，simplify pass 移除了日志泄露路径）
+
+#### TC-DOCKER-06: Gateway 模式 Docker 部署
+
+```bash
+# Gateway 模式：无 langgraph 容器
+./scripts/deploy.sh --gateway
+sleep 15
+
+# 确认 langgraph 容器不存在
+docker ps --filter name=deer-flow-langgraph --format '{{.Names}}' | wc -l
+# 预期: 0
+
+# auth 流程正常
+curl -s -w "%{http_code}" -o /dev/null $BASE/api/models
+# 预期: 401
+
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@deerflow.dev&password=<日志密码>" \
+  -c cookies.txt -w "\nHTTP %{http_code}"
+# 预期: 200
+```
+
+### 7.4 补充边界用例
+
+#### TC-EDGE-01: 格式正确但随机 JWT
+
+```bash
+RANDOM_JWT=$(python3 -c "
+import jwt, time, uuid
+print(jwt.encode({'sub':str(uuid.uuid4()),'ver':0,'exp':int(time.time())+3600}, 'wrong-secret-32chars-placeholder!!', algorithm='HS256'))
+")
+curl -s --cookie "access_token=$RANDOM_JWT" $BASE/api/v1/auth/me | jq .detail
+```
+
+**预期：** `{"code": "token_invalid", "message": "Token error: invalid_signature"}`
+
+#### TC-EDGE-02: 注册时传 system_role=admin
+
+```bash
+curl -s -X POST $BASE/api/v1/auth/register \
+  -H "Content-Type: application/json" \
+  -d '{"email":"hacker@example.com","password":"HackPass1!","system_role":"admin"}' | jq .system_role
+```
+
+**预期：** `"user"`（`system_role` 字段被忽略）
+
+#### TC-EDGE-03: 并发改密码
+
+```bash
+# 注册用户，登录两个 session
+curl -s -X POST $BASE/api/v1/auth/register \
+  -H "Content-Type: application/json" \
+  -d '{"email":"edge03@example.com","password":"EdgePass3!"}' -o /dev/null
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=edge03@example.com&password=EdgePass3!" -c s1.txt -o /dev/null
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=edge03@example.com&password=EdgePass3!" -c s2.txt -o /dev/null
+
+CSRF1=$(grep csrf_token s1.txt | awk '{print $NF}')
+CSRF2=$(grep csrf_token s2.txt | awk '{print $NF}')
+
+# 并发改密码
+curl -s -w "S1: %{http_code}\n" -o /dev/null -X POST $BASE/api/v1/auth/change-password \
+  -b s1.txt -H "Content-Type: application/json" -H "X-CSRF-Token: $CSRF1" \
+  -d '{"current_password":"EdgePass3!","new_password":"NewEdge3a!"}' &
+curl -s -w "S2: %{http_code}\n" -o /dev/null -X POST $BASE/api/v1/auth/change-password \
+  -b s2.txt -H "Content-Type: application/json" -H "X-CSRF-Token: $CSRF2" \
+  -d '{"current_password":"EdgePass3!","new_password":"NewEdge3b!"}' &
+wait
+```
+
+**预期：** 一个 200、一个 400（current_password 已变导致验证失败）。极端并发下可能两个都 200（SQLite 串行写），但最终只有一个密码生效。
+
+#### TC-EDGE-04: Cookie SameSite 验证
+
+> 完整的 HTTP/HTTPS cookie 属性对比见 §3.3 TC-ATK-06/07/07a。
+
+```bash
+curl -s -D - -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@example.com&password=正确密码" 2>/dev/null | grep -i set-cookie
+```
+
+**预期：** `access_token` → `SameSite=lax`，`csrf_token` → `SameSite=strict`
+
+#### TC-EDGE-05: HTTP 无 max_age / HTTPS 有 max_age
+
+```bash
+# HTTP
+curl -s -D - -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@example.com&password=正确密码" 2>/dev/null \
+  | grep "access_token=" | grep -oi "max-age=[0-9]*" || echo "NO max-age (HTTP session cookie)"
+
+# HTTPS
+curl -s -D - -X POST $BASE/api/v1/auth/login/local \
+  -H "X-Forwarded-Proto: https" \
+  -d "username=admin@example.com&password=正确密码" 2>/dev/null \
+  | grep "access_token=" | grep -oi "max-age=[0-9]*"
+```
+
+**预期：** HTTP 无 `Max-Age`（session cookie，浏览器关闭即失效），HTTPS 有 `Max-Age=604800`（7 天）
+
+#### TC-EDGE-06: public 路径 trailing slash
+
+```bash
+for path in /api/v1/auth/login/local/ /api/v1/auth/register/ \
+            /api/v1/auth/logout/ /api/v1/auth/setup-status/; do
+  echo "$path: $(curl -s -w '%{http_code}' -o /dev/null $BASE$path)"
+done
+```
+
+**预期：** 全部 307（redirect 去掉 trailing slash）或 200/405，不是 401
+
+### 7.5 红队对抗测试
+
+> 模拟攻击者视角，验证防线没有可利用的缝隙。
+
+#### 7.5.1 路径混淆绕过
+
+```bash
+# 通过编码/双斜杠/路径穿越尝试绕过 AuthMiddleware 公开路径判断
+for path in \
+  "//api/v1/auth/me" \
+  "/api/v1/auth/login/local/../me" \
+  "/api/v1/auth/login/local%2f..%2fme" \
+  "/api/v1/auth/login/local/..%2Fme" \
+  "/API/V1/AUTH/ME"; do
+  echo "$path: $(curl -s -w '%{http_code}' -o /dev/null $BASE$path)"
+done
+```
+
+**预期：** 全部 401 或 404。不应有路径混淆导致跳过 auth 检查。
+
+#### 7.5.2 CSRF 对抗矩阵
+
+```bash
+# 登录拿 cookie
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@example.com&password=正确密码" -c cookies.txt
+
+CSRF=$(grep csrf_token cookies.txt | awk '{print $NF}')
+
+# Case 1: 有 cookie 无 header → 403
+curl -s -w "%{http_code}" -o /dev/null \
+  -X POST $BASE/api/threads -b cookies.txt \
+  -H "Content-Type: application/json" -d '{"metadata":{}}'
+
+# Case 2: 有 header 无 cookie → 403（删除 cookie 中的 csrf_token）
+curl -s -w "%{http_code}" -o /dev/null \
+  -X POST $BASE/api/threads \
+  -b cookies.txt \
+  -H "X-CSRF-Token: $CSRF" \
+  -H "Content-Type: application/json" -d '{"metadata":{}}'
+
+# Case 3: header 和 cookie 不匹配 → 403
+curl -s -w "%{http_code}" -o /dev/null \
+  -X POST $BASE/api/threads -b cookies.txt \
+  -H "X-CSRF-Token: wrong-token" \
+  -H "Content-Type: application/json" -d '{"metadata":{}}'
+
+# Case 4: 旧 CSRF token（登出再登录后） → 旧 token 应失效
+curl -s -X POST $BASE/api/v1/auth/logout -b cookies.txt
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@example.com&password=正确密码" -c cookies.txt
+# 用旧 CSRF 发请求
+curl -s -w "%{http_code}" -o /dev/null \
+  -X POST $BASE/api/threads -b cookies.txt \
+  -H "X-CSRF-Token: $CSRF" \
+  -H "Content-Type: application/json" -d '{"metadata":{}}'
+```
+
+**预期：** Case 1-3 全部 403。Case 4 应 403（旧 CSRF 与新 cookie 不匹配）。
+
+#### 7.5.3 Token Replay（登出后旧 token 重放）
+
+```bash
+# 登录，保存 cookie
+curl -s -X POST $BASE/api/v1/auth/login/local \
+  -d "username=admin@example.com&password=正确密码" -c cookies.txt
+
+# 提取 access_token 值
+TOKEN=$(grep access_token cookies.txt | awk '{print $NF}')
+
+# 登出
+curl -s -X POST $BASE/api/v1/auth/logout -b cookies.txt
+
+# 手工注入旧 token（模拟攻击者窃取了 token）
+curl -s -w "%{http_code}" -o /dev/null \
+  $BASE/api/v1/auth/me --cookie "access_token=$TOKEN"
+```
+
+**预期：** 200（已知限制：登出只清客户端 cookie，不 bump `token_version`。旧 token 在过期前仍有效）。
+**安全备注：** 如需严格防重放，需在登出时 `token_version += 1`。当前设计选择不做，因为成本是所有设备的 session 全部失效。
+
+#### 7.5.4 跨站强制登出
+
+```bash
+# 攻击者从第三方站点 POST /logout（无需认证、无需 CSRF）
+curl -s -X POST $BASE/api/v1/auth/logout -w "%{http_code}"
+```
+
+**预期：** 200（logout 是 public + CSRF 豁免）。
+**风险评估：** 低——只影响可用性（被强制登出），不泄露数据。浏览器 `SameSite=Lax` 限制了真实跨站场景下 cookie 不会被带上，所以实际上第三方站点的 POST 不会清除用户 cookie。
+
+#### 7.5.5 Metadata 注入攻击（所有权伪造）
+
+```bash
+# 尝试在创建 thread 时注入其他用户的 user_id
+CSRF=$(grep csrf_token cookies.txt | awk '{print $NF}')
+curl -s -X POST $BASE/api/threads \
+  -b cookies.txt \
+  -H "Content-Type: application/json" \
+  -H "X-CSRF-Token: $CSRF" \
+  -d '{"metadata":{"owner_id":"victim-user-id"}}' | jq .metadata.owner_id
+```
+
+**预期：** 返回的 `metadata.owner_id` 应为当前登录用户的 ID，不是请求中注入的 `victim-user-id`。服务端应覆盖客户端提供的 `user_id`。
+
+#### 7.5.6 HTTP Method 探测
+
+```bash
+# HEAD/OPTIONS 不应泄露受保护资源信息
+for method in HEAD OPTIONS TRACE; do
+  echo "$method /api/models: $(curl -s -w '%{http_code}' -o /dev/null -X $method $BASE/api/models)"
+done
+```
+
+**预期：** HEAD/OPTIONS 返回 401 或 405。TRACE 应返回 405。
+
+#### 7.5.7 Rate Limiter IP 维度缺陷验证
+
+```bash
+# 通过不同的 X-Forwarded-For 绕过限速（验证是否用 client.host 而非 header）
+for i in $(seq 1 6); do
+  curl -s -w "attempt $i: %{http_code}\n" -o /dev/null \
+    -X POST $BASE/api/v1/auth/login/local \
+    -H "X-Forwarded-For: 10.0.0.$i" \
+    -d "username=admin@example.com&password=wrong"
+done
+```
+
+**预期：** 如果 rate limiter 基于 `request.client.host`（实际 TCP 连接 IP），所有请求来自同一 IP，第 6 个应返回 429。X-Forwarded-For 不应影响限速判断。
+
+#### 7.5.8 Junk Cookie 穿透验证
+
+```bash
+# middleware 只检查 cookie 存在性，不验证 JWT
+# 确认 junk cookie 能过 middleware 但被下游 @require_auth 拦截
+curl -s -w "%{http_code}" $BASE/api/v1/auth/me \
+  --cookie "access_token=not-a-jwt"
+```
+
+**预期：** 401（middleware 放行，`get_current_user_from_request` 解码失败返回 401）。
+**安全备注：** middleware 是 presence-only 检查，有意设计。完整验证交给 `@require_auth`。
+
+#### 7.5.9 路由覆盖审计
+
+```bash
+# 列出所有注册的路由，检查哪些没有 @require_auth
+cd backend && PYTHONPATH=. python3 -c "
+from app.gateway.app import create_app
+app = create_app()
+public_prefixes = ['/health', '/docs', '/redoc', '/openapi.json',
+                   '/api/v1/auth/login', '/api/v1/auth/register',
+                   '/api/v1/auth/logout', '/api/v1/auth/setup-status']
+for route in app.routes:
+    path = getattr(route, 'path', '')
+    if not path or not path.startswith('/api'):
+        continue
+    is_public = any(path.startswith(p) for p in public_prefixes)
+    if not is_public:
+        print(f'  {path}')
+" 2>/dev/null
+```
+
+**预期：** 列出的所有路由都应由 AuthMiddleware（cookie 存在性）+ `@require_auth`/`@require_permission`（JWT 验证）双层保护。检查是否有遗漏。
+
+---
+
+## 八、回归清单
+
+每次 auth 相关代码变更后必须通过：
+
+```bash
+# 单元测试（168 个）
+cd backend && PYTHONPATH=. uv run pytest \
+  tests/test_auth.py \
+  tests/test_auth_config.py \
+  tests/test_auth_errors.py \
+  tests/test_auth_type_system.py \
+  tests/test_auth_middleware.py \
+  tests/test_langgraph_auth.py \
+  -v
+
+# 核心接口冒烟
+curl -s $BASE/health                              # 200
+curl -s $BASE/api/models                          # 401 (无 cookie)
+curl -s -X POST $BASE/api/v1/auth/setup-status    # 200
+curl -s $BASE/api/v1/auth/me -b cookies.txt       # 200 (有 cookie)
+```
diff --git a/backend/docs/AUTH_UPGRADE.md b/backend/docs/AUTH_UPGRADE.md
new file mode 100644
index 000000000..344c488c4
--- /dev/null
+++ b/backend/docs/AUTH_UPGRADE.md
@@ -0,0 +1,129 @@
+# Authentication Upgrade Guide
+
+DeerFlow 内置了认证模块。本文档面向从无认证版本升级的用户。
+
+## 核心概念
+
+认证模块采用**始终强制**策略：
+
+- 首次启动时自动创建 admin 账号，随机密码打印到控制台日志
+- 认证从一开始就是强制的，无竞争窗口
+- 历史对话（升级前创建的 thread）自动迁移到 admin 名下
+
+## 升级步骤
+
+### 1. 更新代码
+
+```bash
+git pull origin main
+cd backend && make install
+```
+
+### 2. 首次启动
+
+```bash
+make dev
+```
+
+控制台会输出：
+
+```
+============================================================
+  Admin account created on first boot
+  Email:    admin@deerflow.dev
+  Password: aB3xK9mN_pQ7rT2w
+  Change it after login: Settings → Account
+============================================================
+```
+
+如果未登录就重启了服务，不用担心——只要 setup 未完成，每次启动都会重置密码并重新打印到控制台。
+
+### 3. 登录
+
+访问 `http://localhost:2026/login`，使用控制台输出的邮箱和密码登录。
+
+### 4. 修改密码
+
+登录后进入 Settings → Account → Change Password。
+
+### 5. 添加用户（可选）
+
+其他用户通过 `/login` 页面注册，自动获得 **user** 角色。每个用户只能看到自己的对话。
+
+## 安全机制
+
+| 机制 | 说明 |
+|------|------|
+| JWT HttpOnly Cookie | Token 不暴露给 JavaScript，防止 XSS 窃取 |
+| CSRF Double Submit Cookie | 所有 POST/PUT/DELETE 请求需携带 `X-CSRF-Token` |
+| bcrypt 密码哈希 | 密码不以明文存储 |
+| 多租户隔离 | 用户只能访问自己的 thread |
+| HTTPS 自适应 | 检测 `x-forwarded-proto`，自动设置 `Secure` cookie 标志 |
+
+## 常见操作
+
+### 忘记密码
+
+```bash
+cd backend
+
+# 重置 admin 密码
+python -m app.gateway.auth.reset_admin
+
+# 重置指定用户密码
+python -m app.gateway.auth.reset_admin --email user@example.com
+```
+
+会输出新的随机密码。
+
+### 完全重置
+
+删除用户数据库，重启后自动创建新 admin：
+
+```bash
+rm -f backend/.deer-flow/users.db
+# 重启服务，控制台输出新密码
+```
+
+## 数据存储
+
+| 文件 | 内容 |
+|------|------|
+| `.deer-flow/users.db` | SQLite 用户数据库（密码哈希、角色） |
+| `.env` 中的 `AUTH_JWT_SECRET` | JWT 签名密钥（未设置时自动生成临时密钥，重启后 session 失效） |
+
+### 生产环境建议
+
+```bash
+# 生成持久化 JWT 密钥，避免重启后所有用户需重新登录
+python -c "import secrets; print(secrets.token_urlsafe(32))"
+# 将输出添加到 .env：
+# AUTH_JWT_SECRET=<生成的密钥>
+```
+
+## API 端点
+
+| 端点 | 方法 | 说明 |
+|------|------|------|
+| `/api/v1/auth/login/local` | POST | 邮箱密码登录（OAuth2 form） |
+| `/api/v1/auth/register` | POST | 注册新用户（user 角色） |
+| `/api/v1/auth/logout` | POST | 登出（清除 cookie） |
+| `/api/v1/auth/me` | GET | 获取当前用户信息 |
+| `/api/v1/auth/change-password` | POST | 修改密码 |
+| `/api/v1/auth/setup-status` | GET | 检查 admin 是否存在 |
+
+## 兼容性
+
+- **标准模式**（`make dev`）：完全兼容，admin 自动创建
+- **Gateway 模式**（`make dev-pro`）：完全兼容
+- **Docker 部署**：完全兼容，`.deer-flow/users.db` 需持久化卷挂载
+- **IM 渠道**（Feishu/Slack/Telegram）：通过 LangGraph SDK 通信，不经过认证层
+- **DeerFlowClient**（嵌入式）：不经过 HTTP，不受认证影响
+
+## 故障排查
+
+| 症状 | 原因 | 解决 |
+|------|------|------|
+| 启动后没看到密码 | admin 已存在（非首次启动） | 用 `reset_admin` 重置，或删 `users.db` |
+| 登录后 POST 返回 403 | CSRF token 缺失 | 确认前端已更新 |
+| 重启后需要重新登录 | `AUTH_JWT_SECRET` 未持久化 | 在 `.env` 中设置固定密钥 |
diff --git a/backend/docs/TITLE_GENERATION_IMPLEMENTATION.md b/backend/docs/TITLE_GENERATION_IMPLEMENTATION.md
index 07a026e79..87e8aa61a 100644
--- a/backend/docs/TITLE_GENERATION_IMPLEMENTATION.md
+++ b/backend/docs/TITLE_GENERATION_IMPLEMENTATION.md
@@ -124,7 +124,7 @@ title:
 # checkpointer.py
 from langgraph.checkpoint.sqlite import SqliteSaver
 
-checkpointer = SqliteSaver.from_conn_string("checkpoints.db")
+checkpointer = SqliteSaver.from_conn_string("deerflow.db")
 ```
 
 ```json
diff --git a/backend/langgraph.json b/backend/langgraph.json
index 74f5c691d..28588c9f8 100644
--- a/backend/langgraph.json
+++ b/backend/langgraph.json
@@ -8,6 +8,9 @@
   "graphs": {
     "lead_agent": "deerflow.agents:make_lead_agent"
   },
+  "auth": {
+    "path": "./app/gateway/langgraph_auth.py:auth"
+  },
   "checkpointer": {
     "path": "./packages/harness/deerflow/agents/checkpointer/async_provider.py:make_checkpointer"
   }
diff --git a/backend/packages/harness/deerflow/agents/__init__.py b/backend/packages/harness/deerflow/agents/__init__.py
index 2c31a514a..397f67f8e 100644
--- a/backend/packages/harness/deerflow/agents/__init__.py
+++ b/backend/packages/harness/deerflow/agents/__init__.py
@@ -1,4 +1,3 @@
-from .checkpointer import get_checkpointer, make_checkpointer, reset_checkpointer
 from .factory import create_deerflow_agent
 from .features import Next, Prev, RuntimeFeatures
 from .lead_agent import make_lead_agent
@@ -18,7 +17,4 @@ __all__ = [
     "make_lead_agent",
     "SandboxState",
     "ThreadState",
-    "get_checkpointer",
-    "reset_checkpointer",
-    "make_checkpointer",
 ]
diff --git a/backend/packages/harness/deerflow/agents/lead_agent/agent.py b/backend/packages/harness/deerflow/agents/lead_agent/agent.py
index 3b336a377..2f8bdf661 100644
--- a/backend/packages/harness/deerflow/agents/lead_agent/agent.py
+++ b/backend/packages/harness/deerflow/agents/lead_agent/agent.py
@@ -3,6 +3,7 @@ import logging
 from langchain.agents import create_agent
 from langchain.agents.middleware import AgentMiddleware
 from langchain_core.runnables import RunnableConfig
+from langgraph.graph.state import CompiledStateGraph
 
 from deerflow.agents.lead_agent.prompt import apply_prompt_template
 from deerflow.agents.memory.summarization_hook import memory_flush_hook
@@ -18,9 +19,8 @@ from deerflow.agents.middlewares.tool_error_handling_middleware import build_lea
 from deerflow.agents.middlewares.view_image_middleware import ViewImageMiddleware
 from deerflow.agents.thread_state import ThreadState
 from deerflow.config.agents_config import load_agent_config, validate_agent_name
-from deerflow.config.app_config import get_app_config
-from deerflow.config.memory_config import get_memory_config
-from deerflow.config.summarization_config import get_summarization_config
+from deerflow.config.app_config import AppConfig
+from deerflow.config.deer_flow_context import DeerFlowContext
 from deerflow.models import create_chat_model
 
 logger = logging.getLogger(__name__)
@@ -35,9 +35,8 @@ def _get_runtime_config(config: RunnableConfig) -> dict:
     return cfg
 
 
-def _resolve_model_name(requested_model_name: str | None = None) -> str:
+def _resolve_model_name(app_config: AppConfig, requested_model_name: str | None = None) -> str:
     """Resolve a runtime model name safely, falling back to default if invalid. Returns None if no models are configured."""
-    app_config = get_app_config()
     default_model_name = app_config.models[0].name if app_config.models else None
     if default_model_name is None:
         raise ValueError("No chat models are configured. Please configure at least one model in config.yaml.")
@@ -50,9 +49,9 @@ def _resolve_model_name(requested_model_name: str | None = None) -> str:
     return default_model_name
 
 
-def _create_summarization_middleware() -> DeerFlowSummarizationMiddleware | None:
+def _create_summarization_middleware(app_config: AppConfig) -> DeerFlowSummarizationMiddleware | None:
     """Create and configure the summarization middleware from config."""
-    config = get_summarization_config()
+    config = app_config.summarization
 
     if not config.enabled:
         return None
@@ -68,13 +67,15 @@ def _create_summarization_middleware() -> DeerFlowSummarizationMiddleware | None
     # Prepare keep parameter
     keep = config.keep.to_tuple()
 
-    # Prepare model parameter
+    # Prepare model parameter.
+    # Bind "middleware:summarize" tag so RunJournal identifies these LLM calls
+    # as middleware rather than lead_agent (SummarizationMiddleware is a
+    # LangChain built-in, so we tag the model at creation time).
     if config.model_name:
-        model = create_chat_model(name=config.model_name, thinking_enabled=False)
+        model = create_chat_model(name=config.model_name, thinking_enabled=False, app_config=app_config)
     else:
-        # Use a lightweight model for summarization to save costs
-        # Falls back to default model if not explicitly specified
-        model = create_chat_model(thinking_enabled=False)
+        model = create_chat_model(thinking_enabled=False, app_config=app_config)
+    model = model.with_config(tags=["middleware:summarize"])
 
     # Prepare kwargs
     kwargs = {
@@ -90,14 +91,14 @@ def _create_summarization_middleware() -> DeerFlowSummarizationMiddleware | None
         kwargs["summary_prompt"] = config.summary_prompt
 
     hooks: list[BeforeSummarizationHook] = []
-    if get_memory_config().enabled:
+    if app_config.memory.enabled:
         hooks.append(memory_flush_hook)
 
     # The logic below relies on two assumptions holding true: this factory is
     # the sole entry point for DeerFlowSummarizationMiddleware, and the runtime
     # config is not expected to change after startup.
     try:
-        skills_container_path = get_app_config().skills.container_path or "/mnt/skills"
+        skills_container_path = app_config.skills.container_path or "/mnt/skills"
     except Exception:
         logger.exception("Failed to resolve skills container path; falling back to default")
         skills_container_path = "/mnt/skills"
@@ -238,10 +239,18 @@ Being proactive with task management demonstrates thoroughness and ensures all r
 # ViewImageMiddleware should be before ClarificationMiddleware to inject image details before LLM
 # ToolErrorHandlingMiddleware should be before ClarificationMiddleware to convert tool exceptions to ToolMessages
 # ClarificationMiddleware should be last to intercept clarification requests after model calls
-def _build_middlewares(config: RunnableConfig, model_name: str | None, agent_name: str | None = None, custom_middlewares: list[AgentMiddleware] | None = None):
+def _build_middlewares(
+    app_config: AppConfig,
+    config: RunnableConfig,
+    *,
+    model_name: str | None,
+    agent_name: str | None = None,
+    custom_middlewares: list[AgentMiddleware] | None = None,
+):
     """Build middleware chain based on runtime configuration.
 
     Args:
+        app_config: Resolved application config.
         config: Runtime configuration containing configurable options like is_plan_mode.
         agent_name: If provided, MemoryMiddleware will use per-agent memory storage.
         custom_middlewares: Optional list of custom middlewares to inject into the chain.
@@ -249,10 +258,10 @@ def _build_middlewares(config: RunnableConfig, model_name: str | None, agent_nam
     Returns:
         List of middleware instances.
     """
-    middlewares = build_lead_runtime_middlewares(lazy_init=True)
+    middlewares = build_lead_runtime_middlewares(app_config=app_config, lazy_init=True)
 
     # Add summarization middleware if enabled
-    summarization_middleware = _create_summarization_middleware()
+    summarization_middleware = _create_summarization_middleware(app_config)
     if summarization_middleware is not None:
         middlewares.append(summarization_middleware)
 
@@ -264,7 +273,7 @@ def _build_middlewares(config: RunnableConfig, model_name: str | None, agent_nam
         middlewares.append(todo_list_middleware)
 
     # Add TokenUsageMiddleware when token_usage tracking is enabled
-    if get_app_config().token_usage.enabled:
+    if app_config.token_usage.enabled:
         middlewares.append(TokenUsageMiddleware())
 
     # Add TitleMiddleware
@@ -275,7 +284,6 @@ def _build_middlewares(config: RunnableConfig, model_name: str | None, agent_nam
 
     # Add ViewImageMiddleware only if the current model supports vision.
     # Use the resolved runtime model_name from make_lead_agent to avoid stale config values.
-    app_config = get_app_config()
     model_config = app_config.get_model_config(model_name) if model_name else None
     if model_config is not None and model_config.supports_vision:
         middlewares.append(ViewImageMiddleware())
@@ -304,11 +312,32 @@ def _build_middlewares(config: RunnableConfig, model_name: str | None, agent_nam
     return middlewares
 
 
-def make_lead_agent(config: RunnableConfig):
+def make_lead_agent(
+    config: RunnableConfig,
+    app_config: AppConfig | None = None,
+) -> CompiledStateGraph:
+    """Build the lead agent from runtime config.
+
+    Args:
+        config: LangGraph ``RunnableConfig`` carrying per-invocation options
+            (``thinking_enabled``, ``model_name``, ``is_plan_mode``, etc.).
+        app_config: Resolved application config. Required for in-process
+            entry points (DeerFlowClient, Gateway Worker). When omitted we
+            are being called via ``langgraph.json`` registration and reload
+            from disk — the LangGraph Server bootstrap path has no other
+            way to thread the value.
+    """
     # Lazy import to avoid circular dependency
     from deerflow.tools import get_available_tools
     from deerflow.tools.builtins import setup_agent
 
+    if app_config is None:
+        # LangGraph Server registers ``make_lead_agent`` via ``langgraph.json``
+        # and hands us only a ``RunnableConfig``. Reload config from disk
+        # here — it's a pure function, equivalent to the process-global the
+        # old code path would have read.
+        app_config = AppConfig.from_file()
+
     cfg = _get_runtime_config(config)
 
     thinking_enabled = cfg.get("thinking_enabled", True)
@@ -325,9 +354,8 @@ def make_lead_agent(config: RunnableConfig):
     agent_model_name = agent_config.model if agent_config and agent_config.model else None
 
     # Final model name resolution: request → agent config → global default, with fallback for unknown names
-    model_name = _resolve_model_name(requested_model_name or agent_model_name)
+    model_name = _resolve_model_name(app_config, requested_model_name or agent_model_name)
 
-    app_config = get_app_config()
     model_config = app_config.get_model_config(model_name)
 
     if model_config is None:
@@ -367,20 +395,22 @@ def make_lead_agent(config: RunnableConfig):
     if is_bootstrap:
         # Special bootstrap agent with minimal prompt for initial custom agent creation flow
         return create_agent(
-            model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled),
-            tools=get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled) + [setup_agent],
-            middleware=_build_middlewares(config, model_name=model_name),
-            system_prompt=apply_prompt_template(subagent_enabled=subagent_enabled, max_concurrent_subagents=max_concurrent_subagents, available_skills=set(["bootstrap"])),
+            model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled, app_config=app_config),
+            tools=get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled, app_config=app_config) + [setup_agent],
+            middleware=_build_middlewares(app_config, config, model_name=model_name),
+            system_prompt=apply_prompt_template(app_config, subagent_enabled=subagent_enabled, max_concurrent_subagents=max_concurrent_subagents, available_skills=set(["bootstrap"])),
             state_schema=ThreadState,
+            context_schema=DeerFlowContext,
         )
 
     # Default lead agent (unchanged behavior)
     return create_agent(
-        model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled, reasoning_effort=reasoning_effort),
-        tools=get_available_tools(model_name=model_name, groups=agent_config.tool_groups if agent_config else None, subagent_enabled=subagent_enabled),
-        middleware=_build_middlewares(config, model_name=model_name, agent_name=agent_name),
+        model=create_chat_model(name=model_name, thinking_enabled=thinking_enabled, reasoning_effort=reasoning_effort, app_config=app_config),
+        tools=get_available_tools(model_name=model_name, groups=agent_config.tool_groups if agent_config else None, subagent_enabled=subagent_enabled, app_config=app_config),
+        middleware=_build_middlewares(app_config, config, model_name=model_name, agent_name=agent_name),
         system_prompt=apply_prompt_template(
-            subagent_enabled=subagent_enabled, max_concurrent_subagents=max_concurrent_subagents, agent_name=agent_name, available_skills=set(agent_config.skills) if agent_config and agent_config.skills is not None else None
+            app_config, subagent_enabled=subagent_enabled, max_concurrent_subagents=max_concurrent_subagents, agent_name=agent_name, available_skills=set(agent_config.skills) if agent_config and agent_config.skills is not None else None
         ),
         state_schema=ThreadState,
+        context_schema=DeerFlowContext,
     )
diff --git a/backend/packages/harness/deerflow/agents/lead_agent/prompt.py b/backend/packages/harness/deerflow/agents/lead_agent/prompt.py
index 2ccacac68..b059c9bdb 100644
--- a/backend/packages/harness/deerflow/agents/lead_agent/prompt.py
+++ b/backend/packages/harness/deerflow/agents/lead_agent/prompt.py
@@ -5,6 +5,7 @@ from datetime import datetime
 from functools import lru_cache
 
 from deerflow.config.agents_config import load_agent_soul
+from deerflow.config.app_config import AppConfig
 from deerflow.skills import load_skills
 from deerflow.skills.types import Skill
 from deerflow.subagents import get_available_subagent_names
@@ -19,19 +20,20 @@ _enabled_skills_refresh_version = 0
 _enabled_skills_refresh_event = threading.Event()
 
 
-def _load_enabled_skills_sync() -> list[Skill]:
-    return list(load_skills(enabled_only=True))
+def _load_enabled_skills_sync(app_config: AppConfig | None) -> list[Skill]:
+    return list(load_skills(app_config, enabled_only=True))
 
 
-def _start_enabled_skills_refresh_thread() -> None:
+def _start_enabled_skills_refresh_thread(app_config: AppConfig | None) -> None:
     threading.Thread(
         target=_refresh_enabled_skills_cache_worker,
+        args=(app_config,),
         name="deerflow-enabled-skills-loader",
         daemon=True,
     ).start()
 
 
-def _refresh_enabled_skills_cache_worker() -> None:
+def _refresh_enabled_skills_cache_worker(app_config: AppConfig | None) -> None:
     global _enabled_skills_cache, _enabled_skills_refresh_active
 
     while True:
@@ -39,8 +41,8 @@ def _refresh_enabled_skills_cache_worker() -> None:
             target_version = _enabled_skills_refresh_version
 
         try:
-            skills = _load_enabled_skills_sync()
-        except Exception:
+            skills = _load_enabled_skills_sync(app_config)
+        except (OSError, ImportError):
             logger.exception("Failed to load enabled skills for prompt injection")
             skills = []
 
@@ -56,7 +58,7 @@ def _refresh_enabled_skills_cache_worker() -> None:
             _enabled_skills_cache = None
 
 
-def _ensure_enabled_skills_cache() -> threading.Event:
+def _ensure_enabled_skills_cache(app_config: AppConfig | None) -> threading.Event:
     global _enabled_skills_refresh_active
 
     with _enabled_skills_lock:
@@ -68,11 +70,11 @@ def _ensure_enabled_skills_cache() -> threading.Event:
         _enabled_skills_refresh_active = True
         _enabled_skills_refresh_event.clear()
 
-    _start_enabled_skills_refresh_thread()
+    _start_enabled_skills_refresh_thread(app_config)
     return _enabled_skills_refresh_event
 
 
-def _invalidate_enabled_skills_cache() -> threading.Event:
+def _invalidate_enabled_skills_cache(app_config: AppConfig | None) -> threading.Event:
     global _enabled_skills_cache, _enabled_skills_refresh_active, _enabled_skills_refresh_version
 
     _get_cached_skills_prompt_section.cache_clear()
@@ -84,30 +86,30 @@ def _invalidate_enabled_skills_cache() -> threading.Event:
             return _enabled_skills_refresh_event
         _enabled_skills_refresh_active = True
 
-    _start_enabled_skills_refresh_thread()
+    _start_enabled_skills_refresh_thread(app_config)
     return _enabled_skills_refresh_event
 
 
-def prime_enabled_skills_cache() -> None:
-    _ensure_enabled_skills_cache()
+def prime_enabled_skills_cache(app_config: AppConfig | None = None) -> None:
+    _ensure_enabled_skills_cache(app_config)
 
 
-def warm_enabled_skills_cache(timeout_seconds: float = _ENABLED_SKILLS_REFRESH_WAIT_TIMEOUT_SECONDS) -> bool:
-    if _ensure_enabled_skills_cache().wait(timeout=timeout_seconds):
+def warm_enabled_skills_cache(app_config: AppConfig | None = None, timeout_seconds: float = _ENABLED_SKILLS_REFRESH_WAIT_TIMEOUT_SECONDS) -> bool:
+    if _ensure_enabled_skills_cache(app_config).wait(timeout=timeout_seconds):
         return True
 
     logger.warning("Timed out waiting %.1fs for enabled skills cache warm-up", timeout_seconds)
     return False
 
 
-def _get_enabled_skills():
+def _get_enabled_skills(app_config: AppConfig | None = None):
     with _enabled_skills_lock:
         cached = _enabled_skills_cache
 
     if cached is not None:
         return list(cached)
 
-    _ensure_enabled_skills_cache()
+    _ensure_enabled_skills_cache(app_config)
     return []
 
 
@@ -115,12 +117,12 @@ def _skill_mutability_label(category: str) -> str:
     return "[custom, editable]" if category == "custom" else "[built-in]"
 
 
-def clear_skills_system_prompt_cache() -> None:
-    _invalidate_enabled_skills_cache()
+def clear_skills_system_prompt_cache(app_config: AppConfig | None = None) -> None:
+    _invalidate_enabled_skills_cache(app_config)
 
 
-async def refresh_skills_system_prompt_cache_async() -> None:
-    await asyncio.to_thread(_invalidate_enabled_skills_cache().wait)
+async def refresh_skills_system_prompt_cache_async(app_config: AppConfig | None = None) -> None:
+    await asyncio.to_thread(_invalidate_enabled_skills_cache(app_config).wait)
 
 
 def _reset_skills_system_prompt_cache_state() -> None:
@@ -134,10 +136,10 @@ def _reset_skills_system_prompt_cache_state() -> None:
         _enabled_skills_refresh_event.clear()
 
 
-def _refresh_enabled_skills_cache() -> None:
+def _refresh_enabled_skills_cache(app_config: AppConfig | None = None) -> None:
     """Backward-compatible test helper for direct synchronous reload."""
     try:
-        skills = _load_enabled_skills_sync()
+        skills = _load_enabled_skills_sync(app_config)
     except Exception:
         logger.exception("Failed to load enabled skills for prompt injection")
         skills = []
@@ -164,7 +166,7 @@ Skip simple one-off tasks.
 """
 
 
-def _build_available_subagents_description(available_names: list[str], bash_available: bool) -> str:
+def _build_available_subagents_description(available_names: list[str], bash_available: bool, app_config: AppConfig) -> str:
     """Dynamically build subagent type descriptions from registry.
 
     Mirrors Codex's pattern where agent_type_description is dynamically generated
@@ -186,7 +188,7 @@ def _build_available_subagents_description(available_names: list[str], bash_avai
         if name in builtin_descriptions:
             lines.append(f"- **{name}**: {builtin_descriptions[name]}")
         else:
-            config = get_subagent_config(name)
+            config = get_subagent_config(name, app_config)
             if config is not None:
                 desc = config.description.split("\n")[0].strip()  # First line only for brevity
                 lines.append(f"- **{name}**: {desc}")
@@ -194,22 +196,23 @@ def _build_available_subagents_description(available_names: list[str], bash_avai
     return "\n".join(lines)
 
 
-def _build_subagent_section(max_concurrent: int) -> str:
+def _build_subagent_section(max_concurrent: int, app_config: AppConfig) -> str:
     """Build the subagent system prompt section with dynamic concurrency limit.
 
     Args:
         max_concurrent: Maximum number of concurrent subagent calls allowed per response.
+        app_config: Application config used to gate bash availability.
 
     Returns:
         Formatted subagent section string.
     """
     n = max_concurrent
-    available_names = get_available_subagent_names()
+    available_names = get_available_subagent_names(app_config)
     bash_available = "bash" in available_names
 
     # Dynamically build subagent type descriptions from registry (aligned with Codex's
     # agent_type_description pattern where all registered roles are listed in the tool spec).
-    available_subagents = _build_available_subagents_description(available_names, bash_available)
+    available_subagents = _build_available_subagents_description(available_names, bash_available, app_config)
     direct_tool_examples = "bash, ls, read_file, web_search, etc." if bash_available else "ls, read_file, web_search, etc."
     direct_execution_example = (
         '# User asks: "Run the tests"\n# Thinking: Cannot decompose into parallel sub-tasks\n# → Execute directly\n\nbash("npm test")  # Direct execution, not task()'
@@ -536,36 +539,34 @@ combined with a FastAPI gateway for REST API access [citation:FastAPI](https://f
 """
 
 
-def _get_memory_context(agent_name: str | None = None) -> str:
+def _get_memory_context(app_config: AppConfig, agent_name: str | None = None) -> str:
     """Get memory context for injection into system prompt.
 
-    Args:
-        agent_name: If provided, loads per-agent memory. If None, loads global memory.
-
-    Returns:
-        Formatted memory context string wrapped in XML tags, or empty string if disabled.
+    Returns an empty string when memory is disabled or the stored memory file
+    cannot be read/parsed. A corrupt memory.json degrades the prompt to
+    no-memory; it never kills the agent.
     """
+    from deerflow.agents.memory import format_memory_for_injection, get_memory_data
+    from deerflow.runtime.user_context import get_effective_user_id
+
+    memory_config = app_config.memory
+    if not memory_config.enabled or not memory_config.injection_enabled:
+        return ""
+
     try:
-        from deerflow.agents.memory import format_memory_for_injection, get_memory_data
-        from deerflow.config.memory_config import get_memory_config
+        memory_data = get_memory_data(memory_config, agent_name, user_id=get_effective_user_id())
+    except (OSError, ValueError, UnicodeDecodeError):
+        logger.exception("Failed to load memory data for prompt injection")
+        return ""
 
-        config = get_memory_config()
-        if not config.enabled or not config.injection_enabled:
-            return ""
+    memory_content = format_memory_for_injection(memory_data, max_tokens=memory_config.max_injection_tokens)
+    if not memory_content.strip():
+        return ""
 
-        memory_data = get_memory_data(agent_name)
-        memory_content = format_memory_for_injection(memory_data, max_tokens=config.max_injection_tokens)
-
-        if not memory_content.strip():
-            return ""
-
-        return f"""<memory>
+    return f"""<memory>
 {memory_content}
 </memory>
 """
-    except Exception as e:
-        logger.error("Failed to load memory context: %s", e)
-        return ""
 
 
 @lru_cache(maxsize=32)
@@ -600,19 +601,12 @@ You have access to skills that provide optimized workflows for specific tasks. E
 </skill_system>"""
 
 
-def get_skills_prompt_section(available_skills: set[str] | None = None) -> str:
+def get_skills_prompt_section(app_config: AppConfig, available_skills: set[str] | None = None) -> str:
     """Generate the skills prompt section with available skills list."""
-    skills = _get_enabled_skills()
+    skills = _get_enabled_skills(app_config)
 
-    try:
-        from deerflow.config import get_app_config
-
-        config = get_app_config()
-        container_base_path = config.skills.container_path
-        skill_evolution_enabled = config.skill_evolution.enabled
-    except Exception:
-        container_base_path = "/mnt/skills"
-        skill_evolution_enabled = False
+    container_base_path = app_config.skills.container_path
+    skill_evolution_enabled = app_config.skill_evolution.enabled
 
     if not skills and not skill_evolution_enabled:
         return ""
@@ -636,7 +630,7 @@ def get_agent_soul(agent_name: str | None) -> str:
     return ""
 
 
-def get_deferred_tools_prompt_section() -> str:
+def get_deferred_tools_prompt_section(app_config: AppConfig) -> str:
     """Generate <available-deferred-tools> block for the system prompt.
 
     Lists only deferred tool names so the agent knows what exists
@@ -645,12 +639,7 @@ def get_deferred_tools_prompt_section() -> str:
     """
     from deerflow.tools.builtins.tool_search import get_deferred_registry
 
-    try:
-        from deerflow.config import get_app_config
-
-        if not get_app_config().tool_search.enabled:
-            return ""
-    except Exception:
+    if not app_config.tool_search.enabled:
         return ""
 
     registry = get_deferred_registry()
@@ -661,15 +650,9 @@ def get_deferred_tools_prompt_section() -> str:
     return f"<available-deferred-tools>\n{names}\n</available-deferred-tools>"
 
 
-def _build_acp_section() -> str:
+def _build_acp_section(app_config: AppConfig) -> str:
     """Build the ACP agent prompt section, only if ACP agents are configured."""
-    try:
-        from deerflow.config.acp_config import get_acp_agents
-
-        agents = get_acp_agents()
-        if not agents:
-            return ""
-    except Exception:
+    if not app_config.acp_agents:
         return ""
 
     return (
@@ -681,15 +664,9 @@ def _build_acp_section() -> str:
     )
 
 
-def _build_custom_mounts_section() -> str:
+def _build_custom_mounts_section(app_config: AppConfig) -> str:
     """Build a prompt section for explicitly configured sandbox mounts."""
-    try:
-        from deerflow.config import get_app_config
-
-        mounts = get_app_config().sandbox.mounts or []
-    except Exception:
-        logger.exception("Failed to load configured sandbox mounts for the lead-agent prompt")
-        return ""
+    mounts = app_config.sandbox.mounts or []
 
     if not mounts:
         return ""
@@ -703,13 +680,20 @@ def _build_custom_mounts_section() -> str:
     return f"\n**Custom Mounted Directories:**\n{mounts_list}\n- If the user needs files outside `/mnt/user-data`, use these absolute container paths directly when they match the requested directory"
 
 
-def apply_prompt_template(subagent_enabled: bool = False, max_concurrent_subagents: int = 3, *, agent_name: str | None = None, available_skills: set[str] | None = None) -> str:
+def apply_prompt_template(
+    app_config: AppConfig,
+    subagent_enabled: bool = False,
+    max_concurrent_subagents: int = 3,
+    *,
+    agent_name: str | None = None,
+    available_skills: set[str] | None = None,
+) -> str:
     # Get memory context
-    memory_context = _get_memory_context(agent_name)
+    memory_context = _get_memory_context(app_config, agent_name)
 
     # Include subagent section only if enabled (from runtime parameter)
     n = max_concurrent_subagents
-    subagent_section = _build_subagent_section(n) if subagent_enabled else ""
+    subagent_section = _build_subagent_section(n, app_config) if subagent_enabled else ""
 
     # Add subagent reminder to critical_reminders if enabled
     subagent_reminder = (
@@ -730,14 +714,14 @@ def apply_prompt_template(subagent_enabled: bool = False, max_concurrent_subagen
     )
 
     # Get skills section
-    skills_section = get_skills_prompt_section(available_skills)
+    skills_section = get_skills_prompt_section(app_config, available_skills)
 
     # Get deferred tools section (tool_search)
-    deferred_tools_section = get_deferred_tools_prompt_section()
+    deferred_tools_section = get_deferred_tools_prompt_section(app_config)
 
     # Build ACP agent section only if ACP agents are configured
-    acp_section = _build_acp_section()
-    custom_mounts_section = _build_custom_mounts_section()
+    acp_section = _build_acp_section(app_config)
+    custom_mounts_section = _build_custom_mounts_section(app_config)
     acp_and_mounts_section = "\n".join(section for section in (acp_section, custom_mounts_section) if section)
 
     # Format the prompt with dynamic skills and memory
diff --git a/backend/packages/harness/deerflow/agents/memory/queue.py b/backend/packages/harness/deerflow/agents/memory/queue.py
index 5a7686996..55b2baa54 100644
--- a/backend/packages/harness/deerflow/agents/memory/queue.py
+++ b/backend/packages/harness/deerflow/agents/memory/queue.py
@@ -7,11 +7,17 @@ from dataclasses import dataclass, field
 from datetime import UTC, datetime
 from typing import Any
 
-from deerflow.config.memory_config import get_memory_config
+from deerflow.config.app_config import AppConfig
 
 logger = logging.getLogger(__name__)
 
 
+# Module-level config pointer set by the middleware that owns the queue.
+# The queue runs on a background Timer thread where ``Runtime`` and FastAPI
+# request context are not accessible; the enqueuer (which does have runtime
+# context) is responsible for plumbing ``AppConfig`` through ``add()``.
+
+
 @dataclass
 class ConversationContext:
     """Context for a conversation to be processed for memory update."""
@@ -20,6 +26,7 @@ class ConversationContext:
     messages: list[Any]
     timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
     agent_name: str | None = None
+    user_id: str | None = None
     correction_detected: bool = False
     reinforcement_detected: bool = False
 
@@ -30,10 +37,21 @@ class MemoryUpdateQueue:
     This queue collects conversation contexts and processes them after
     a configurable debounce period. Multiple conversations received within
     the debounce window are batched together.
+
+    The queue captures an ``AppConfig`` reference at construction time and
+    reuses it for the MemoryUpdater it spawns. Callers must construct a
+    fresh queue when the config changes rather than reaching into a global.
     """
 
-    def __init__(self):
-        """Initialize the memory update queue."""
+    def __init__(self, app_config: AppConfig):
+        """Initialize the memory update queue.
+
+        Args:
+            app_config: Application config. The queue reads its own
+                ``memory`` section for debounce timing and hands the full
+                config to :class:`MemoryUpdater`.
+        """
+        self._app_config = app_config
         self._queue: list[ConversationContext] = []
         self._lock = threading.Lock()
         self._timer: threading.Timer | None = None
@@ -44,19 +62,12 @@ class MemoryUpdateQueue:
         thread_id: str,
         messages: list[Any],
         agent_name: str | None = None,
+        user_id: str | None = None,
         correction_detected: bool = False,
         reinforcement_detected: bool = False,
     ) -> None:
-        """Add a conversation to the update queue.
-
-        Args:
-            thread_id: The thread ID.
-            messages: The conversation messages.
-            agent_name: If provided, memory is stored per-agent. If None, uses global memory.
-            correction_detected: Whether recent turns include an explicit correction signal.
-            reinforcement_detected: Whether recent turns include a positive reinforcement signal.
-        """
-        config = get_memory_config()
+        """Add a conversation to the update queue."""
+        config = self._app_config.memory
         if not config.enabled:
             return
 
@@ -65,6 +76,7 @@ class MemoryUpdateQueue:
                 thread_id=thread_id,
                 messages=messages,
                 agent_name=agent_name,
+                user_id=user_id,
                 correction_detected=correction_detected,
                 reinforcement_detected=reinforcement_detected,
             )
@@ -77,11 +89,12 @@ class MemoryUpdateQueue:
         thread_id: str,
         messages: list[Any],
         agent_name: str | None = None,
+        user_id: str | None = None,
         correction_detected: bool = False,
         reinforcement_detected: bool = False,
     ) -> None:
         """Add a conversation and start processing immediately in the background."""
-        config = get_memory_config()
+        config = self._app_config.memory
         if not config.enabled:
             return
 
@@ -90,6 +103,7 @@ class MemoryUpdateQueue:
                 thread_id=thread_id,
                 messages=messages,
                 agent_name=agent_name,
+                user_id=user_id,
                 correction_detected=correction_detected,
                 reinforcement_detected=reinforcement_detected,
             )
@@ -103,6 +117,7 @@ class MemoryUpdateQueue:
         thread_id: str,
         messages: list[Any],
         agent_name: str | None,
+        user_id: str | None = None,
         correction_detected: bool,
         reinforcement_detected: bool,
     ) -> None:
@@ -116,6 +131,7 @@ class MemoryUpdateQueue:
             thread_id=thread_id,
             messages=messages,
             agent_name=agent_name,
+            user_id=user_id,
             correction_detected=merged_correction_detected,
             reinforcement_detected=merged_reinforcement_detected,
         )
@@ -125,7 +141,7 @@ class MemoryUpdateQueue:
 
     def _reset_timer(self) -> None:
         """Reset the debounce timer."""
-        config = get_memory_config()
+        config = self._app_config.memory
         self._schedule_timer(config.debounce_seconds)
 
         logger.debug("Memory update timer set for %ss", config.debounce_seconds)
@@ -165,7 +181,7 @@ class MemoryUpdateQueue:
         logger.info("Processing %d queued memory updates", len(contexts_to_process))
 
         try:
-            updater = MemoryUpdater()
+            updater = MemoryUpdater(self._app_config)
 
             for context in contexts_to_process:
                 try:
@@ -176,6 +192,7 @@ class MemoryUpdateQueue:
                         agent_name=context.agent_name,
                         correction_detected=context.correction_detected,
                         reinforcement_detected=context.reinforcement_detected,
+                        user_id=context.user_id,
                     )
                     if success:
                         logger.info("Memory updated successfully for thread %s", context.thread_id)
@@ -236,31 +253,35 @@ class MemoryUpdateQueue:
             return self._processing
 
 
-# Global singleton instance
-_memory_queue: MemoryUpdateQueue | None = None
+# Queues keyed by ``id(AppConfig)`` so tests and multi-client setups with
+# distinct configs do not share a debounce queue.
+_memory_queues: dict[int, MemoryUpdateQueue] = {}
 _queue_lock = threading.Lock()
 
 
-def get_memory_queue() -> MemoryUpdateQueue:
-    """Get the global memory update queue singleton.
-
-    Returns:
-        The memory update queue instance.
-    """
-    global _memory_queue
+def get_memory_queue(app_config: AppConfig) -> MemoryUpdateQueue:
+    """Get or create the memory update queue for the given app config."""
+    key = id(app_config)
     with _queue_lock:
-        if _memory_queue is None:
-            _memory_queue = MemoryUpdateQueue()
-        return _memory_queue
+        queue = _memory_queues.get(key)
+        if queue is None:
+            queue = MemoryUpdateQueue(app_config)
+            _memory_queues[key] = queue
+        return queue
 
 
-def reset_memory_queue() -> None:
-    """Reset the global memory queue.
+def reset_memory_queue(app_config: AppConfig | None = None) -> None:
+    """Reset memory queue(s).
 
-    This is useful for testing.
+    Pass an ``app_config`` to reset only its queue, or omit to reset all
+    (useful at test teardown).
     """
-    global _memory_queue
     with _queue_lock:
-        if _memory_queue is not None:
-            _memory_queue.clear()
-        _memory_queue = None
+        if app_config is not None:
+            queue = _memory_queues.pop(id(app_config), None)
+            if queue is not None:
+                queue.clear()
+            return
+        for queue in _memory_queues.values():
+            queue.clear()
+        _memory_queues.clear()
diff --git a/backend/packages/harness/deerflow/agents/memory/storage.py b/backend/packages/harness/deerflow/agents/memory/storage.py
index 8fae907d9..f5593bc30 100644
--- a/backend/packages/harness/deerflow/agents/memory/storage.py
+++ b/backend/packages/harness/deerflow/agents/memory/storage.py
@@ -10,7 +10,7 @@ from pathlib import Path
 from typing import Any
 
 from deerflow.config.agents_config import AGENT_NAME_PATTERN
-from deerflow.config.memory_config import get_memory_config
+from deerflow.config.memory_config import MemoryConfig
 from deerflow.config.paths import get_paths
 
 logger = logging.getLogger(__name__)
@@ -44,17 +44,17 @@ class MemoryStorage(abc.ABC):
     """Abstract base class for memory storage providers."""
 
     @abc.abstractmethod
-    def load(self, agent_name: str | None = None) -> dict[str, Any]:
+    def load(self, agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
         """Load memory data for the given agent."""
         pass
 
     @abc.abstractmethod
-    def reload(self, agent_name: str | None = None) -> dict[str, Any]:
+    def reload(self, agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
         """Force reload memory data for the given agent."""
         pass
 
     @abc.abstractmethod
-    def save(self, memory_data: dict[str, Any], agent_name: str | None = None) -> bool:
+    def save(self, memory_data: dict[str, Any], agent_name: str | None = None, *, user_id: str | None = None) -> bool:
         """Save memory data for the given agent."""
         pass
 
@@ -62,11 +62,18 @@ class MemoryStorage(abc.ABC):
 class FileMemoryStorage(MemoryStorage):
     """File-based memory storage provider."""
 
-    def __init__(self):
-        """Initialize the file memory storage."""
-        # Per-agent memory cache: keyed by agent_name (None = global)
+    def __init__(self, memory_config: MemoryConfig):
+        """Initialize the file memory storage.
+
+        Args:
+            memory_config: Memory configuration (storage_path etc.). Stored on
+                the instance so per-request lookups don't need to reach for
+                ambient state.
+        """
+        self._memory_config = memory_config
+        # Per-user/agent memory cache: keyed by (user_id, agent_name) tuple (None = global)
         # Value: (memory_data, file_mtime)
-        self._memory_cache: dict[str | None, tuple[dict[str, Any], float | None]] = {}
+        self._memory_cache: dict[tuple[str | None, str | None], tuple[dict[str, Any], float | None]] = {}
         # Guards all reads and writes to _memory_cache across concurrent callers.
         self._cache_lock = threading.Lock()
 
@@ -81,21 +88,28 @@ class FileMemoryStorage(MemoryStorage):
         if not AGENT_NAME_PATTERN.match(agent_name):
             raise ValueError(f"Invalid agent name {agent_name!r}: names must match {AGENT_NAME_PATTERN.pattern}")
 
-    def _get_memory_file_path(self, agent_name: str | None = None) -> Path:
+    def _get_memory_file_path(self, agent_name: str | None = None, *, user_id: str | None = None) -> Path:
         """Get the path to the memory file."""
+        config = self._memory_config
+        if user_id is not None:
+            if agent_name is not None:
+                self._validate_agent_name(agent_name)
+                return get_paths().user_agent_memory_file(user_id, agent_name)
+            if config.storage_path and Path(config.storage_path).is_absolute():
+                return Path(config.storage_path)
+            return get_paths().user_memory_file(user_id)
+        # Legacy: no user_id
         if agent_name is not None:
             self._validate_agent_name(agent_name)
             return get_paths().agent_memory_file(agent_name)
-
-        config = get_memory_config()
         if config.storage_path:
             p = Path(config.storage_path)
             return p if p.is_absolute() else get_paths().base_dir / p
         return get_paths().memory_file
 
-    def _load_memory_from_file(self, agent_name: str | None = None) -> dict[str, Any]:
+    def _load_memory_from_file(self, agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
         """Load memory data from file."""
-        file_path = self._get_memory_file_path(agent_name)
+        file_path = self._get_memory_file_path(agent_name, user_id=user_id)
 
         if not file_path.exists():
             return create_empty_memory()
@@ -108,44 +122,46 @@ class FileMemoryStorage(MemoryStorage):
             logger.warning("Failed to load memory file: %s", e)
             return create_empty_memory()
 
-    def load(self, agent_name: str | None = None) -> dict[str, Any]:
+    def load(self, agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
         """Load memory data (cached with file modification time check)."""
-        file_path = self._get_memory_file_path(agent_name)
+        file_path = self._get_memory_file_path(agent_name, user_id=user_id)
 
         try:
             current_mtime = file_path.stat().st_mtime if file_path.exists() else None
         except OSError:
             current_mtime = None
 
+        cache_key = (user_id, agent_name)
         with self._cache_lock:
-            cached = self._memory_cache.get(agent_name)
+            cached = self._memory_cache.get(cache_key)
             if cached is not None and cached[1] == current_mtime:
                 return cached[0]
 
-        memory_data = self._load_memory_from_file(agent_name)
+        memory_data = self._load_memory_from_file(agent_name, user_id=user_id)
 
         with self._cache_lock:
-            self._memory_cache[agent_name] = (memory_data, current_mtime)
+            self._memory_cache[cache_key] = (memory_data, current_mtime)
 
         return memory_data
 
-    def reload(self, agent_name: str | None = None) -> dict[str, Any]:
+    def reload(self, agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
         """Reload memory data from file, forcing cache invalidation."""
-        file_path = self._get_memory_file_path(agent_name)
-        memory_data = self._load_memory_from_file(agent_name)
+        file_path = self._get_memory_file_path(agent_name, user_id=user_id)
+        memory_data = self._load_memory_from_file(agent_name, user_id=user_id)
 
         try:
             mtime = file_path.stat().st_mtime if file_path.exists() else None
         except OSError:
             mtime = None
 
+        cache_key = (user_id, agent_name)
         with self._cache_lock:
-            self._memory_cache[agent_name] = (memory_data, mtime)
+            self._memory_cache[cache_key] = (memory_data, mtime)
         return memory_data
 
-    def save(self, memory_data: dict[str, Any], agent_name: str | None = None) -> bool:
+    def save(self, memory_data: dict[str, Any], agent_name: str | None = None, *, user_id: str | None = None) -> bool:
         """Save memory data to file and update cache."""
-        file_path = self._get_memory_file_path(agent_name)
+        file_path = self._get_memory_file_path(agent_name, user_id=user_id)
 
         try:
             file_path.parent.mkdir(parents=True, exist_ok=True)
@@ -165,8 +181,9 @@ class FileMemoryStorage(MemoryStorage):
             except OSError:
                 mtime = None
 
+            cache_key = (user_id, agent_name)
             with self._cache_lock:
-                self._memory_cache[agent_name] = (memory_data, mtime)
+                self._memory_cache[cache_key] = (memory_data, mtime)
             logger.info("Memory saved to %s", file_path)
             return True
         except OSError as e:
@@ -174,23 +191,31 @@ class FileMemoryStorage(MemoryStorage):
             return False
 
 
-_storage_instance: MemoryStorage | None = None
+# Instances keyed by (storage_class_path, id(memory_config)) so tests can
+# construct isolated storages and multi-client setups with different configs
+# don't collide on a single process-wide singleton.
+_storage_instances: dict[tuple[str, int], MemoryStorage] = {}
 _storage_lock = threading.Lock()
 
 
-def get_memory_storage() -> MemoryStorage:
-    """Get the configured memory storage instance."""
-    global _storage_instance
-    if _storage_instance is not None:
-        return _storage_instance
+def get_memory_storage(memory_config: MemoryConfig) -> MemoryStorage:
+    """Get the configured memory storage instance.
+
+    Caches one instance per ``(storage_class, memory_config)`` pair. In
+    single-config deployments this collapses to one instance; in multi-client
+    or test scenarios each config gets its own storage.
+    """
+    key = (memory_config.storage_class, id(memory_config))
+    existing = _storage_instances.get(key)
+    if existing is not None:
+        return existing
 
     with _storage_lock:
-        if _storage_instance is not None:
-            return _storage_instance
-
-        config = get_memory_config()
-        storage_class_path = config.storage_class
+        existing = _storage_instances.get(key)
+        if existing is not None:
+            return existing
 
+        storage_class_path = memory_config.storage_class
         try:
             module_path, class_name = storage_class_path.rsplit(".", 1)
             import importlib
@@ -204,13 +229,14 @@ def get_memory_storage() -> MemoryStorage:
             if not issubclass(storage_class, MemoryStorage):
                 raise TypeError(f"Configured memory storage '{storage_class_path}' is not a subclass of MemoryStorage")
 
-            _storage_instance = storage_class()
+            instance = storage_class(memory_config)
         except Exception as e:
             logger.error(
                 "Failed to load memory storage %s, falling back to FileMemoryStorage: %s",
                 storage_class_path,
                 e,
             )
-            _storage_instance = FileMemoryStorage()
+            instance = FileMemoryStorage(memory_config)
 
-    return _storage_instance
+        _storage_instances[key] = instance
+        return instance
diff --git a/backend/packages/harness/deerflow/agents/memory/summarization_hook.py b/backend/packages/harness/deerflow/agents/memory/summarization_hook.py
index dafa7d977..b75131449 100644
--- a/backend/packages/harness/deerflow/agents/memory/summarization_hook.py
+++ b/backend/packages/harness/deerflow/agents/memory/summarization_hook.py
@@ -5,12 +5,19 @@ from __future__ import annotations
 from deerflow.agents.memory.message_processing import detect_correction, detect_reinforcement, filter_messages_for_memory
 from deerflow.agents.memory.queue import get_memory_queue
 from deerflow.agents.middlewares.summarization_middleware import SummarizationEvent
-from deerflow.config.memory_config import get_memory_config
+from deerflow.config.app_config import AppConfig
 
 
 def memory_flush_hook(event: SummarizationEvent) -> None:
-    """Flush messages about to be summarized into the memory queue."""
-    if not get_memory_config().enabled or not event.thread_id:
+    """Flush messages about to be summarized into the memory queue.
+
+    Reads ``AppConfig`` from disk on every invocation. This hook is fired by
+    ``SummarizationMiddleware`` which has no ergonomic way to thread an
+    explicit ``app_config`` through; ``AppConfig.from_file()`` is a pure load
+    so the cost is acceptable for this rare pre-summarization callback.
+    """
+    app_config = AppConfig.from_file()
+    if not app_config.memory.enabled or not event.thread_id:
         return
 
     filtered_messages = filter_messages_for_memory(list(event.messages_to_summarize))
@@ -21,7 +28,7 @@ def memory_flush_hook(event: SummarizationEvent) -> None:
 
     correction_detected = detect_correction(filtered_messages)
     reinforcement_detected = not correction_detected and detect_reinforcement(filtered_messages)
-    queue = get_memory_queue()
+    queue = get_memory_queue(app_config)
     queue.add_nowait(
         thread_id=event.thread_id,
         messages=filtered_messages,
diff --git a/backend/packages/harness/deerflow/agents/memory/updater.py b/backend/packages/harness/deerflow/agents/memory/updater.py
index 7e782dcbc..fbdd5f3cc 100644
--- a/backend/packages/harness/deerflow/agents/memory/updater.py
+++ b/backend/packages/harness/deerflow/agents/memory/updater.py
@@ -21,7 +21,8 @@ from deerflow.agents.memory.storage import (
     get_memory_storage,
     utc_now_iso_z,
 )
-from deerflow.config.memory_config import get_memory_config
+from deerflow.config.app_config import AppConfig
+from deerflow.config.memory_config import MemoryConfig
 from deerflow.models import create_chat_model
 
 logger = logging.getLogger(__name__)
@@ -38,44 +39,33 @@ def _create_empty_memory() -> dict[str, Any]:
     return create_empty_memory()
 
 
-def _save_memory_to_file(memory_data: dict[str, Any], agent_name: str | None = None) -> bool:
-    """Backward-compatible wrapper around the configured memory storage save path."""
-    return get_memory_storage().save(memory_data, agent_name)
+def _save_memory_to_file(memory_config: MemoryConfig, memory_data: dict[str, Any], agent_name: str | None = None, *, user_id: str | None = None) -> bool:
+    """Save via the configured memory storage."""
+    return get_memory_storage(memory_config).save(memory_data, agent_name, user_id=user_id)
 
 
-def get_memory_data(agent_name: str | None = None) -> dict[str, Any]:
+def get_memory_data(memory_config: MemoryConfig, agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
     """Get the current memory data via storage provider."""
-    return get_memory_storage().load(agent_name)
+    return get_memory_storage(memory_config).load(agent_name, user_id=user_id)
 
 
-def reload_memory_data(agent_name: str | None = None) -> dict[str, Any]:
+def reload_memory_data(memory_config: MemoryConfig, agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
     """Reload memory data via storage provider."""
-    return get_memory_storage().reload(agent_name)
+    return get_memory_storage(memory_config).reload(agent_name, user_id=user_id)
 
 
-def import_memory_data(memory_data: dict[str, Any], agent_name: str | None = None) -> dict[str, Any]:
-    """Persist imported memory data via storage provider.
-
-    Args:
-        memory_data: Full memory payload to persist.
-        agent_name: If provided, imports into per-agent memory.
-
-    Returns:
-        The saved memory data after storage normalization.
-
-    Raises:
-        OSError: If persisting the imported memory fails.
-    """
-    storage = get_memory_storage()
-    if not storage.save(memory_data, agent_name):
+def import_memory_data(memory_config: MemoryConfig, memory_data: dict[str, Any], agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
+    """Persist imported memory data via storage provider."""
+    storage = get_memory_storage(memory_config)
+    if not storage.save(memory_data, agent_name, user_id=user_id):
         raise OSError("Failed to save imported memory data")
-    return storage.load(agent_name)
+    return storage.load(agent_name, user_id=user_id)
 
 
-def clear_memory_data(agent_name: str | None = None) -> dict[str, Any]:
+def clear_memory_data(memory_config: MemoryConfig, agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
     """Clear all stored memory data and persist an empty structure."""
     cleared_memory = create_empty_memory()
-    if not _save_memory_to_file(cleared_memory, agent_name):
+    if not _save_memory_to_file(memory_config, cleared_memory, agent_name, user_id=user_id):
         raise OSError("Failed to save cleared memory data")
     return cleared_memory
 
@@ -88,10 +78,13 @@ def _validate_confidence(confidence: float) -> float:
 
 
 def create_memory_fact(
+    memory_config: MemoryConfig,
     content: str,
     category: str = "context",
     confidence: float = 0.5,
     agent_name: str | None = None,
+    *,
+    user_id: str | None = None,
 ) -> dict[str, Any]:
     """Create a new fact and persist the updated memory data."""
     normalized_content = content.strip()
@@ -101,7 +94,7 @@ def create_memory_fact(
     normalized_category = category.strip() or "context"
     validated_confidence = _validate_confidence(confidence)
     now = utc_now_iso_z()
-    memory_data = get_memory_data(agent_name)
+    memory_data = get_memory_data(memory_config, agent_name, user_id=user_id)
     updated_memory = dict(memory_data)
     facts = list(memory_data.get("facts", []))
     facts.append(
@@ -116,15 +109,15 @@ def create_memory_fact(
     )
     updated_memory["facts"] = facts
 
-    if not _save_memory_to_file(updated_memory, agent_name):
+    if not _save_memory_to_file(memory_config, updated_memory, agent_name, user_id=user_id):
         raise OSError("Failed to save memory data after creating fact")
 
     return updated_memory
 
 
-def delete_memory_fact(fact_id: str, agent_name: str | None = None) -> dict[str, Any]:
+def delete_memory_fact(memory_config: MemoryConfig, fact_id: str, agent_name: str | None = None, *, user_id: str | None = None) -> dict[str, Any]:
     """Delete a fact by its id and persist the updated memory data."""
-    memory_data = get_memory_data(agent_name)
+    memory_data = get_memory_data(memory_config, agent_name, user_id=user_id)
     facts = memory_data.get("facts", [])
     updated_facts = [fact for fact in facts if fact.get("id") != fact_id]
     if len(updated_facts) == len(facts):
@@ -133,21 +126,24 @@ def delete_memory_fact(fact_id: str, agent_name: str | None = None) -> dict[str,
     updated_memory = dict(memory_data)
     updated_memory["facts"] = updated_facts
 
-    if not _save_memory_to_file(updated_memory, agent_name):
+    if not _save_memory_to_file(memory_config, updated_memory, agent_name, user_id=user_id):
         raise OSError(f"Failed to save memory data after deleting fact '{fact_id}'")
 
     return updated_memory
 
 
 def update_memory_fact(
+    memory_config: MemoryConfig,
     fact_id: str,
     content: str | None = None,
     category: str | None = None,
     confidence: float | None = None,
     agent_name: str | None = None,
+    *,
+    user_id: str | None = None,
 ) -> dict[str, Any]:
     """Update an existing fact and persist the updated memory data."""
-    memory_data = get_memory_data(agent_name)
+    memory_data = get_memory_data(memory_config, agent_name, user_id=user_id)
     updated_memory = dict(memory_data)
     updated_facts: list[dict[str, Any]] = []
     found = False
@@ -174,7 +170,7 @@ def update_memory_fact(
 
     updated_memory["facts"] = updated_facts
 
-    if not _save_memory_to_file(updated_memory, agent_name):
+    if not _save_memory_to_file(memory_config, updated_memory, agent_name, user_id=user_id):
         raise OSError(f"Failed to save memory data after updating fact '{fact_id}'")
 
     return updated_memory
@@ -299,19 +295,25 @@ def _fact_content_key(content: Any) -> str | None:
 class MemoryUpdater:
     """Updates memory using LLM based on conversation context."""
 
-    def __init__(self, model_name: str | None = None):
+    def __init__(self, app_config: AppConfig, model_name: str | None = None):
         """Initialize the memory updater.
 
         Args:
+            app_config: Application config (the updater needs both ``memory``
+                section for behavior and the full config for ``create_chat_model``).
             model_name: Optional model name to use. If None, uses config or default.
         """
+        self._app_config = app_config
         self._model_name = model_name
 
+    @property
+    def _memory_config(self) -> MemoryConfig:
+        return self._app_config.memory
+
     def _get_model(self):
         """Get the model for memory updates."""
-        config = get_memory_config()
-        model_name = self._model_name or config.model_name
-        return create_chat_model(name=model_name, thinking_enabled=False)
+        model_name = self._model_name or self._memory_config.model_name
+        return create_chat_model(name=model_name, thinking_enabled=False, app_config=self._app_config)
 
     def _build_correction_hint(
         self,
@@ -344,13 +346,14 @@ class MemoryUpdater:
         agent_name: str | None,
         correction_detected: bool,
         reinforcement_detected: bool,
+        user_id: str | None = None,
     ) -> tuple[dict[str, Any], str] | None:
         """Load memory and build the update prompt for a conversation."""
-        config = get_memory_config()
+        config = self._memory_config
         if not config.enabled or not messages:
             return None
 
-        current_memory = get_memory_data(agent_name)
+        current_memory = get_memory_data(config, agent_name, user_id=user_id)
         conversation_text = format_conversation_for_update(messages)
         if not conversation_text.strip():
             return None
@@ -372,6 +375,7 @@ class MemoryUpdater:
         response_content: Any,
         thread_id: str | None,
         agent_name: str | None,
+        user_id: str | None = None,
     ) -> bool:
         """Parse the model response, apply updates, and persist memory."""
         response_text = _extract_text(response_content).strip()
@@ -385,7 +389,7 @@ class MemoryUpdater:
         # cannot corrupt the still-cached original object reference.
         updated_memory = self._apply_updates(copy.deepcopy(current_memory), update_data, thread_id)
         updated_memory = _strip_upload_mentions_from_memory(updated_memory)
-        return get_memory_storage().save(updated_memory, agent_name)
+        return get_memory_storage(self._memory_config).save(updated_memory, agent_name, user_id=user_id)
 
     async def aupdate_memory(
         self,
@@ -394,6 +398,7 @@ class MemoryUpdater:
         agent_name: str | None = None,
         correction_detected: bool = False,
         reinforcement_detected: bool = False,
+        user_id: str | None = None,
     ) -> bool:
         """Update memory asynchronously based on conversation messages."""
         try:
@@ -403,6 +408,7 @@ class MemoryUpdater:
                 agent_name=agent_name,
                 correction_detected=correction_detected,
                 reinforcement_detected=reinforcement_detected,
+                user_id=user_id,
             )
             if prepared is None:
                 return False
@@ -416,6 +422,7 @@ class MemoryUpdater:
                 response_content=response.content,
                 thread_id=thread_id,
                 agent_name=agent_name,
+                user_id=user_id,
             )
         except json.JSONDecodeError as e:
             logger.warning("Failed to parse LLM response for memory update: %s", e)
@@ -431,6 +438,7 @@ class MemoryUpdater:
         agent_name: str | None = None,
         correction_detected: bool = False,
         reinforcement_detected: bool = False,
+        user_id: str | None = None,
     ) -> bool:
         """Synchronously update memory via the async updater path.
 
@@ -440,19 +448,83 @@ class MemoryUpdater:
             agent_name: If provided, updates per-agent memory. If None, updates global memory.
             correction_detected: Whether recent turns include an explicit correction signal.
             reinforcement_detected: Whether recent turns include a positive reinforcement signal.
+            user_id: If provided, scopes memory to a specific user.
 
         Returns:
             True if update was successful, False otherwise.
         """
-        return _run_async_update_sync(
-            self.aupdate_memory(
-                messages=messages,
-                thread_id=thread_id,
-                agent_name=agent_name,
-                correction_detected=correction_detected,
-                reinforcement_detected=reinforcement_detected,
+        config = self._memory_config
+        if not config.enabled:
+            return False
+
+        if not messages:
+            return False
+
+        try:
+            # Get current memory
+            current_memory = get_memory_data(config, agent_name, user_id=user_id)
+
+            # Format conversation for prompt
+            conversation_text = format_conversation_for_update(messages)
+
+            if not conversation_text.strip():
+                return False
+
+            # Build prompt
+            correction_hint = ""
+            if correction_detected:
+                correction_hint = (
+                    "IMPORTANT: Explicit correction signals were detected in this conversation. "
+                    "Pay special attention to what the agent got wrong, what the user corrected, "
+                    "and record the correct approach as a fact with category "
+                    '"correction" and confidence >= 0.95 when appropriate.'
+                )
+            if reinforcement_detected:
+                reinforcement_hint = (
+                    "IMPORTANT: Positive reinforcement signals were detected in this conversation. "
+                    "The user explicitly confirmed the agent's approach was correct or helpful. "
+                    "Record the confirmed approach, style, or preference as a fact with category "
+                    '"preference" or "behavior" and confidence >= 0.9 when appropriate.'
+                )
+                correction_hint = (correction_hint + "\n" + reinforcement_hint).strip() if correction_hint else reinforcement_hint
+
+            prompt = MEMORY_UPDATE_PROMPT.format(
+                current_memory=json.dumps(current_memory, indent=2),
+                conversation=conversation_text,
+                correction_hint=correction_hint,
             )
-        )
+
+            # Call LLM
+            model = self._get_model()
+            response = model.invoke(prompt)
+            response_text = _extract_text(response.content).strip()
+
+            # Parse response
+            # Remove markdown code blocks if present
+            if response_text.startswith("```"):
+                lines = response_text.split("\n")
+                response_text = "\n".join(lines[1:-1] if lines[-1] == "```" else lines[1:])
+
+            update_data = json.loads(response_text)
+
+            # Apply updates
+            updated_memory = self._apply_updates(current_memory, update_data, thread_id)
+
+            # Strip file-upload mentions from all summaries before saving.
+            # Uploaded files are session-scoped and won't exist in future sessions,
+            # so recording upload events in long-term memory causes the agent to
+            # try (and fail) to locate those files in subsequent conversations.
+            updated_memory = _strip_upload_mentions_from_memory(updated_memory)
+
+            # Save
+            return get_memory_storage(config).save(updated_memory, agent_name, user_id=user_id)
+
+        except json.JSONDecodeError as e:
+            logger.warning("Failed to parse LLM response for memory update: %s", e)
+            return False
+        except Exception as e:
+            logger.exception("Memory update failed: %s", e)
+            return False
 
     def _apply_updates(
         self,
@@ -470,7 +542,7 @@ class MemoryUpdater:
         Returns:
             Updated memory data.
         """
-        config = get_memory_config()
+        config = self._memory_config
         now = utc_now_iso_z()
 
         # Update user sections
@@ -547,6 +619,7 @@ def update_memory_from_conversation(
     agent_name: str | None = None,
     correction_detected: bool = False,
     reinforcement_detected: bool = False,
+    user_id: str | None = None,
 ) -> bool:
     """Convenience function to update memory from a conversation.
 
@@ -556,9 +629,10 @@ def update_memory_from_conversation(
         agent_name: If provided, updates per-agent memory. If None, updates global memory.
         correction_detected: Whether recent turns include an explicit correction signal.
         reinforcement_detected: Whether recent turns include a positive reinforcement signal.
+        user_id: If provided, scopes memory to a specific user.
 
     Returns:
         True if successful, False otherwise.
     """
     updater = MemoryUpdater()
-    return updater.update_memory(messages, thread_id, agent_name, correction_detected, reinforcement_detected)
+    return updater.update_memory(messages, thread_id, agent_name, correction_detected, reinforcement_detected, user_id=user_id)
diff --git a/backend/packages/harness/deerflow/agents/middlewares/llm_error_handling_middleware.py b/backend/packages/harness/deerflow/agents/middlewares/llm_error_handling_middleware.py
index 4ef9f5e7d..e3f4161e1 100644
--- a/backend/packages/harness/deerflow/agents/middlewares/llm_error_handling_middleware.py
+++ b/backend/packages/harness/deerflow/agents/middlewares/llm_error_handling_middleware.py
@@ -20,7 +20,7 @@ from langchain.agents.middleware.types import (
 from langchain_core.messages import AIMessage
 from langgraph.errors import GraphBubbleUp
 
-from deerflow.config import get_app_config
+from deerflow.config.app_config import AppConfig
 
 logger = logging.getLogger(__name__)
 
@@ -78,7 +78,7 @@ class LLMErrorHandlingMiddleware(AgentMiddleware[AgentState]):
 
         # Load Circuit Breaker configs from app config if available, fall back to defaults
         try:
-            app_config = get_app_config()
+            app_config = AppConfig.from_file()
             self.circuit_failure_threshold = app_config.circuit_breaker.failure_threshold
             self.circuit_recovery_timeout_sec = app_config.circuit_breaker.recovery_timeout_sec
         except (FileNotFoundError, RuntimeError):
diff --git a/backend/packages/harness/deerflow/agents/middlewares/loop_detection_middleware.py b/backend/packages/harness/deerflow/agents/middlewares/loop_detection_middleware.py
index 4c1ba28ec..be8eaae3d 100644
--- a/backend/packages/harness/deerflow/agents/middlewares/loop_detection_middleware.py
+++ b/backend/packages/harness/deerflow/agents/middlewares/loop_detection_middleware.py
@@ -25,6 +25,8 @@ from langchain.agents.middleware import AgentMiddleware
 from langchain_core.messages import HumanMessage
 from langgraph.runtime import Runtime
 
+from deerflow.config.deer_flow_context import DeerFlowContext
+
 logger = logging.getLogger(__name__)
 
 # Defaults — can be overridden via constructor
@@ -181,12 +183,9 @@ class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
         self._tool_freq: dict[str, dict[str, int]] = defaultdict(lambda: defaultdict(int))
         self._tool_freq_warned: dict[str, set[str]] = defaultdict(set)
 
-    def _get_thread_id(self, runtime: Runtime) -> str:
+    def _get_thread_id(self, runtime: Runtime[DeerFlowContext]) -> str:
         """Extract thread_id from runtime context for per-thread tracking."""
-        thread_id = runtime.context.get("thread_id") if runtime.context else None
-        if thread_id:
-            return thread_id
-        return "default"
+        return runtime.context.thread_id or "default"
 
     def _evict_if_needed(self) -> None:
         """Evict least recently used threads if over the limit.
@@ -367,11 +366,11 @@ class LoopDetectionMiddleware(AgentMiddleware[AgentState]):
         return None
 
     @override
-    def after_model(self, state: AgentState, runtime: Runtime) -> dict | None:
+    def after_model(self, state: AgentState, runtime: Runtime[DeerFlowContext]) -> dict | None:
         return self._apply(state, runtime)
 
     @override
-    async def aafter_model(self, state: AgentState, runtime: Runtime) -> dict | None:
+    async def aafter_model(self, state: AgentState, runtime: Runtime[DeerFlowContext]) -> dict | None:
         return self._apply(state, runtime)
 
     def reset(self, thread_id: str | None = None) -> None:
diff --git a/backend/packages/harness/deerflow/agents/middlewares/memory_middleware.py b/backend/packages/harness/deerflow/agents/middlewares/memory_middleware.py
index f1dccf689..263ff353d 100644
--- a/backend/packages/harness/deerflow/agents/middlewares/memory_middleware.py
+++ b/backend/packages/harness/deerflow/agents/middlewares/memory_middleware.py
@@ -5,12 +5,12 @@ from typing import override
 
 from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
-from langgraph.config import get_config
 from langgraph.runtime import Runtime
 
 from deerflow.agents.memory.message_processing import detect_correction, detect_reinforcement, filter_messages_for_memory
 from deerflow.agents.memory.queue import get_memory_queue
-from deerflow.config.memory_config import get_memory_config
+from deerflow.config.deer_flow_context import DeerFlowContext
+from deerflow.runtime.user_context import get_effective_user_id
 
 logger = logging.getLogger(__name__)
 
@@ -43,7 +43,7 @@ class MemoryMiddleware(AgentMiddleware[MemoryMiddlewareState]):
         self._agent_name = agent_name
 
     @override
-    def after_agent(self, state: MemoryMiddlewareState, runtime: Runtime) -> dict | None:
+    def after_agent(self, state: MemoryMiddlewareState, runtime: Runtime[DeerFlowContext]) -> dict | None:
         """Queue conversation for memory update after agent completes.
 
         Args:
@@ -53,15 +53,11 @@ class MemoryMiddleware(AgentMiddleware[MemoryMiddlewareState]):
         Returns:
             None (no state changes needed from this middleware).
         """
-        config = get_memory_config()
-        if not config.enabled:
+        memory_config = runtime.context.app_config.memory
+        if not memory_config.enabled:
             return None
 
-        # Get thread ID from runtime context first, then fall back to LangGraph's configurable metadata
-        thread_id = runtime.context.get("thread_id") if runtime.context else None
-        if thread_id is None:
-            config_data = get_config()
-            thread_id = config_data.get("configurable", {}).get("thread_id")
+        thread_id = runtime.context.thread_id
         if not thread_id:
             logger.debug("No thread_id in context, skipping memory update")
             return None
@@ -86,11 +82,16 @@ class MemoryMiddleware(AgentMiddleware[MemoryMiddlewareState]):
         # Queue the filtered conversation for memory update
         correction_detected = detect_correction(filtered_messages)
         reinforcement_detected = not correction_detected and detect_reinforcement(filtered_messages)
-        queue = get_memory_queue()
+        # Capture user_id at enqueue time while the request context is still alive.
+        # threading.Timer fires on a different thread where ContextVar values are not
+        # propagated, so we must store user_id explicitly in ConversationContext.
+        user_id = get_effective_user_id()
+        queue = get_memory_queue(runtime.context.app_config)
         queue.add(
             thread_id=thread_id,
             messages=filtered_messages,
             agent_name=self._agent_name,
+            user_id=user_id,
             correction_detected=correction_detected,
             reinforcement_detected=reinforcement_detected,
         )
diff --git a/backend/packages/harness/deerflow/agents/middlewares/thread_data_middleware.py b/backend/packages/harness/deerflow/agents/middlewares/thread_data_middleware.py
index c25531e02..df7cc0fc3 100644
--- a/backend/packages/harness/deerflow/agents/middlewares/thread_data_middleware.py
+++ b/backend/packages/harness/deerflow/agents/middlewares/thread_data_middleware.py
@@ -3,11 +3,12 @@ from typing import NotRequired, override
 
 from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
-from langgraph.config import get_config
 from langgraph.runtime import Runtime
 
 from deerflow.agents.thread_state import ThreadDataState
+from deerflow.config.deer_flow_context import DeerFlowContext
 from deerflow.config.paths import Paths, get_paths
+from deerflow.runtime.user_context import get_effective_user_id
 
 logger = logging.getLogger(__name__)
 
@@ -46,50 +47,50 @@ class ThreadDataMiddleware(AgentMiddleware[ThreadDataMiddlewareState]):
         self._paths = Paths(base_dir) if base_dir else get_paths()
         self._lazy_init = lazy_init
 
-    def _get_thread_paths(self, thread_id: str) -> dict[str, str]:
+    def _get_thread_paths(self, thread_id: str, user_id: str | None = None) -> dict[str, str]:
         """Get the paths for a thread's data directories.
 
         Args:
             thread_id: The thread ID.
+            user_id: Optional user ID for per-user path isolation.
 
         Returns:
             Dictionary with workspace_path, uploads_path, and outputs_path.
         """
         return {
-            "workspace_path": str(self._paths.sandbox_work_dir(thread_id)),
-            "uploads_path": str(self._paths.sandbox_uploads_dir(thread_id)),
-            "outputs_path": str(self._paths.sandbox_outputs_dir(thread_id)),
+            "workspace_path": str(self._paths.sandbox_work_dir(thread_id, user_id=user_id)),
+            "uploads_path": str(self._paths.sandbox_uploads_dir(thread_id, user_id=user_id)),
+            "outputs_path": str(self._paths.sandbox_outputs_dir(thread_id, user_id=user_id)),
         }
 
-    def _create_thread_directories(self, thread_id: str) -> dict[str, str]:
+    def _create_thread_directories(self, thread_id: str, user_id: str | None = None) -> dict[str, str]:
         """Create the thread data directories.
 
         Args:
             thread_id: The thread ID.
+            user_id: Optional user ID for per-user path isolation.
 
         Returns:
             Dictionary with the created directory paths.
         """
-        self._paths.ensure_thread_dirs(thread_id)
-        return self._get_thread_paths(thread_id)
+        self._paths.ensure_thread_dirs(thread_id, user_id=user_id)
+        return self._get_thread_paths(thread_id, user_id=user_id)
 
     @override
-    def before_agent(self, state: ThreadDataMiddlewareState, runtime: Runtime) -> dict | None:
-        context = runtime.context or {}
-        thread_id = context.get("thread_id")
-        if thread_id is None:
-            config = get_config()
-            thread_id = config.get("configurable", {}).get("thread_id")
+    def before_agent(self, state: ThreadDataMiddlewareState, runtime: Runtime[DeerFlowContext]) -> dict | None:
+        thread_id = runtime.context.thread_id
 
-        if thread_id is None:
+        if not thread_id:
             raise ValueError("Thread ID is required in runtime context or config.configurable")
 
+        user_id = get_effective_user_id()
+
         if self._lazy_init:
             # Lazy initialization: only compute paths, don't create directories
-            paths = self._get_thread_paths(thread_id)
+            paths = self._get_thread_paths(thread_id, user_id=user_id)
         else:
             # Eager initialization: create directories immediately
-            paths = self._create_thread_directories(thread_id)
+            paths = self._create_thread_directories(thread_id, user_id=user_id)
             logger.debug("Created thread data directories for thread %s", thread_id)
 
         return {
diff --git a/backend/packages/harness/deerflow/agents/middlewares/title_middleware.py b/backend/packages/harness/deerflow/agents/middlewares/title_middleware.py
index c17b46387..a0fcfad00 100644
--- a/backend/packages/harness/deerflow/agents/middlewares/title_middleware.py
+++ b/backend/packages/harness/deerflow/agents/middlewares/title_middleware.py
@@ -2,13 +2,16 @@
 
 import logging
 import re
-from typing import NotRequired, override
+from typing import Any, NotRequired, override
 
 from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
+from langgraph.config import get_config
 from langgraph.runtime import Runtime
 
-from deerflow.config.title_config import get_title_config
+from deerflow.config.app_config import AppConfig
+from deerflow.config.deer_flow_context import DeerFlowContext
+from deerflow.config.title_config import TitleConfig
 from deerflow.models import create_chat_model
 
 logger = logging.getLogger(__name__)
@@ -44,10 +47,9 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
 
         return ""
 
-    def _should_generate_title(self, state: TitleMiddlewareState) -> bool:
+    def _should_generate_title(self, state: TitleMiddlewareState, title_config: TitleConfig) -> bool:
         """Check if we should generate a title for this thread."""
-        config = get_title_config()
-        if not config.enabled:
+        if not title_config.enabled:
             return False
 
         # Check if thread already has a title in state
@@ -66,12 +68,11 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
         # Generate title after first complete exchange
         return len(user_messages) == 1 and len(assistant_messages) >= 1
 
-    def _build_title_prompt(self, state: TitleMiddlewareState) -> tuple[str, str]:
+    def _build_title_prompt(self, state: TitleMiddlewareState, title_config: TitleConfig) -> tuple[str, str]:
         """Extract user/assistant messages and build the title prompt.
 
         Returns (prompt_string, user_msg) so callers can use user_msg as fallback.
         """
-        config = get_title_config()
         messages = state.get("messages", [])
 
         user_msg_content = next((m.content for m in messages if m.type == "human"), "")
@@ -80,8 +81,8 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
         user_msg = self._normalize_content(user_msg_content)
         assistant_msg = self._strip_think_tags(self._normalize_content(assistant_msg_content))
 
-        prompt = config.prompt_template.format(
-            max_words=config.max_words,
+        prompt = title_config.prompt_template.format(
+            max_words=title_config.max_words,
             user_msg=user_msg[:500],
             assistant_msg=assistant_msg[:500],
         )
@@ -91,54 +92,66 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]):
         """Remove <think>...</think> blocks emitted by reasoning models (e.g. minimax, DeepSeek-R1)."""
         return re.sub(r"<think>[\s\S]*?</think>", "", text, flags=re.IGNORECASE).strip()
 
-    def _parse_title(self, content: object) -> str:
+    def _parse_title(self, content: object, title_config: TitleConfig) -> str:
         """Normalize model output into a clean title string."""
-        config = get_title_config()
         title_content = self._normalize_content(content)
         title_content = self._strip_think_tags(title_content)
         title = title_content.strip().strip('"').strip("'")
-        return title[: config.max_chars] if len(title) > config.max_chars else title
+        return title[: title_config.max_chars] if len(title) > title_config.max_chars else title
 
-    def _fallback_title(self, user_msg: str) -> str:
-        config = get_title_config()
-        fallback_chars = min(config.max_chars, 50)
+    def _fallback_title(self, user_msg: str, title_config: TitleConfig) -> str:
+        fallback_chars = min(title_config.max_chars, 50)
         if len(user_msg) > fallback_chars:
             return user_msg[:fallback_chars].rstrip() + "..."
         return user_msg if user_msg else "New Conversation"
 
-    def _generate_title_result(self, state: TitleMiddlewareState) -> dict | None:
+    def _get_runnable_config(self) -> dict[str, Any]:
+        """Inherit the parent RunnableConfig and add middleware tag.
+
+        This ensures RunJournal identifies LLM calls from this middleware
+        as ``middleware:title`` instead of ``lead_agent``.
+        """
+        try:
+            parent = get_config()
+        except Exception:
+            parent = {}
+        config = {**parent}
+        config["tags"] = [*(config.get("tags") or []), "middleware:title"]
+        return config
+
+    def _generate_title_result(self, state: TitleMiddlewareState, title_config: TitleConfig) -> dict | None:
         """Generate a local fallback title without blocking on an LLM call."""
-        if not self._should_generate_title(state):
+        if not self._should_generate_title(state, title_config):
             return None
 
-        _, user_msg = self._build_title_prompt(state)
-        return {"title": self._fallback_title(user_msg)}
+        _, user_msg = self._build_title_prompt(state, title_config)
+        return {"title": self._fallback_title(user_msg, title_config)}
 
-    async def _agenerate_title_result(self, state: TitleMiddlewareState) -> dict | None:
+    async def _agenerate_title_result(self, state: TitleMiddlewareState, app_config: AppConfig) -> dict | None:
         """Generate a title asynchronously and fall back locally on failure."""
-        if not self._should_generate_title(state):
+        title_config = app_config.title
+        if not self._should_generate_title(state, title_config):
             return None
 
-        config = get_title_config()
-        prompt, user_msg = self._build_title_prompt(state)
+        prompt, user_msg = self._build_title_prompt(state, title_config)
 
         try:
-            if config.model_name:
-                model = create_chat_model(name=config.model_name, thinking_enabled=False)
+            if title_config.model_name:
+                model = create_chat_model(name=title_config.model_name, thinking_enabled=False, app_config=app_config)
             else:
-                model = create_chat_model(thinking_enabled=False)
-            response = await model.ainvoke(prompt, config={"run_name": "title_agent"})
-            title = self._parse_title(response.content)
+                model = create_chat_model(thinking_enabled=False, app_config=app_config)
+            response = await model.ainvoke(prompt, config=self._get_runnable_config())
+            title = self._parse_title(response.content, title_config)
             if title:
                 return {"title": title}
         except Exception:
             logger.debug("Failed to generate async title; falling back to local title", exc_info=True)
-        return {"title": self._fallback_title(user_msg)}
+        return {"title": self._fallback_title(user_msg, title_config)}
 
     @override
-    def after_model(self, state: TitleMiddlewareState, runtime: Runtime) -> dict | None:
-        return self._generate_title_result(state)
+    def after_model(self, state: TitleMiddlewareState, runtime: Runtime[DeerFlowContext]) -> dict | None:
+        return self._generate_title_result(state, runtime.context.app_config.title)
 
     @override
-    async def aafter_model(self, state: TitleMiddlewareState, runtime: Runtime) -> dict | None:
-        return await self._agenerate_title_result(state)
+    async def aafter_model(self, state: TitleMiddlewareState, runtime: Runtime[DeerFlowContext]) -> dict | None:
+        return await self._agenerate_title_result(state, runtime.context.app_config)
diff --git a/backend/packages/harness/deerflow/agents/middlewares/tool_error_handling_middleware.py b/backend/packages/harness/deerflow/agents/middlewares/tool_error_handling_middleware.py
index 52be28bfb..5ddbb4bbd 100644
--- a/backend/packages/harness/deerflow/agents/middlewares/tool_error_handling_middleware.py
+++ b/backend/packages/harness/deerflow/agents/middlewares/tool_error_handling_middleware.py
@@ -1,8 +1,10 @@
 """Tool error handling middleware and shared runtime middleware builders."""
 
+from __future__ import annotations
+
 import logging
 from collections.abc import Awaitable, Callable
-from typing import override
+from typing import TYPE_CHECKING, override
 
 from langchain.agents import AgentState
 from langchain.agents.middleware import AgentMiddleware
@@ -11,6 +13,9 @@ from langgraph.errors import GraphBubbleUp
 from langgraph.prebuilt.tool_node import ToolCallRequest
 from langgraph.types import Command
 
+if TYPE_CHECKING:
+    from deerflow.config.app_config import AppConfig
+
 logger = logging.getLogger(__name__)
 
 _MISSING_TOOL_CALL_ID = "missing_tool_call_id"
@@ -67,6 +72,7 @@ class ToolErrorHandlingMiddleware(AgentMiddleware[AgentState]):
 
 def _build_runtime_middlewares(
     *,
+    app_config: "AppConfig",
     include_uploads: bool,
     include_dangling_tool_call_patch: bool,
     lazy_init: bool = True,
@@ -94,9 +100,7 @@ def _build_runtime_middlewares(
     middlewares.append(LLMErrorHandlingMiddleware())
 
     # Guardrail middleware (if configured)
-    from deerflow.config.guardrails_config import get_guardrails_config
-
-    guardrails_config = get_guardrails_config()
+    guardrails_config = app_config.guardrails
     if guardrails_config.enabled and guardrails_config.provider:
         import inspect
 
@@ -125,9 +129,10 @@ def _build_runtime_middlewares(
     return middlewares
 
 
-def build_lead_runtime_middlewares(*, lazy_init: bool = True) -> list[AgentMiddleware]:
+def build_lead_runtime_middlewares(*, app_config: "AppConfig", lazy_init: bool = True) -> list[AgentMiddleware]:
     """Middlewares shared by lead agent runtime before lead-only middlewares."""
     return _build_runtime_middlewares(
+        app_config=app_config,
         include_uploads=True,
         include_dangling_tool_call_patch=True,
         lazy_init=lazy_init,
diff --git a/backend/packages/harness/deerflow/agents/middlewares/uploads_middleware.py b/backend/packages/harness/deerflow/agents/middlewares/uploads_middleware.py
index 0fb217bcc..c0f3774b7 100644
--- a/backend/packages/harness/deerflow/agents/middlewares/uploads_middleware.py
+++ b/backend/packages/harness/deerflow/agents/middlewares/uploads_middleware.py
@@ -9,7 +9,9 @@ from langchain.agents.middleware import AgentMiddleware
 from langchain_core.messages import HumanMessage
 from langgraph.runtime import Runtime
 
+from deerflow.config.deer_flow_context import DeerFlowContext
 from deerflow.config.paths import Paths, get_paths
+from deerflow.runtime.user_context import get_effective_user_id
 from deerflow.utils.file_conversion import extract_outline
 
 logger = logging.getLogger(__name__)
@@ -184,7 +186,7 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
         return files if files else None
 
     @override
-    def before_agent(self, state: UploadsMiddlewareState, runtime: Runtime) -> dict | None:
+    def before_agent(self, state: UploadsMiddlewareState, runtime: Runtime[DeerFlowContext]) -> dict | None:
         """Inject uploaded files information before agent execution.
 
         New files come from the current message's additional_kwargs.files.
@@ -213,15 +215,8 @@ class UploadsMiddleware(AgentMiddleware[UploadsMiddlewareState]):
             return None
 
         # Resolve uploads directory for existence checks
-        thread_id = (runtime.context or {}).get("thread_id")
-        if thread_id is None:
-            try:
-                from langgraph.config import get_config
-
-                thread_id = get_config().get("configurable", {}).get("thread_id")
-            except RuntimeError:
-                pass  # get_config() raises outside a runnable context (e.g. unit tests)
-        uploads_dir = self._paths.sandbox_uploads_dir(thread_id) if thread_id else None
+        thread_id = runtime.context.thread_id
+        uploads_dir = self._paths.sandbox_uploads_dir(thread_id, user_id=get_effective_user_id()) if thread_id else None
 
         # Get newly uploaded files from the current message's additional_kwargs.files
         new_files = self._files_from_kwargs(last_message, uploads_dir) or []
diff --git a/backend/packages/harness/deerflow/client.py b/backend/packages/harness/deerflow/client.py
index a26d838af..648bebc86 100644
--- a/backend/packages/harness/deerflow/client.py
+++ b/backend/packages/harness/deerflow/client.py
@@ -36,10 +36,12 @@ from deerflow.agents.lead_agent.agent import _build_middlewares
 from deerflow.agents.lead_agent.prompt import apply_prompt_template
 from deerflow.agents.thread_state import ThreadState
 from deerflow.config.agents_config import AGENT_NAME_PATTERN
-from deerflow.config.app_config import get_app_config, reload_app_config
-from deerflow.config.extensions_config import ExtensionsConfig, SkillStateConfig, get_extensions_config, reload_extensions_config
+from deerflow.config.app_config import AppConfig
+from deerflow.config.deer_flow_context import DeerFlowContext
+from deerflow.config.extensions_config import ExtensionsConfig
 from deerflow.config.paths import get_paths
 from deerflow.models import create_chat_model
+from deerflow.runtime.user_context import get_effective_user_id
 from deerflow.skills.installer import install_skill_from_archive
 from deerflow.uploads.manager import (
     claim_unique_filename,
@@ -115,6 +117,7 @@ class DeerFlowClient:
         config_path: str | None = None,
         checkpointer=None,
         *,
+        config: AppConfig | None = None,
         model_name: str | None = None,
         thinking_enabled: bool = True,
         subagent_enabled: bool = False,
@@ -129,9 +132,14 @@ class DeerFlowClient:
 
         Args:
             config_path: Path to config.yaml. Uses default resolution if None.
+                Ignored when ``config`` is provided.
             checkpointer: LangGraph checkpointer instance for state persistence.
                 Required for multi-turn conversations on the same thread_id.
                 Without a checkpointer, each call is stateless.
+            config: Optional pre-constructed AppConfig. When provided, it takes
+                precedence over ``config_path`` and no file is read. Enables
+                multi-client isolation: two clients with different configs can
+                coexist in the same process without touching process-global state.
             model_name: Override the default model name from config.
             thinking_enabled: Enable model's extended thinking.
             subagent_enabled: Enable subagent delegation.
@@ -140,9 +148,18 @@ class DeerFlowClient:
             available_skills: Optional set of skill names to make available. If None (default), all scanned skills are available.
             middlewares: Optional list of custom middlewares to inject into the agent.
         """
-        if config_path is not None:
-            reload_app_config(config_path)
-        self._app_config = get_app_config()
+        # Constructor-captured config: the client owns its AppConfig for its lifetime.
+        # Multiple clients with different configs do not contend.
+        #
+        # Priority: explicit ``config=`` > explicit ``config_path=`` > ``AppConfig.from_file()``
+        # with default path resolution. There is no ambient global fallback; if
+        # config.yaml cannot be located, ``from_file`` raises loudly.
+        if config is not None:
+            self._app_config = config
+        elif config_path is not None:
+            self._app_config = AppConfig.from_file(config_path)
+        else:
+            self._app_config = AppConfig.from_file()
 
         if agent_name is not None and not AGENT_NAME_PATTERN.match(agent_name):
             raise ValueError(f"Invalid agent name '{agent_name}'. Must match pattern: {AGENT_NAME_PATTERN.pattern}")
@@ -170,6 +187,15 @@ class DeerFlowClient:
         self._agent = None
         self._agent_config_key = None
 
+    def _reload_config(self) -> None:
+        """Reload config from file and refresh the cached reference.
+
+        Only the client's own ``_app_config`` is rebuilt. Other clients
+        and the process-global are untouched, so multi-client coexistence
+        survives reload.
+        """
+        self._app_config = AppConfig.from_file()
+
     # ------------------------------------------------------------------
     # Internal helpers
     # ------------------------------------------------------------------
@@ -227,10 +253,11 @@ class DeerFlowClient:
         max_concurrent_subagents = cfg.get("max_concurrent_subagents", 3)
 
         kwargs: dict[str, Any] = {
-            "model": create_chat_model(name=model_name, thinking_enabled=thinking_enabled),
+            "model": create_chat_model(name=model_name, thinking_enabled=thinking_enabled, app_config=self._app_config),
             "tools": self._get_tools(model_name=model_name, subagent_enabled=subagent_enabled),
-            "middleware": _build_middlewares(config, model_name=model_name, agent_name=self._agent_name, custom_middlewares=self._middlewares),
+            "middleware": _build_middlewares(self._app_config, config, model_name=model_name, agent_name=self._agent_name, custom_middlewares=self._middlewares),
             "system_prompt": apply_prompt_template(
+                self._app_config,
                 subagent_enabled=subagent_enabled,
                 max_concurrent_subagents=max_concurrent_subagents,
                 agent_name=self._agent_name,
@@ -240,9 +267,9 @@ class DeerFlowClient:
         }
         checkpointer = self._checkpointer
         if checkpointer is None:
-            from deerflow.agents.checkpointer import get_checkpointer
+            from deerflow.runtime.checkpointer import get_checkpointer
 
-            checkpointer = get_checkpointer()
+            checkpointer = get_checkpointer(self._app_config)
         if checkpointer is not None:
             kwargs["checkpointer"] = checkpointer
 
@@ -250,12 +277,11 @@ class DeerFlowClient:
         self._agent_config_key = key
         logger.info("Agent created: agent_name=%s, model=%s, thinking=%s", self._agent_name, model_name, thinking_enabled)
 
-    @staticmethod
-    def _get_tools(*, model_name: str | None, subagent_enabled: bool):
+    def _get_tools(self, *, model_name: str | None, subagent_enabled: bool):
         """Lazy import to avoid circular dependency at module level."""
         from deerflow.tools import get_available_tools
 
-        return get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled)
+        return get_available_tools(model_name=model_name, subagent_enabled=subagent_enabled, app_config=self._app_config)
 
     @staticmethod
     def _serialize_tool_calls(tool_calls) -> list[dict]:
@@ -374,9 +400,9 @@ class DeerFlowClient:
         """
         checkpointer = self._checkpointer
         if checkpointer is None:
-            from deerflow.agents.checkpointer.provider import get_checkpointer
+            from deerflow.runtime.checkpointer.provider import get_checkpointer
 
-            checkpointer = get_checkpointer()
+            checkpointer = get_checkpointer(self._app_config)
 
         thread_info_map = {}
 
@@ -429,9 +455,9 @@ class DeerFlowClient:
         """
         checkpointer = self._checkpointer
         if checkpointer is None:
-            from deerflow.agents.checkpointer.provider import get_checkpointer
+            from deerflow.runtime.checkpointer.provider import get_checkpointer
 
-            checkpointer = get_checkpointer()
+            checkpointer = get_checkpointer(self._app_config)
 
         config = {"configurable": {"thread_id": thread_id}}
         checkpoints = []
@@ -551,9 +577,7 @@ class DeerFlowClient:
         self._ensure_agent(config)
 
         state: dict[str, Any] = {"messages": [HumanMessage(content=message)]}
-        context = {"thread_id": thread_id}
-        if self._agent_name:
-            context["agent_name"] = self._agent_name
+        context = DeerFlowContext(app_config=self._app_config, thread_id=thread_id, agent_name=self._agent_name)
 
         seen_ids: set[str] = set()
         # Cross-mode handoff: ids already streamed via LangGraph ``messages``
@@ -762,7 +786,7 @@ class DeerFlowClient:
                     "category": s.category,
                     "enabled": s.enabled,
                 }
-                for s in load_skills(enabled_only=enabled_only)
+                for s in load_skills(self._app_config, enabled_only=enabled_only)
             ]
         }
 
@@ -774,19 +798,19 @@ class DeerFlowClient:
         """
         from deerflow.agents.memory.updater import get_memory_data
 
-        return get_memory_data()
+        return get_memory_data(self._app_config.memory, user_id=get_effective_user_id())
 
     def export_memory(self) -> dict:
         """Export current memory data for backup or transfer."""
         from deerflow.agents.memory.updater import get_memory_data
 
-        return get_memory_data()
+        return get_memory_data(self._app_config.memory, user_id=get_effective_user_id())
 
     def import_memory(self, memory_data: dict) -> dict:
         """Import and persist full memory data."""
         from deerflow.agents.memory.updater import import_memory_data
 
-        return import_memory_data(memory_data)
+        return import_memory_data(self._app_config.memory, memory_data, user_id=get_effective_user_id())
 
     def get_model(self, name: str) -> dict | None:
         """Get a specific model's configuration by name.
@@ -821,8 +845,8 @@ class DeerFlowClient:
             Dict with "mcp_servers" key mapping server name to config,
             matching the Gateway API ``McpConfigResponse`` schema.
         """
-        config = get_extensions_config()
-        return {"mcp_servers": {name: server.model_dump() for name, server in config.mcp_servers.items()}}
+        ext = self._app_config.extensions
+        return {"mcp_servers": {name: server.model_dump() for name, server in ext.mcp_servers.items()}}
 
     def update_mcp_config(self, mcp_servers: dict[str, dict]) -> dict:
         """Update MCP server configurations.
@@ -844,18 +868,19 @@ class DeerFlowClient:
         if config_path is None:
             raise FileNotFoundError("Cannot locate extensions_config.json. Set DEER_FLOW_EXTENSIONS_CONFIG_PATH or ensure it exists in the project root.")
 
-        current_config = get_extensions_config()
+        current_ext = self._app_config.extensions
 
         config_data = {
             "mcpServers": mcp_servers,
-            "skills": {name: {"enabled": skill.enabled} for name, skill in current_config.skills.items()},
+            "skills": {name: {"enabled": skill.enabled} for name, skill in current_ext.skills.items()},
         }
 
         self._atomic_write_json(config_path, config_data)
 
         self._agent = None
         self._agent_config_key = None
-        reloaded = reload_extensions_config()
+        self._reload_config()
+        reloaded = self._app_config.extensions
         return {"mcp_servers": {name: server.model_dump() for name, server in reloaded.mcp_servers.items()}}
 
     # ------------------------------------------------------------------
@@ -873,7 +898,7 @@ class DeerFlowClient:
         """
         from deerflow.skills.loader import load_skills
 
-        skill = next((s for s in load_skills(enabled_only=False) if s.name == name), None)
+        skill = next((s for s in load_skills(self._app_config, enabled_only=False) if s.name == name), None)
         if skill is None:
             return None
         return {
@@ -900,7 +925,7 @@ class DeerFlowClient:
         """
         from deerflow.skills.loader import load_skills
 
-        skills = load_skills(enabled_only=False)
+        skills = load_skills(self._app_config, enabled_only=False)
         skill = next((s for s in skills if s.name == name), None)
         if skill is None:
             raise ValueError(f"Skill '{name}' not found")
@@ -909,21 +934,25 @@ class DeerFlowClient:
         if config_path is None:
             raise FileNotFoundError("Cannot locate extensions_config.json. Set DEER_FLOW_EXTENSIONS_CONFIG_PATH or ensure it exists in the project root.")
 
-        extensions_config = get_extensions_config()
-        extensions_config.skills[name] = SkillStateConfig(enabled=enabled)
+        # Do not mutate self._app_config (frozen value). Compose the new
+        # skills state in a fresh dict, write it to disk, and let _reload_config()
+        # below rebuild AppConfig from the updated file.
+        ext = self._app_config.extensions
+        new_skills = {n: {"enabled": sc.enabled} for n, sc in ext.skills.items()}
+        new_skills[name] = {"enabled": enabled}
 
         config_data = {
-            "mcpServers": {n: s.model_dump() for n, s in extensions_config.mcp_servers.items()},
-            "skills": {n: {"enabled": sc.enabled} for n, sc in extensions_config.skills.items()},
+            "mcpServers": {n: s.model_dump() for n, s in ext.mcp_servers.items()},
+            "skills": new_skills,
         }
 
         self._atomic_write_json(config_path, config_data)
 
         self._agent = None
         self._agent_config_key = None
-        reload_extensions_config()
+        self._reload_config()
 
-        updated = next((s for s in load_skills(enabled_only=False) if s.name == name), None)
+        updated = next((s for s in load_skills(self._app_config, enabled_only=False) if s.name == name), None)
         if updated is None:
             raise RuntimeError(f"Skill '{name}' disappeared after update")
         return {
@@ -961,25 +990,25 @@ class DeerFlowClient:
         """
         from deerflow.agents.memory.updater import reload_memory_data
 
-        return reload_memory_data()
+        return reload_memory_data(self._app_config.memory, user_id=get_effective_user_id())
 
     def clear_memory(self) -> dict:
         """Clear all persisted memory data."""
         from deerflow.agents.memory.updater import clear_memory_data
 
-        return clear_memory_data()
+        return clear_memory_data(self._app_config.memory, user_id=get_effective_user_id())
 
     def create_memory_fact(self, content: str, category: str = "context", confidence: float = 0.5) -> dict:
         """Create a single fact manually."""
         from deerflow.agents.memory.updater import create_memory_fact
 
-        return create_memory_fact(content=content, category=category, confidence=confidence)
+        return create_memory_fact(self._app_config.memory, content=content, category=category, confidence=confidence)
 
     def delete_memory_fact(self, fact_id: str) -> dict:
         """Delete a single fact from memory by fact id."""
         from deerflow.agents.memory.updater import delete_memory_fact
 
-        return delete_memory_fact(fact_id)
+        return delete_memory_fact(self._app_config.memory, fact_id)
 
     def update_memory_fact(
         self,
@@ -992,6 +1021,7 @@ class DeerFlowClient:
         from deerflow.agents.memory.updater import update_memory_fact
 
         return update_memory_fact(
+            self._app_config.memory,
             fact_id=fact_id,
             content=content,
             category=category,
@@ -1004,9 +1034,7 @@ class DeerFlowClient:
         Returns:
             Memory config dict.
         """
-        from deerflow.config.memory_config import get_memory_config
-
-        config = get_memory_config()
+        config = self._app_config.memory
         return {
             "enabled": config.enabled,
             "storage_path": config.storage_path,
@@ -1184,7 +1212,7 @@ class DeerFlowClient:
             ValueError: If the path is invalid.
         """
         try:
-            actual = get_paths().resolve_virtual_path(thread_id, path)
+            actual = get_paths().resolve_virtual_path(thread_id, path, user_id=get_effective_user_id())
         except ValueError as exc:
             if "traversal" in str(exc):
                 from deerflow.uploads.manager import PathTraversalError
diff --git a/backend/packages/harness/deerflow/community/aio_sandbox/aio_sandbox_provider.py b/backend/packages/harness/deerflow/community/aio_sandbox/aio_sandbox_provider.py
index 952b6731b..680380bd1 100644
--- a/backend/packages/harness/deerflow/community/aio_sandbox/aio_sandbox_provider.py
+++ b/backend/packages/harness/deerflow/community/aio_sandbox/aio_sandbox_provider.py
@@ -25,8 +25,9 @@ except ImportError:  # pragma: no cover - Windows fallback
     fcntl = None  # type: ignore[assignment]
     import msvcrt
 
-from deerflow.config import get_app_config
+from deerflow.config.app_config import AppConfig
 from deerflow.config.paths import VIRTUAL_PATH_PREFIX, get_paths
+from deerflow.runtime.user_context import get_effective_user_id
 from deerflow.sandbox.sandbox import Sandbox
 from deerflow.sandbox.sandbox_provider import SandboxProvider
 
@@ -89,7 +90,8 @@ class AioSandboxProvider(SandboxProvider):
           API_KEY: $MY_API_KEY
     """
 
-    def __init__(self):
+    def __init__(self, app_config: "AppConfig"):
+        self._app_config = app_config
         self._lock = threading.Lock()
         self._sandboxes: dict[str, AioSandbox] = {}  # sandbox_id -> AioSandbox instance
         self._sandbox_infos: dict[str, SandboxInfo] = {}  # sandbox_id -> SandboxInfo (for destroy)
@@ -158,8 +160,7 @@ class AioSandboxProvider(SandboxProvider):
 
     def _load_config(self) -> dict:
         """Load sandbox configuration from app config."""
-        config = get_app_config()
-        sandbox_config = config.sandbox
+        sandbox_config = self._app_config.sandbox
 
         idle_timeout = getattr(sandbox_config, "idle_timeout", None)
         replicas = getattr(sandbox_config, "replicas", None)
@@ -270,28 +271,27 @@ class AioSandboxProvider(SandboxProvider):
         mounted Docker socket (DooD), the host Docker daemon can resolve the paths.
         """
         paths = get_paths()
-        paths.ensure_thread_dirs(thread_id)
+        user_id = get_effective_user_id()
+        paths.ensure_thread_dirs(thread_id, user_id=user_id)
 
         return [
-            (paths.host_sandbox_work_dir(thread_id), f"{VIRTUAL_PATH_PREFIX}/workspace", False),
-            (paths.host_sandbox_uploads_dir(thread_id), f"{VIRTUAL_PATH_PREFIX}/uploads", False),
-            (paths.host_sandbox_outputs_dir(thread_id), f"{VIRTUAL_PATH_PREFIX}/outputs", False),
+            (paths.host_sandbox_work_dir(thread_id, user_id=user_id), f"{VIRTUAL_PATH_PREFIX}/workspace", False),
+            (paths.host_sandbox_uploads_dir(thread_id, user_id=user_id), f"{VIRTUAL_PATH_PREFIX}/uploads", False),
+            (paths.host_sandbox_outputs_dir(thread_id, user_id=user_id), f"{VIRTUAL_PATH_PREFIX}/outputs", False),
             # ACP workspace: read-only inside the sandbox (lead agent reads results;
             # the ACP subprocess writes from the host side, not from within the container).
-            (paths.host_acp_workspace_dir(thread_id), "/mnt/acp-workspace", True),
+            (paths.host_acp_workspace_dir(thread_id, user_id=user_id), "/mnt/acp-workspace", True),
         ]
 
-    @staticmethod
-    def _get_skills_mount() -> tuple[str, str, bool] | None:
+    def _get_skills_mount(self) -> tuple[str, str, bool] | None:
         """Get the skills directory mount configuration.
 
         Mount source uses DEER_FLOW_HOST_SKILLS_PATH when running inside Docker (DooD)
         so the host Docker daemon can resolve the path.
         """
         try:
-            config = get_app_config()
-            skills_path = config.skills.get_skills_path()
-            container_path = config.skills.container_path
+            skills_path = self._app_config.skills.get_skills_path()
+            container_path = self._app_config.skills.container_path
 
             if skills_path.exists():
                 # When running inside Docker with DooD, use host-side skills path.
@@ -490,8 +490,9 @@ class AioSandboxProvider(SandboxProvider):
         across multiple processes, preventing container-name conflicts.
         """
         paths = get_paths()
-        paths.ensure_thread_dirs(thread_id)
-        lock_path = paths.thread_dir(thread_id) / f"{sandbox_id}.lock"
+        user_id = get_effective_user_id()
+        paths.ensure_thread_dirs(thread_id, user_id=user_id)
+        lock_path = paths.thread_dir(thread_id, user_id=user_id) / f"{sandbox_id}.lock"
 
         with open(lock_path, "a", encoding="utf-8") as lock_file:
             locked = False
diff --git a/backend/packages/harness/deerflow/community/ddg_search/tools.py b/backend/packages/harness/deerflow/community/ddg_search/tools.py
index 7639fe8ec..437e41d6c 100644
--- a/backend/packages/harness/deerflow/community/ddg_search/tools.py
+++ b/backend/packages/harness/deerflow/community/ddg_search/tools.py
@@ -5,9 +5,9 @@ Web Search Tool - Search the web using DuckDuckGo (no API key required).
 import json
 import logging
 
-from langchain.tools import tool
+from langchain.tools import ToolRuntime, tool
 
-from deerflow.config import get_app_config
+from deerflow.config.deer_flow_context import resolve_context
 
 logger = logging.getLogger(__name__)
 
@@ -55,6 +55,7 @@ def _search_text(
 @tool("web_search", parse_docstring=True)
 def web_search_tool(
     query: str,
+    runtime: ToolRuntime,
     max_results: int = 5,
 ) -> str:
     """Search the web for information. Use this tool to find current information, news, articles, and facts from the internet.
@@ -63,11 +64,11 @@ def web_search_tool(
         query: Search keywords describing what you want to find. Be specific for better results.
         max_results: Maximum number of results to return. Default is 5.
     """
-    config = get_app_config().get_tool_config("web_search")
+    tool_config = resolve_context(runtime).app_config.get_tool_config("web_search")
 
     # Override max_results from config if set
-    if config is not None and "max_results" in config.model_extra:
-        max_results = config.model_extra.get("max_results", max_results)
+    if tool_config is not None and "max_results" in tool_config.model_extra:
+        max_results = tool_config.model_extra.get("max_results", max_results)
 
     results = _search_text(
         query=query,
diff --git a/backend/packages/harness/deerflow/community/exa/tools.py b/backend/packages/harness/deerflow/community/exa/tools.py
index 974280402..e1eb7372e 100644
--- a/backend/packages/harness/deerflow/community/exa/tools.py
+++ b/backend/packages/harness/deerflow/community/exa/tools.py
@@ -1,37 +1,39 @@
 import json
 
 from exa_py import Exa
-from langchain.tools import tool
+from langchain.tools import ToolRuntime, tool
 
-from deerflow.config import get_app_config
+from deerflow.config.app_config import AppConfig
+from deerflow.config.deer_flow_context import resolve_context
 
 
-def _get_exa_client(tool_name: str = "web_search") -> Exa:
-    config = get_app_config().get_tool_config(tool_name)
+def _get_exa_client(app_config: AppConfig, tool_name: str = "web_search") -> Exa:
+    tool_config = app_config.get_tool_config(tool_name)
     api_key = None
-    if config is not None and "api_key" in config.model_extra:
-        api_key = config.model_extra.get("api_key")
+    if tool_config is not None and "api_key" in tool_config.model_extra:
+        api_key = tool_config.model_extra.get("api_key")
     return Exa(api_key=api_key)
 
 
 @tool("web_search", parse_docstring=True)
-def web_search_tool(query: str) -> str:
+def web_search_tool(query: str, runtime: ToolRuntime) -> str:
     """Search the web.
 
     Args:
         query: The query to search for.
     """
     try:
-        config = get_app_config().get_tool_config("web_search")
+        app_config = resolve_context(runtime).app_config
+        tool_config = app_config.get_tool_config("web_search")
         max_results = 5
         search_type = "auto"
         contents_max_characters = 1000
-        if config is not None:
-            max_results = config.model_extra.get("max_results", max_results)
-            search_type = config.model_extra.get("search_type", search_type)
-            contents_max_characters = config.model_extra.get("contents_max_characters", contents_max_characters)
+        if tool_config is not None:
+            max_results = tool_config.model_extra.get("max_results", max_results)
+            search_type = tool_config.model_extra.get("search_type", search_type)
+            contents_max_characters = tool_config.model_extra.get("contents_max_characters", contents_max_characters)
 
-        client = _get_exa_client()
+        client = _get_exa_client(app_config)
         res = client.search(
             query,
             type=search_type,
@@ -54,7 +56,7 @@ def web_search_tool(query: str) -> str:
 
 
 @tool("web_fetch", parse_docstring=True)
-def web_fetch_tool(url: str) -> str:
+def web_fetch_tool(url: str, runtime: ToolRuntime) -> str:
     """Fetch the contents of a web page at a given URL.
     Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
     This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
@@ -65,7 +67,7 @@ def web_fetch_tool(url: str) -> str:
         url: The URL to fetch the contents of.
     """
     try:
-        client = _get_exa_client("web_fetch")
+        client = _get_exa_client(resolve_context(runtime).app_config, "web_fetch")
         res = client.get_contents([url], text={"max_characters": 4096})
 
         if res.results:
diff --git a/backend/packages/harness/deerflow/community/firecrawl/tools.py b/backend/packages/harness/deerflow/community/firecrawl/tools.py
index 86f44150a..38445e893 100644
--- a/backend/packages/harness/deerflow/community/firecrawl/tools.py
+++ b/backend/packages/harness/deerflow/community/firecrawl/tools.py
@@ -1,33 +1,35 @@
 import json
 
 from firecrawl import FirecrawlApp
-from langchain.tools import tool
+from langchain.tools import ToolRuntime, tool
 
-from deerflow.config import get_app_config
+from deerflow.config.app_config import AppConfig
+from deerflow.config.deer_flow_context import resolve_context
 
 
-def _get_firecrawl_client(tool_name: str = "web_search") -> FirecrawlApp:
-    config = get_app_config().get_tool_config(tool_name)
+def _get_firecrawl_client(app_config: AppConfig, tool_name: str = "web_search") -> FirecrawlApp:
+    tool_config = app_config.get_tool_config(tool_name)
     api_key = None
-    if config is not None and "api_key" in config.model_extra:
-        api_key = config.model_extra.get("api_key")
+    if tool_config is not None and "api_key" in tool_config.model_extra:
+        api_key = tool_config.model_extra.get("api_key")
     return FirecrawlApp(api_key=api_key)  # type: ignore[arg-type]
 
 
 @tool("web_search", parse_docstring=True)
-def web_search_tool(query: str) -> str:
+def web_search_tool(query: str, runtime: ToolRuntime) -> str:
     """Search the web.
 
     Args:
         query: The query to search for.
     """
     try:
-        config = get_app_config().get_tool_config("web_search")
+        app_config = resolve_context(runtime).app_config
+        tool_config = app_config.get_tool_config("web_search")
         max_results = 5
-        if config is not None:
-            max_results = config.model_extra.get("max_results", max_results)
+        if tool_config is not None:
+            max_results = tool_config.model_extra.get("max_results", max_results)
 
-        client = _get_firecrawl_client("web_search")
+        client = _get_firecrawl_client(app_config, "web_search")
         result = client.search(query, limit=max_results)
 
         # result.web contains list of SearchResultWeb objects
@@ -47,7 +49,7 @@ def web_search_tool(query: str) -> str:
 
 
 @tool("web_fetch", parse_docstring=True)
-def web_fetch_tool(url: str) -> str:
+def web_fetch_tool(url: str, runtime: ToolRuntime) -> str:
     """Fetch the contents of a web page at a given URL.
     Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
     This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
@@ -58,7 +60,8 @@ def web_fetch_tool(url: str) -> str:
         url: The URL to fetch the contents of.
     """
     try:
-        client = _get_firecrawl_client("web_fetch")
+        app_config = resolve_context(runtime).app_config
+        client = _get_firecrawl_client(app_config, "web_fetch")
         result = client.scrape(url, formats=["markdown"])
 
         markdown_content = result.markdown or ""
diff --git a/backend/packages/harness/deerflow/community/image_search/tools.py b/backend/packages/harness/deerflow/community/image_search/tools.py
index dc78a5ad3..3daa801c1 100644
--- a/backend/packages/harness/deerflow/community/image_search/tools.py
+++ b/backend/packages/harness/deerflow/community/image_search/tools.py
@@ -5,9 +5,9 @@ Image Search Tool - Search images using DuckDuckGo for reference in image genera
 import json
 import logging
 
-from langchain.tools import tool
+from langchain.tools import ToolRuntime, tool
 
-from deerflow.config import get_app_config
+from deerflow.config.deer_flow_context import resolve_context
 
 logger = logging.getLogger(__name__)
 
@@ -77,6 +77,7 @@ def _search_images(
 @tool("image_search", parse_docstring=True)
 def image_search_tool(
     query: str,
+    runtime: ToolRuntime,
     max_results: int = 5,
     size: str | None = None,
     type_image: str | None = None,
@@ -99,11 +100,11 @@ def image_search_tool(
         type_image: Image type filter. Options: "photo", "clipart", "gif", "transparent", "line". Use "photo" for realistic references.
         layout: Layout filter. Options: "Square", "Tall", "Wide". Choose based on your generation needs.
     """
-    config = get_app_config().get_tool_config("image_search")
+    tool_config = resolve_context(runtime).app_config.get_tool_config("image_search")
 
     # Override max_results from config if set
-    if config is not None and "max_results" in config.model_extra:
-        max_results = config.model_extra.get("max_results", max_results)
+    if tool_config is not None and "max_results" in tool_config.model_extra:
+        max_results = tool_config.model_extra.get("max_results", max_results)
 
     results = _search_images(
         query=query,
diff --git a/backend/packages/harness/deerflow/community/infoquest/tools.py b/backend/packages/harness/deerflow/community/infoquest/tools.py
index 49fa1de52..9eabedce2 100644
--- a/backend/packages/harness/deerflow/community/infoquest/tools.py
+++ b/backend/packages/harness/deerflow/community/infoquest/tools.py
@@ -1,6 +1,7 @@
-from langchain.tools import tool
+from langchain.tools import ToolRuntime, tool
 
-from deerflow.config import get_app_config
+from deerflow.config.app_config import AppConfig
+from deerflow.config.deer_flow_context import resolve_context
 from deerflow.utils.readability import ReadabilityExtractor
 
 from .infoquest_client import InfoQuestClient
@@ -8,13 +9,13 @@ from .infoquest_client import InfoQuestClient
 readability_extractor = ReadabilityExtractor()
 
 
-def _get_infoquest_client() -> InfoQuestClient:
-    search_config = get_app_config().get_tool_config("web_search")
+def _get_infoquest_client(app_config: AppConfig) -> InfoQuestClient:
+    search_config = app_config.get_tool_config("web_search")
     search_time_range = -1
     if search_config is not None and "search_time_range" in search_config.model_extra:
         search_time_range = search_config.model_extra.get("search_time_range")
 
-    fetch_config = get_app_config().get_tool_config("web_fetch")
+    fetch_config = app_config.get_tool_config("web_fetch")
     fetch_time = -1
     if fetch_config is not None and "fetch_time" in fetch_config.model_extra:
         fetch_time = fetch_config.model_extra.get("fetch_time")
@@ -25,7 +26,7 @@ def _get_infoquest_client() -> InfoQuestClient:
     if fetch_config is not None and "navigation_timeout" in fetch_config.model_extra:
         navigation_timeout = fetch_config.model_extra.get("navigation_timeout")
 
-    image_search_config = get_app_config().get_tool_config("image_search")
+    image_search_config = app_config.get_tool_config("image_search")
     image_search_time_range = -1
     if image_search_config is not None and "image_search_time_range" in image_search_config.model_extra:
         image_search_time_range = image_search_config.model_extra.get("image_search_time_range")
@@ -44,19 +45,18 @@ def _get_infoquest_client() -> InfoQuestClient:
 
 
 @tool("web_search", parse_docstring=True)
-def web_search_tool(query: str) -> str:
+def web_search_tool(query: str, runtime: ToolRuntime) -> str:
     """Search the web.
 
     Args:
         query: The query to search for.
     """
-
-    client = _get_infoquest_client()
+    client = _get_infoquest_client(resolve_context(runtime).app_config)
     return client.web_search(query)
 
 
 @tool("web_fetch", parse_docstring=True)
-def web_fetch_tool(url: str) -> str:
+def web_fetch_tool(url: str, runtime: ToolRuntime) -> str:
     """Fetch the contents of a web page at a given URL.
     Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
     This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
@@ -66,7 +66,7 @@ def web_fetch_tool(url: str) -> str:
     Args:
         url: The URL to fetch the contents of.
     """
-    client = _get_infoquest_client()
+    client = _get_infoquest_client(resolve_context(runtime).app_config)
     result = client.fetch(url)
     if result.startswith("Error: "):
         return result
@@ -75,7 +75,7 @@ def web_fetch_tool(url: str) -> str:
 
 
 @tool("image_search", parse_docstring=True)
-def image_search_tool(query: str) -> str:
+def image_search_tool(query: str, runtime: ToolRuntime) -> str:
     """Search for images online. Use this tool BEFORE image generation to find reference images for characters, portraits, objects, scenes, or any content requiring visual accuracy.
 
     **When to use:**
@@ -89,5 +89,5 @@ def image_search_tool(query: str) -> str:
     Args:
         query: The query to search for images.
     """
-    client = _get_infoquest_client()
+    client = _get_infoquest_client(resolve_context(runtime).app_config)
     return client.image_search(query)
diff --git a/backend/packages/harness/deerflow/community/jina_ai/tools.py b/backend/packages/harness/deerflow/community/jina_ai/tools.py
index 760e6a3b6..dda140bfb 100644
--- a/backend/packages/harness/deerflow/community/jina_ai/tools.py
+++ b/backend/packages/harness/deerflow/community/jina_ai/tools.py
@@ -1,16 +1,16 @@
 import asyncio
 
-from langchain.tools import tool
+from langchain.tools import ToolRuntime, tool
 
 from deerflow.community.jina_ai.jina_client import JinaClient
-from deerflow.config import get_app_config
+from deerflow.config.deer_flow_context import resolve_context
 from deerflow.utils.readability import ReadabilityExtractor
 
 readability_extractor = ReadabilityExtractor()
 
 
 @tool("web_fetch", parse_docstring=True)
-async def web_fetch_tool(url: str) -> str:
+async def web_fetch_tool(url: str, runtime: ToolRuntime) -> str:
     """Fetch the contents of a web page at a given URL.
     Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
     This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
@@ -22,9 +22,9 @@ async def web_fetch_tool(url: str) -> str:
     """
     jina_client = JinaClient()
     timeout = 10
-    config = get_app_config().get_tool_config("web_fetch")
-    if config is not None and "timeout" in config.model_extra:
-        timeout = config.model_extra.get("timeout")
+    tool_config = resolve_context(runtime).app_config.get_tool_config("web_fetch")
+    if tool_config is not None and "timeout" in tool_config.model_extra:
+        timeout = tool_config.model_extra.get("timeout")
     html_content = await jina_client.crawl(url, return_format="html", timeout=timeout)
     if isinstance(html_content, str) and html_content.startswith("Error:"):
         return html_content
diff --git a/backend/packages/harness/deerflow/community/tavily/tools.py b/backend/packages/harness/deerflow/community/tavily/tools.py
index de7996c7a..65a29572a 100644
--- a/backend/packages/harness/deerflow/community/tavily/tools.py
+++ b/backend/packages/harness/deerflow/community/tavily/tools.py
@@ -1,32 +1,34 @@
 import json
 
-from langchain.tools import tool
+from langchain.tools import ToolRuntime, tool
 from tavily import TavilyClient
 
-from deerflow.config import get_app_config
+from deerflow.config.app_config import AppConfig
+from deerflow.config.deer_flow_context import resolve_context
 
 
-def _get_tavily_client() -> TavilyClient:
-    config = get_app_config().get_tool_config("web_search")
+def _get_tavily_client(app_config: AppConfig) -> TavilyClient:
+    tool_config = app_config.get_tool_config("web_search")
     api_key = None
-    if config is not None and "api_key" in config.model_extra:
-        api_key = config.model_extra.get("api_key")
+    if tool_config is not None and "api_key" in tool_config.model_extra:
+        api_key = tool_config.model_extra.get("api_key")
     return TavilyClient(api_key=api_key)
 
 
 @tool("web_search", parse_docstring=True)
-def web_search_tool(query: str) -> str:
+def web_search_tool(query: str, runtime: ToolRuntime) -> str:
     """Search the web.
 
     Args:
         query: The query to search for.
     """
-    config = get_app_config().get_tool_config("web_search")
+    app_config = resolve_context(runtime).app_config
+    tool_config = app_config.get_tool_config("web_search")
     max_results = 5
-    if config is not None and "max_results" in config.model_extra:
-        max_results = config.model_extra.get("max_results")
+    if tool_config is not None and "max_results" in tool_config.model_extra:
+        max_results = tool_config.model_extra.get("max_results")
 
-    client = _get_tavily_client()
+    client = _get_tavily_client(app_config)
     res = client.search(query, max_results=max_results)
     normalized_results = [
         {
@@ -41,7 +43,7 @@ def web_search_tool(query: str) -> str:
 
 
 @tool("web_fetch", parse_docstring=True)
-def web_fetch_tool(url: str) -> str:
+def web_fetch_tool(url: str, runtime: ToolRuntime) -> str:
     """Fetch the contents of a web page at a given URL.
     Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
     This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
@@ -51,7 +53,8 @@ def web_fetch_tool(url: str) -> str:
     Args:
         url: The URL to fetch the contents of.
     """
-    client = _get_tavily_client()
+    app_config = resolve_context(runtime).app_config
+    client = _get_tavily_client(app_config)
     res = client.extract([url])
     if "failed_results" in res and len(res["failed_results"]) > 0:
         return f"Error: {res['failed_results'][0]['error']}"
diff --git a/backend/packages/harness/deerflow/config/__init__.py b/backend/packages/harness/deerflow/config/__init__.py
index 2e1ee82f8..106a9e8fd 100644
--- a/backend/packages/harness/deerflow/config/__init__.py
+++ b/backend/packages/harness/deerflow/config/__init__.py
@@ -1,6 +1,6 @@
-from .app_config import get_app_config
-from .extensions_config import ExtensionsConfig, get_extensions_config
-from .memory_config import MemoryConfig, get_memory_config
+from .app_config import AppConfig
+from .extensions_config import ExtensionsConfig
+from .memory_config import MemoryConfig
 from .paths import Paths, get_paths
 from .skill_evolution_config import SkillEvolutionConfig
 from .skills_config import SkillsConfig
@@ -13,18 +13,16 @@ from .tracing_config import (
 )
 
 __all__ = [
-    "get_app_config",
-    "SkillEvolutionConfig",
-    "Paths",
-    "get_paths",
-    "SkillsConfig",
+    "AppConfig",
     "ExtensionsConfig",
-    "get_extensions_config",
     "MemoryConfig",
-    "get_memory_config",
-    "get_tracing_config",
-    "get_explicitly_enabled_tracing_providers",
+    "Paths",
+    "SkillEvolutionConfig",
+    "SkillsConfig",
     "get_enabled_tracing_providers",
+    "get_explicitly_enabled_tracing_providers",
+    "get_paths",
+    "get_tracing_config",
     "is_tracing_enabled",
     "validate_enabled_tracing_providers",
 ]
diff --git a/backend/packages/harness/deerflow/config/acp_config.py b/backend/packages/harness/deerflow/config/acp_config.py
index de4b1e89f..4d05327fc 100644
--- a/backend/packages/harness/deerflow/config/acp_config.py
+++ b/backend/packages/harness/deerflow/config/acp_config.py
@@ -1,16 +1,13 @@
 """ACP (Agent Client Protocol) agent configuration loaded from config.yaml."""
 
-import logging
-from collections.abc import Mapping
-
-from pydantic import BaseModel, Field
-
-logger = logging.getLogger(__name__)
+from pydantic import BaseModel, ConfigDict, Field
 
 
 class ACPAgentConfig(BaseModel):
     """Configuration for a single ACP-compatible agent."""
 
+    model_config = ConfigDict(frozen=True)
+
     command: str = Field(description="Command to launch the ACP agent subprocess")
     args: list[str] = Field(default_factory=list, description="Additional command arguments")
     env: dict[str, str] = Field(default_factory=dict, description="Environment variables to inject into the agent subprocess. Values starting with $ are resolved from host environment variables.")
@@ -24,28 +21,3 @@ class ACPAgentConfig(BaseModel):
             "are denied — the agent must be configured to operate without requesting permissions."
         ),
     )
-
-
-_acp_agents: dict[str, ACPAgentConfig] = {}
-
-
-def get_acp_agents() -> dict[str, ACPAgentConfig]:
-    """Get the currently configured ACP agents.
-
-    Returns:
-        Mapping of agent name -> ACPAgentConfig.  Empty dict if no ACP agents are configured.
-    """
-    return _acp_agents
-
-
-def load_acp_config_from_dict(config_dict: Mapping[str, Mapping[str, object]] | None) -> None:
-    """Load ACP agent configuration from a dictionary (typically from config.yaml).
-
-    Args:
-        config_dict: Mapping of agent name -> config fields.
-    """
-    global _acp_agents
-    if config_dict is None:
-        config_dict = {}
-    _acp_agents = {name: ACPAgentConfig(**cfg) for name, cfg in config_dict.items()}
-    logger.info("ACP config loaded: %d agent(s): %s", len(_acp_agents), list(_acp_agents.keys()))
diff --git a/backend/packages/harness/deerflow/config/agents_api_config.py b/backend/packages/harness/deerflow/config/agents_api_config.py
index 84205259e..38ead8152 100644
--- a/backend/packages/harness/deerflow/config/agents_api_config.py
+++ b/backend/packages/harness/deerflow/config/agents_api_config.py
@@ -1,32 +1,14 @@
 """Configuration for the custom agents management API."""
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 
 class AgentsApiConfig(BaseModel):
     """Configuration for custom-agent and user-profile management routes."""
 
+    model_config = ConfigDict(frozen=True)
+
     enabled: bool = Field(
         default=False,
         description=("Whether to expose the custom-agent management API over HTTP. When disabled, the gateway rejects read/write access to custom agent SOUL.md, config, and USER.md prompt-management routes."),
     )
-
-
-_agents_api_config: AgentsApiConfig = AgentsApiConfig()
-
-
-def get_agents_api_config() -> AgentsApiConfig:
-    """Get the current agents API configuration."""
-    return _agents_api_config
-
-
-def set_agents_api_config(config: AgentsApiConfig) -> None:
-    """Set the agents API configuration."""
-    global _agents_api_config
-    _agents_api_config = config
-
-
-def load_agents_api_config_from_dict(config_dict: dict) -> None:
-    """Load agents API configuration from a dictionary."""
-    global _agents_api_config
-    _agents_api_config = AgentsApiConfig(**config_dict)
diff --git a/backend/packages/harness/deerflow/config/agents_config.py b/backend/packages/harness/deerflow/config/agents_config.py
index 0fc985115..107bdd5b6 100644
--- a/backend/packages/harness/deerflow/config/agents_config.py
+++ b/backend/packages/harness/deerflow/config/agents_config.py
@@ -5,7 +5,7 @@ import re
 from typing import Any
 
 import yaml
-from pydantic import BaseModel
+from pydantic import BaseModel, ConfigDict
 
 from deerflow.config.paths import get_paths
 
@@ -29,6 +29,8 @@ def validate_agent_name(name: str | None) -> str | None:
 class AgentConfig(BaseModel):
     """Configuration for a custom agent."""
 
+    model_config = ConfigDict(frozen=True)
+
     name: str
     description: str = ""
     model: str | None = None
diff --git a/backend/packages/harness/deerflow/config/app_config.py b/backend/packages/harness/deerflow/config/app_config.py
index 2aa81c9f0..84f1c2f9f 100644
--- a/backend/packages/harness/deerflow/config/app_config.py
+++ b/backend/packages/harness/deerflow/config/app_config.py
@@ -1,6 +1,7 @@
+from __future__ import annotations
+
 import logging
 import os
-from contextvars import ContextVar
 from pathlib import Path
 from typing import Any, Self
 
@@ -8,23 +9,25 @@ import yaml
 from dotenv import load_dotenv
 from pydantic import BaseModel, ConfigDict, Field
 
-from deerflow.config.acp_config import load_acp_config_from_dict
-from deerflow.config.agents_api_config import AgentsApiConfig, load_agents_api_config_from_dict
-from deerflow.config.checkpointer_config import CheckpointerConfig, load_checkpointer_config_from_dict
+from deerflow.config.acp_config import ACPAgentConfig
+from deerflow.config.agents_api_config import AgentsApiConfig
+from deerflow.config.checkpointer_config import CheckpointerConfig
+from deerflow.config.database_config import DatabaseConfig
 from deerflow.config.extensions_config import ExtensionsConfig
-from deerflow.config.guardrails_config import GuardrailsConfig, load_guardrails_config_from_dict
-from deerflow.config.memory_config import MemoryConfig, load_memory_config_from_dict
+from deerflow.config.guardrails_config import GuardrailsConfig
+from deerflow.config.memory_config import MemoryConfig
 from deerflow.config.model_config import ModelConfig
+from deerflow.config.run_events_config import RunEventsConfig
 from deerflow.config.sandbox_config import SandboxConfig
 from deerflow.config.skill_evolution_config import SkillEvolutionConfig
 from deerflow.config.skills_config import SkillsConfig
-from deerflow.config.stream_bridge_config import StreamBridgeConfig, load_stream_bridge_config_from_dict
-from deerflow.config.subagents_config import SubagentsAppConfig, load_subagents_config_from_dict
-from deerflow.config.summarization_config import SummarizationConfig, load_summarization_config_from_dict
-from deerflow.config.title_config import TitleConfig, load_title_config_from_dict
+from deerflow.config.stream_bridge_config import StreamBridgeConfig
+from deerflow.config.subagents_config import SubagentsAppConfig
+from deerflow.config.summarization_config import SummarizationConfig
+from deerflow.config.title_config import TitleConfig
 from deerflow.config.token_usage_config import TokenUsageConfig
 from deerflow.config.tool_config import ToolConfig, ToolGroupConfig
-from deerflow.config.tool_search_config import ToolSearchConfig, load_tool_search_config_from_dict
+from deerflow.config.tool_search_config import ToolSearchConfig
 
 load_dotenv()
 
@@ -65,9 +68,12 @@ class AppConfig(BaseModel):
     subagents: SubagentsAppConfig = Field(default_factory=SubagentsAppConfig, description="Subagent runtime configuration")
     guardrails: GuardrailsConfig = Field(default_factory=GuardrailsConfig, description="Guardrail middleware configuration")
     circuit_breaker: CircuitBreakerConfig = Field(default_factory=CircuitBreakerConfig, description="LLM circuit breaker configuration")
-    model_config = ConfigDict(extra="allow", frozen=False)
+    database: DatabaseConfig = Field(default_factory=DatabaseConfig, description="Unified database backend configuration")
+    run_events: RunEventsConfig = Field(default_factory=RunEventsConfig, description="Run event storage configuration")
+    model_config = ConfigDict(extra="allow", frozen=True)
     checkpointer: CheckpointerConfig | None = Field(default=None, description="Checkpointer configuration")
     stream_bridge: StreamBridgeConfig | None = Field(default=None, description="Stream bridge configuration")
+    acp_agents: dict[str, ACPAgentConfig] = Field(default_factory=dict, description="ACP agent configurations keyed by agent name")
 
     @classmethod
     def resolve_config_path(cls, config_path: str | None = None) -> Path:
@@ -115,49 +121,6 @@ class AppConfig(BaseModel):
 
         config_data = cls.resolve_env_variables(config_data)
 
-        # Load title config if present
-        if "title" in config_data:
-            load_title_config_from_dict(config_data["title"])
-
-        # Load summarization config if present
-        if "summarization" in config_data:
-            load_summarization_config_from_dict(config_data["summarization"])
-
-        # Load memory config if present
-        if "memory" in config_data:
-            load_memory_config_from_dict(config_data["memory"])
-
-        # Always refresh agents API config so removed config sections reset
-        # singleton-backed state to its default/disabled values on reload.
-        load_agents_api_config_from_dict(config_data.get("agents_api") or {})
-
-        # Load subagents config if present
-        if "subagents" in config_data:
-            load_subagents_config_from_dict(config_data["subagents"])
-
-        # Load tool_search config if present
-        if "tool_search" in config_data:
-            load_tool_search_config_from_dict(config_data["tool_search"])
-
-        # Load guardrails config if present
-        if "guardrails" in config_data:
-            load_guardrails_config_from_dict(config_data["guardrails"])
-
-        # Load circuit_breaker config if present
-        if "circuit_breaker" in config_data:
-            config_data["circuit_breaker"] = config_data["circuit_breaker"]
-
-        # Load checkpointer config if present
-        if "checkpointer" in config_data:
-            load_checkpointer_config_from_dict(config_data["checkpointer"])
-
-        # Load stream bridge config if present
-        if "stream_bridge" in config_data:
-            load_stream_bridge_config_from_dict(config_data["stream_bridge"])
-
-        # Always refresh ACP agent config so removed entries do not linger across reloads.
-        load_acp_config_from_dict(config_data.get("acp_agents", {}))
-
         # Load extensions config separately (it's in a different file)
         extensions_config = ExtensionsConfig.from_file()
         config_data["extensions"] = extensions_config.model_dump()
@@ -268,130 +231,8 @@ class AppConfig(BaseModel):
         """
         return next((group for group in self.tool_groups if group.name == name), None)
 
-
-_app_config: AppConfig | None = None
-_app_config_path: Path | None = None
-_app_config_mtime: float | None = None
-_app_config_is_custom = False
-_current_app_config: ContextVar[AppConfig | None] = ContextVar("deerflow_current_app_config", default=None)
-_current_app_config_stack: ContextVar[tuple[AppConfig | None, ...]] = ContextVar("deerflow_current_app_config_stack", default=())
-
-
-def _get_config_mtime(config_path: Path) -> float | None:
-    """Get the modification time of a config file if it exists."""
-    try:
-        return config_path.stat().st_mtime
-    except OSError:
-        return None
-
-
-def _load_and_cache_app_config(config_path: str | None = None) -> AppConfig:
-    """Load config from disk and refresh cache metadata."""
-    global _app_config, _app_config_path, _app_config_mtime, _app_config_is_custom
-
-    resolved_path = AppConfig.resolve_config_path(config_path)
-    _app_config = AppConfig.from_file(str(resolved_path))
-    _app_config_path = resolved_path
-    _app_config_mtime = _get_config_mtime(resolved_path)
-    _app_config_is_custom = False
-    return _app_config
-
-
-def get_app_config() -> AppConfig:
-    """Get the DeerFlow config instance.
-
-    Returns a cached singleton instance and automatically reloads it when the
-    underlying config file path or modification time changes. Use
-    `reload_app_config()` to force a reload, or `reset_app_config()` to clear
-    the cache.
-    """
-    global _app_config, _app_config_path, _app_config_mtime
-
-    runtime_override = _current_app_config.get()
-    if runtime_override is not None:
-        return runtime_override
-
-    if _app_config is not None and _app_config_is_custom:
-        return _app_config
-
-    resolved_path = AppConfig.resolve_config_path()
-    current_mtime = _get_config_mtime(resolved_path)
-
-    should_reload = _app_config is None or _app_config_path != resolved_path or _app_config_mtime != current_mtime
-    if should_reload:
-        if _app_config_path == resolved_path and _app_config_mtime is not None and current_mtime is not None and _app_config_mtime != current_mtime:
-            logger.info(
-                "Config file has been modified (mtime: %s -> %s), reloading AppConfig",
-                _app_config_mtime,
-                current_mtime,
-            )
-        _load_and_cache_app_config(str(resolved_path))
-    return _app_config
-
-
-def reload_app_config(config_path: str | None = None) -> AppConfig:
-    """Reload the config from file and update the cached instance.
-
-    This is useful when the config file has been modified and you want
-    to pick up the changes without restarting the application.
-
-    Args:
-        config_path: Optional path to config file. If not provided,
-                     uses the default resolution strategy.
-
-    Returns:
-        The newly loaded AppConfig instance.
-    """
-    return _load_and_cache_app_config(config_path)
-
-
-def reset_app_config() -> None:
-    """Reset the cached config instance.
-
-    This clears the singleton cache, causing the next call to
-    `get_app_config()` to reload from file. Useful for testing
-    or when switching between different configurations.
-    """
-    global _app_config, _app_config_path, _app_config_mtime, _app_config_is_custom
-    _app_config = None
-    _app_config_path = None
-    _app_config_mtime = None
-    _app_config_is_custom = False
-
-
-def set_app_config(config: AppConfig) -> None:
-    """Set a custom config instance.
-
-    This allows injecting a custom or mock config for testing purposes.
-
-    Args:
-        config: The AppConfig instance to use.
-    """
-    global _app_config, _app_config_path, _app_config_mtime, _app_config_is_custom
-    _app_config = config
-    _app_config_path = None
-    _app_config_mtime = None
-    _app_config_is_custom = True
-
-
-def peek_current_app_config() -> AppConfig | None:
-    """Return the runtime-scoped AppConfig override, if one is active."""
-    return _current_app_config.get()
-
-
-def push_current_app_config(config: AppConfig) -> None:
-    """Push a runtime-scoped AppConfig override for the current execution context."""
-    stack = _current_app_config_stack.get()
-    _current_app_config_stack.set(stack + (_current_app_config.get(),))
-    _current_app_config.set(config)
-
-
-def pop_current_app_config() -> None:
-    """Pop the latest runtime-scoped AppConfig override for the current execution context."""
-    stack = _current_app_config_stack.get()
-    if not stack:
-        _current_app_config.set(None)
-        return
-    previous = stack[-1]
-    _current_app_config_stack.set(stack[:-1])
-    _current_app_config.set(previous)
+    # AppConfig is a pure value object: construct with ``from_file()``, pass around.
+    # Composition roots that hold the resolved instance:
+    #   - Gateway:   ``app.state.config`` via ``Depends(get_config)``
+    #   - Client:    ``DeerFlowClient._app_config``
+    #   - Agent run: ``Runtime[DeerFlowContext].context.app_config``
diff --git a/backend/packages/harness/deerflow/config/checkpointer_config.py b/backend/packages/harness/deerflow/config/checkpointer_config.py
index 6947cefb7..1e81177e8 100644
--- a/backend/packages/harness/deerflow/config/checkpointer_config.py
+++ b/backend/packages/harness/deerflow/config/checkpointer_config.py
@@ -2,7 +2,7 @@
 
 from typing import Literal
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 CheckpointerType = Literal["memory", "sqlite", "postgres"]
 
@@ -10,6 +10,8 @@ CheckpointerType = Literal["memory", "sqlite", "postgres"]
 class CheckpointerConfig(BaseModel):
     """Configuration for LangGraph state persistence checkpointer."""
 
+    model_config = ConfigDict(frozen=True)
+
     type: CheckpointerType = Field(
         description="Checkpointer backend type. "
         "'memory' is in-process only (lost on restart). "
@@ -23,24 +25,3 @@ class CheckpointerConfig(BaseModel):
         "For sqlite, use a file path like '.deer-flow/checkpoints.db' or ':memory:' for in-memory. "
         "For postgres, use a DSN like 'postgresql://user:pass@localhost:5432/db'.",
     )
-
-
-# Global configuration instance — None means no checkpointer is configured.
-_checkpointer_config: CheckpointerConfig | None = None
-
-
-def get_checkpointer_config() -> CheckpointerConfig | None:
-    """Get the current checkpointer configuration, or None if not configured."""
-    return _checkpointer_config
-
-
-def set_checkpointer_config(config: CheckpointerConfig | None) -> None:
-    """Set the checkpointer configuration."""
-    global _checkpointer_config
-    _checkpointer_config = config
-
-
-def load_checkpointer_config_from_dict(config_dict: dict) -> None:
-    """Load checkpointer configuration from a dictionary."""
-    global _checkpointer_config
-    _checkpointer_config = CheckpointerConfig(**config_dict)
diff --git a/backend/packages/harness/deerflow/config/database_config.py b/backend/packages/harness/deerflow/config/database_config.py
new file mode 100644
index 000000000..95edc84e5
--- /dev/null
+++ b/backend/packages/harness/deerflow/config/database_config.py
@@ -0,0 +1,103 @@
+"""Unified database backend configuration.
+
+Controls BOTH the LangGraph checkpointer and the DeerFlow application
+persistence layer (runs, threads metadata, users, etc.). The user
+configures one backend; the system handles physical separation details.
+
+SQLite mode: checkpointer and app share a single .db file
+({sqlite_dir}/deerflow.db) with WAL journal mode enabled on every
+connection. WAL allows concurrent readers and a single writer without
+blocking, making a unified file safe for both workloads.  Writers
+that contend for the lock wait via the default 5-second sqlite3
+busy timeout rather than failing immediately.
+
+Postgres mode: both use the same database URL but maintain independent
+connection pools with different lifecycles.
+
+Memory mode: checkpointer uses MemorySaver, app uses in-memory stores.
+No database is initialized.
+
+Sensitive values (postgres_url) should use $VAR syntax in config.yaml
+to reference environment variables from .env:
+
+    database:
+      backend: postgres
+      postgres_url: $DATABASE_URL
+
+The $VAR resolution is handled by AppConfig.resolve_env_variables()
+before this config is instantiated -- DatabaseConfig itself does not
+need to do any environment variable processing.
+"""
+
+from __future__ import annotations
+
+import os
+from typing import Literal
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class DatabaseConfig(BaseModel):
+    model_config = ConfigDict(frozen=True)
+    backend: Literal["memory", "sqlite", "postgres"] = Field(
+        default="memory",
+        description=("Storage backend for both checkpointer and application data. 'memory' for development (no persistence across restarts), 'sqlite' for single-node deployment, 'postgres' for production multi-node deployment."),
+    )
+    sqlite_dir: str = Field(
+        default=".deer-flow/data",
+        description=("Directory for the SQLite database file. Both checkpointer and application data share {sqlite_dir}/deerflow.db."),
+    )
+    postgres_url: str = Field(
+        default="",
+        description=(
+            "PostgreSQL connection URL, shared by checkpointer and app. "
+            "Use $DATABASE_URL in config.yaml to reference .env. "
+            "Example: postgresql://user:pass@host:5432/deerflow "
+            "(the +asyncpg driver suffix is added automatically where needed)."
+        ),
+    )
+    echo_sql: bool = Field(
+        default=False,
+        description="Echo all SQL statements to log (debug only).",
+    )
+    pool_size: int = Field(
+        default=5,
+        description="Connection pool size for the app ORM engine (postgres only).",
+    )
+
+    # -- Derived helpers (not user-configured) --
+
+    @property
+    def _resolved_sqlite_dir(self) -> str:
+        """Resolve sqlite_dir to an absolute path (relative to CWD)."""
+        from pathlib import Path
+
+        return str(Path(self.sqlite_dir).resolve())
+
+    @property
+    def sqlite_path(self) -> str:
+        """Unified SQLite file path shared by checkpointer and app."""
+        return os.path.join(self._resolved_sqlite_dir, "deerflow.db")
+
+    # Backward-compatible aliases
+    @property
+    def checkpointer_sqlite_path(self) -> str:
+        """SQLite file path for the LangGraph checkpointer (alias for sqlite_path)."""
+        return self.sqlite_path
+
+    @property
+    def app_sqlite_path(self) -> str:
+        """SQLite file path for application ORM data (alias for sqlite_path)."""
+        return self.sqlite_path
+
+    @property
+    def app_sqlalchemy_url(self) -> str:
+        """SQLAlchemy async URL for the application ORM engine."""
+        if self.backend == "sqlite":
+            return f"sqlite+aiosqlite:///{self.sqlite_path}"
+        if self.backend == "postgres":
+            url = self.postgres_url
+            if url.startswith("postgresql://"):
+                url = url.replace("postgresql://", "postgresql+asyncpg://", 1)
+            return url
+        raise ValueError(f"No SQLAlchemy URL for backend={self.backend!r}")
diff --git a/backend/packages/harness/deerflow/config/deer_flow_context.py b/backend/packages/harness/deerflow/config/deer_flow_context.py
new file mode 100644
index 000000000..42e816c30
--- /dev/null
+++ b/backend/packages/harness/deerflow/config/deer_flow_context.py
@@ -0,0 +1,55 @@
+"""Per-invocation context for DeerFlow agent execution.
+
+Injected via LangGraph Runtime. Middleware and tools access this
+via Runtime[DeerFlowContext] parameters, through resolve_context().
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+    from deerflow.config.app_config import AppConfig
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass(frozen=True)
+class DeerFlowContext:
+    """Typed, immutable, per-invocation context injected via LangGraph Runtime.
+
+    Fields are all known at run start and never change during execution.
+    Mutable runtime state (e.g. sandbox_id) flows through ThreadState, not here.
+    """
+
+    app_config: AppConfig
+    thread_id: str
+    agent_name: str | None = None
+
+
+def resolve_context(runtime: Any) -> DeerFlowContext:
+    """Return the typed DeerFlowContext that the runtime carries.
+
+    Gateway mode (``DeerFlowClient``, ``run_agent``) always attaches a typed
+    ``DeerFlowContext`` via ``agent.astream(context=...)``; the LangGraph
+    Server path uses ``langgraph.json`` registration where the top-level
+    ``make_lead_agent`` loads ``AppConfig`` from disk itself, so we still
+    arrive here with a typed context.
+
+    Only the dict/None shapes that legacy tests used to exercise would fall
+    through this function; we now reject them loudly instead of papering
+    over the missing context with an ambient ``AppConfig`` lookup.
+    """
+    ctx = getattr(runtime, "context", None)
+    if isinstance(ctx, DeerFlowContext):
+        return ctx
+
+    raise RuntimeError(
+        "resolve_context: runtime.context is not a DeerFlowContext "
+        "(got type %s). Every entry point must attach one at invoke time — "
+        "Gateway/Client via agent.astream(context=DeerFlowContext(...)), "
+        "LangGraph Server via the make_lead_agent boundary that loads "
+        "AppConfig.from_file()." % type(ctx).__name__
+    )
diff --git a/backend/packages/harness/deerflow/config/extensions_config.py b/backend/packages/harness/deerflow/config/extensions_config.py
index e7a48d166..4c31697c6 100644
--- a/backend/packages/harness/deerflow/config/extensions_config.py
+++ b/backend/packages/harness/deerflow/config/extensions_config.py
@@ -11,6 +11,8 @@ from pydantic import BaseModel, ConfigDict, Field
 class McpOAuthConfig(BaseModel):
     """OAuth configuration for an MCP server (HTTP/SSE transports)."""
 
+    model_config = ConfigDict(extra="allow", frozen=True)
+
     enabled: bool = Field(default=True, description="Whether OAuth token injection is enabled")
     token_url: str = Field(description="OAuth token endpoint URL")
     grant_type: Literal["client_credentials", "refresh_token"] = Field(
@@ -28,12 +30,13 @@ class McpOAuthConfig(BaseModel):
     default_token_type: str = Field(default="Bearer", description="Default token type when missing in token response")
     refresh_skew_seconds: int = Field(default=60, description="Refresh token this many seconds before expiry")
     extra_token_params: dict[str, str] = Field(default_factory=dict, description="Additional form params sent to token endpoint")
-    model_config = ConfigDict(extra="allow")
 
 
 class McpServerConfig(BaseModel):
     """Configuration for a single MCP server."""
 
+    model_config = ConfigDict(extra="allow", frozen=True)
+
     enabled: bool = Field(default=True, description="Whether this MCP server is enabled")
     type: str = Field(default="stdio", description="Transport type: 'stdio', 'sse', or 'http'")
     command: str | None = Field(default=None, description="Command to execute to start the MCP server (for stdio type)")
@@ -43,12 +46,13 @@ class McpServerConfig(BaseModel):
     headers: dict[str, str] = Field(default_factory=dict, description="HTTP headers to send (for sse or http type)")
     oauth: McpOAuthConfig | None = Field(default=None, description="OAuth configuration (for sse or http type)")
     description: str = Field(default="", description="Human-readable description of what this MCP server provides")
-    model_config = ConfigDict(extra="allow")
 
 
 class SkillStateConfig(BaseModel):
     """Configuration for a single skill's state."""
 
+    model_config = ConfigDict(frozen=True)
+
     enabled: bool = Field(default=True, description="Whether this skill is enabled")
 
 
@@ -64,7 +68,7 @@ class ExtensionsConfig(BaseModel):
         default_factory=dict,
         description="Map of skill name to state configuration",
     )
-    model_config = ConfigDict(extra="allow", populate_by_name=True)
+    model_config = ConfigDict(extra="allow", frozen=True, populate_by_name=True)
 
     @classmethod
     def resolve_config_path(cls, config_path: str | None = None) -> Path | None:
@@ -195,62 +199,3 @@ class ExtensionsConfig(BaseModel):
             # Default to enable for public & custom skill
             return skill_category in ("public", "custom")
         return skill_config.enabled
-
-
-_extensions_config: ExtensionsConfig | None = None
-
-
-def get_extensions_config() -> ExtensionsConfig:
-    """Get the extensions config instance.
-
-    Returns a cached singleton instance. Use `reload_extensions_config()` to reload
-    from file, or `reset_extensions_config()` to clear the cache.
-
-    Returns:
-        The cached ExtensionsConfig instance.
-    """
-    global _extensions_config
-    if _extensions_config is None:
-        _extensions_config = ExtensionsConfig.from_file()
-    return _extensions_config
-
-
-def reload_extensions_config(config_path: str | None = None) -> ExtensionsConfig:
-    """Reload the extensions config from file and update the cached instance.
-
-    This is useful when the config file has been modified and you want
-    to pick up the changes without restarting the application.
-
-    Args:
-        config_path: Optional path to extensions config file. If not provided,
-                     uses the default resolution strategy.
-
-    Returns:
-        The newly loaded ExtensionsConfig instance.
-    """
-    global _extensions_config
-    _extensions_config = ExtensionsConfig.from_file(config_path)
-    return _extensions_config
-
-
-def reset_extensions_config() -> None:
-    """Reset the cached extensions config instance.
-
-    This clears the singleton cache, causing the next call to
-    `get_extensions_config()` to reload from file. Useful for testing
-    or when switching between different configurations.
-    """
-    global _extensions_config
-    _extensions_config = None
-
-
-def set_extensions_config(config: ExtensionsConfig) -> None:
-    """Set a custom extensions config instance.
-
-    This allows injecting a custom or mock config for testing purposes.
-
-    Args:
-        config: The ExtensionsConfig instance to use.
-    """
-    global _extensions_config
-    _extensions_config = config
diff --git a/backend/packages/harness/deerflow/config/guardrails_config.py b/backend/packages/harness/deerflow/config/guardrails_config.py
index fe7a0b889..b60e6d678 100644
--- a/backend/packages/harness/deerflow/config/guardrails_config.py
+++ b/backend/packages/harness/deerflow/config/guardrails_config.py
@@ -1,11 +1,13 @@
 """Configuration for pre-tool-call authorization."""
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 
 class GuardrailProviderConfig(BaseModel):
     """Configuration for a guardrail provider."""
 
+    model_config = ConfigDict(frozen=True)
+
     use: str = Field(description="Class path (e.g. 'deerflow.guardrails.builtin:AllowlistProvider')")
     config: dict = Field(default_factory=dict, description="Provider-specific settings passed as kwargs")
 
@@ -18,31 +20,9 @@ class GuardrailsConfig(BaseModel):
     agent's passport reference, and returns an allow/deny decision.
     """
 
+    model_config = ConfigDict(frozen=True)
+
     enabled: bool = Field(default=False, description="Enable guardrail middleware")
     fail_closed: bool = Field(default=True, description="Block tool calls if provider errors")
     passport: str | None = Field(default=None, description="OAP passport path or hosted agent ID")
     provider: GuardrailProviderConfig | None = Field(default=None, description="Guardrail provider configuration")
-
-
-_guardrails_config: GuardrailsConfig | None = None
-
-
-def get_guardrails_config() -> GuardrailsConfig:
-    """Get the guardrails config, returning defaults if not loaded."""
-    global _guardrails_config
-    if _guardrails_config is None:
-        _guardrails_config = GuardrailsConfig()
-    return _guardrails_config
-
-
-def load_guardrails_config_from_dict(data: dict) -> GuardrailsConfig:
-    """Load guardrails config from a dict (called during AppConfig loading)."""
-    global _guardrails_config
-    _guardrails_config = GuardrailsConfig.model_validate(data)
-    return _guardrails_config
-
-
-def reset_guardrails_config() -> None:
-    """Reset the cached config instance. Used in tests to prevent singleton leaks."""
-    global _guardrails_config
-    _guardrails_config = None
diff --git a/backend/packages/harness/deerflow/config/memory_config.py b/backend/packages/harness/deerflow/config/memory_config.py
index 8565aa216..c1209c5e1 100644
--- a/backend/packages/harness/deerflow/config/memory_config.py
+++ b/backend/packages/harness/deerflow/config/memory_config.py
@@ -1,11 +1,13 @@
 """Configuration for memory mechanism."""
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 
 class MemoryConfig(BaseModel):
     """Configuration for global memory mechanism."""
 
+    model_config = ConfigDict(frozen=True)
+
     enabled: bool = Field(
         default=True,
         description="Whether to enable memory mechanism",
@@ -14,8 +16,9 @@ class MemoryConfig(BaseModel):
         default="",
         description=(
             "Path to store memory data. "
-            "If empty, defaults to `{base_dir}/memory.json` (see Paths.memory_file). "
-            "Absolute paths are used as-is. "
+            "If empty, defaults to per-user memory at `{base_dir}/users/{user_id}/memory.json`. "
+            "Absolute paths are used as-is and opt out of per-user isolation "
+            "(all users share the same file). "
             "Relative paths are resolved against `Paths.base_dir` "
             "(not the backend working directory). "
             "Note: if you previously set this to `.deer-flow/memory.json`, "
@@ -59,24 +62,3 @@ class MemoryConfig(BaseModel):
         le=8000,
         description="Maximum tokens to use for memory injection",
     )
-
-
-# Global configuration instance
-_memory_config: MemoryConfig = MemoryConfig()
-
-
-def get_memory_config() -> MemoryConfig:
-    """Get the current memory configuration."""
-    return _memory_config
-
-
-def set_memory_config(config: MemoryConfig) -> None:
-    """Set the memory configuration."""
-    global _memory_config
-    _memory_config = config
-
-
-def load_memory_config_from_dict(config_dict: dict) -> None:
-    """Load memory configuration from a dictionary."""
-    global _memory_config
-    _memory_config = MemoryConfig(**config_dict)
diff --git a/backend/packages/harness/deerflow/config/model_config.py b/backend/packages/harness/deerflow/config/model_config.py
index e9a3e1c16..fde36222f 100644
--- a/backend/packages/harness/deerflow/config/model_config.py
+++ b/backend/packages/harness/deerflow/config/model_config.py
@@ -12,7 +12,7 @@ class ModelConfig(BaseModel):
         description="Class path of the model provider(e.g. langchain_openai.ChatOpenAI)",
     )
     model: str = Field(..., description="Model name")
-    model_config = ConfigDict(extra="allow")
+    model_config = ConfigDict(extra="allow", frozen=True)
     use_responses_api: bool | None = Field(
         default=None,
         description="Whether to route OpenAI ChatOpenAI calls through the /v1/responses API",
diff --git a/backend/packages/harness/deerflow/config/paths.py b/backend/packages/harness/deerflow/config/paths.py
index 2d5661e63..f1ce7eae1 100644
--- a/backend/packages/harness/deerflow/config/paths.py
+++ b/backend/packages/harness/deerflow/config/paths.py
@@ -7,6 +7,7 @@ from pathlib import Path, PureWindowsPath
 VIRTUAL_PATH_PREFIX = "/mnt/user-data"
 
 _SAFE_THREAD_ID_RE = re.compile(r"^[A-Za-z0-9_\-]+$")
+_SAFE_USER_ID_RE = re.compile(r"^[A-Za-z0-9_\-]+$")
 
 
 def _default_local_base_dir() -> Path:
@@ -22,6 +23,13 @@ def _validate_thread_id(thread_id: str) -> str:
     return thread_id
 
 
+def _validate_user_id(user_id: str) -> str:
+    """Validate a user ID before using it in filesystem paths."""
+    if not _SAFE_USER_ID_RE.match(user_id):
+        raise ValueError(f"Invalid user_id {user_id!r}: only alphanumeric characters, hyphens, and underscores are allowed.")
+    return user_id
+
+
 def _join_host_path(base: str, *parts: str) -> str:
     """Join host filesystem path segments while preserving native style.
 
@@ -134,44 +142,63 @@ class Paths:
         """Per-agent memory file: `{base_dir}/agents/{name}/memory.json`."""
         return self.agent_dir(name) / "memory.json"
 
-    def thread_dir(self, thread_id: str) -> Path:
+    def user_dir(self, user_id: str) -> Path:
+        """Directory for a specific user: `{base_dir}/users/{user_id}/`."""
+        return self.base_dir / "users" / _validate_user_id(user_id)
+
+    def user_memory_file(self, user_id: str) -> Path:
+        """Per-user memory file: `{base_dir}/users/{user_id}/memory.json`."""
+        return self.user_dir(user_id) / "memory.json"
+
+    def user_agent_memory_file(self, user_id: str, agent_name: str) -> Path:
+        """Per-user per-agent memory: `{base_dir}/users/{user_id}/agents/{name}/memory.json`."""
+        return self.user_dir(user_id) / "agents" / agent_name.lower() / "memory.json"
+
+    def thread_dir(self, thread_id: str, *, user_id: str | None = None) -> Path:
         """
-        Host path for a thread's data: `{base_dir}/threads/{thread_id}/`
+        Host path for a thread's data.
+
+        When *user_id* is provided:
+            `{base_dir}/users/{user_id}/threads/{thread_id}/`
+        Otherwise (legacy layout):
+            `{base_dir}/threads/{thread_id}/`
 
         This directory contains a `user-data/` subdirectory that is mounted
         as `/mnt/user-data/` inside the sandbox.
 
         Raises:
-            ValueError: If `thread_id` contains unsafe characters (path separators
-                        or `..`) that could cause directory traversal.
+            ValueError: If `thread_id` or `user_id` contains unsafe characters (path
+                        separators or `..`) that could cause directory traversal.
         """
+        if user_id is not None:
+            return self.user_dir(user_id) / "threads" / _validate_thread_id(thread_id)
         return self.base_dir / "threads" / _validate_thread_id(thread_id)
 
-    def sandbox_work_dir(self, thread_id: str) -> Path:
+    def sandbox_work_dir(self, thread_id: str, *, user_id: str | None = None) -> Path:
         """
         Host path for the agent's workspace directory.
         Host: `{base_dir}/threads/{thread_id}/user-data/workspace/`
         Sandbox: `/mnt/user-data/workspace/`
         """
-        return self.thread_dir(thread_id) / "user-data" / "workspace"
+        return self.thread_dir(thread_id, user_id=user_id) / "user-data" / "workspace"
 
-    def sandbox_uploads_dir(self, thread_id: str) -> Path:
+    def sandbox_uploads_dir(self, thread_id: str, *, user_id: str | None = None) -> Path:
         """
         Host path for user-uploaded files.
         Host: `{base_dir}/threads/{thread_id}/user-data/uploads/`
         Sandbox: `/mnt/user-data/uploads/`
         """
-        return self.thread_dir(thread_id) / "user-data" / "uploads"
+        return self.thread_dir(thread_id, user_id=user_id) / "user-data" / "uploads"
 
-    def sandbox_outputs_dir(self, thread_id: str) -> Path:
+    def sandbox_outputs_dir(self, thread_id: str, *, user_id: str | None = None) -> Path:
         """
         Host path for agent-generated artifacts.
         Host: `{base_dir}/threads/{thread_id}/user-data/outputs/`
         Sandbox: `/mnt/user-data/outputs/`
         """
-        return self.thread_dir(thread_id) / "user-data" / "outputs"
+        return self.thread_dir(thread_id, user_id=user_id) / "user-data" / "outputs"
 
-    def acp_workspace_dir(self, thread_id: str) -> Path:
+    def acp_workspace_dir(self, thread_id: str, *, user_id: str | None = None) -> Path:
         """
         Host path for the ACP workspace of a specific thread.
         Host: `{base_dir}/threads/{thread_id}/acp-workspace/`
@@ -180,41 +207,43 @@ class Paths:
         Each thread gets its own isolated ACP workspace so that concurrent
         sessions cannot read each other's ACP agent outputs.
         """
-        return self.thread_dir(thread_id) / "acp-workspace"
+        return self.thread_dir(thread_id, user_id=user_id) / "acp-workspace"
 
-    def sandbox_user_data_dir(self, thread_id: str) -> Path:
+    def sandbox_user_data_dir(self, thread_id: str, *, user_id: str | None = None) -> Path:
         """
         Host path for the user-data root.
         Host: `{base_dir}/threads/{thread_id}/user-data/`
         Sandbox: `/mnt/user-data/`
         """
-        return self.thread_dir(thread_id) / "user-data"
+        return self.thread_dir(thread_id, user_id=user_id) / "user-data"
 
-    def host_thread_dir(self, thread_id: str) -> str:
+    def host_thread_dir(self, thread_id: str, *, user_id: str | None = None) -> str:
         """Host path for a thread directory, preserving Windows path syntax."""
+        if user_id is not None:
+            return _join_host_path(self._host_base_dir_str(), "users", _validate_user_id(user_id), "threads", _validate_thread_id(thread_id))
         return _join_host_path(self._host_base_dir_str(), "threads", _validate_thread_id(thread_id))
 
-    def host_sandbox_user_data_dir(self, thread_id: str) -> str:
+    def host_sandbox_user_data_dir(self, thread_id: str, *, user_id: str | None = None) -> str:
         """Host path for a thread's user-data root."""
-        return _join_host_path(self.host_thread_dir(thread_id), "user-data")
+        return _join_host_path(self.host_thread_dir(thread_id, user_id=user_id), "user-data")
 
-    def host_sandbox_work_dir(self, thread_id: str) -> str:
+    def host_sandbox_work_dir(self, thread_id: str, *, user_id: str | None = None) -> str:
         """Host path for the workspace mount source."""
-        return _join_host_path(self.host_sandbox_user_data_dir(thread_id), "workspace")
+        return _join_host_path(self.host_sandbox_user_data_dir(thread_id, user_id=user_id), "workspace")
 
-    def host_sandbox_uploads_dir(self, thread_id: str) -> str:
+    def host_sandbox_uploads_dir(self, thread_id: str, *, user_id: str | None = None) -> str:
         """Host path for the uploads mount source."""
-        return _join_host_path(self.host_sandbox_user_data_dir(thread_id), "uploads")
+        return _join_host_path(self.host_sandbox_user_data_dir(thread_id, user_id=user_id), "uploads")
 
-    def host_sandbox_outputs_dir(self, thread_id: str) -> str:
+    def host_sandbox_outputs_dir(self, thread_id: str, *, user_id: str | None = None) -> str:
         """Host path for the outputs mount source."""
-        return _join_host_path(self.host_sandbox_user_data_dir(thread_id), "outputs")
+        return _join_host_path(self.host_sandbox_user_data_dir(thread_id, user_id=user_id), "outputs")
 
-    def host_acp_workspace_dir(self, thread_id: str) -> str:
+    def host_acp_workspace_dir(self, thread_id: str, *, user_id: str | None = None) -> str:
         """Host path for the ACP workspace mount source."""
-        return _join_host_path(self.host_thread_dir(thread_id), "acp-workspace")
+        return _join_host_path(self.host_thread_dir(thread_id, user_id=user_id), "acp-workspace")
 
-    def ensure_thread_dirs(self, thread_id: str) -> None:
+    def ensure_thread_dirs(self, thread_id: str, *, user_id: str | None = None) -> None:
         """Create all standard sandbox directories for a thread.
 
         Directories are created with mode 0o777 so that sandbox containers
@@ -228,24 +257,24 @@ class Paths:
         ACP agent invocation.
         """
         for d in [
-            self.sandbox_work_dir(thread_id),
-            self.sandbox_uploads_dir(thread_id),
-            self.sandbox_outputs_dir(thread_id),
-            self.acp_workspace_dir(thread_id),
+            self.sandbox_work_dir(thread_id, user_id=user_id),
+            self.sandbox_uploads_dir(thread_id, user_id=user_id),
+            self.sandbox_outputs_dir(thread_id, user_id=user_id),
+            self.acp_workspace_dir(thread_id, user_id=user_id),
         ]:
             d.mkdir(parents=True, exist_ok=True)
             d.chmod(0o777)
 
-    def delete_thread_dir(self, thread_id: str) -> None:
+    def delete_thread_dir(self, thread_id: str, *, user_id: str | None = None) -> None:
         """Delete all persisted data for a thread.
 
         The operation is idempotent: missing thread directories are ignored.
         """
-        thread_dir = self.thread_dir(thread_id)
+        thread_dir = self.thread_dir(thread_id, user_id=user_id)
         if thread_dir.exists():
             shutil.rmtree(thread_dir)
 
-    def resolve_virtual_path(self, thread_id: str, virtual_path: str) -> Path:
+    def resolve_virtual_path(self, thread_id: str, virtual_path: str, *, user_id: str | None = None) -> Path:
         """Resolve a sandbox virtual path to the actual host filesystem path.
 
         Args:
@@ -253,6 +282,7 @@ class Paths:
             virtual_path: Virtual path as seen inside the sandbox, e.g.
                           ``/mnt/user-data/outputs/report.pdf``.
                           Leading slashes are stripped before matching.
+            user_id: Optional user ID for user-scoped path resolution.
 
         Returns:
             The resolved absolute host filesystem path.
@@ -270,7 +300,7 @@ class Paths:
             raise ValueError(f"Path must start with /{prefix}")
 
         relative = stripped[len(prefix) :].lstrip("/")
-        base = self.sandbox_user_data_dir(thread_id).resolve()
+        base = self.sandbox_user_data_dir(thread_id, user_id=user_id).resolve()
         actual = (base / relative).resolve()
 
         try:
diff --git a/backend/packages/harness/deerflow/config/run_events_config.py b/backend/packages/harness/deerflow/config/run_events_config.py
new file mode 100644
index 000000000..056d0b535
--- /dev/null
+++ b/backend/packages/harness/deerflow/config/run_events_config.py
@@ -0,0 +1,34 @@
+"""Run event storage configuration.
+
+Controls where run events (messages + execution traces) are persisted.
+
+Backends:
+- memory: In-memory storage, data lost on restart. Suitable for
+  development and testing.
+- db: SQL database via SQLAlchemy ORM. Provides full query capability.
+  Suitable for production deployments.
+- jsonl: Append-only JSONL files. Lightweight alternative for
+  single-node deployments that need persistence without a database.
+"""
+
+from __future__ import annotations
+
+from typing import Literal
+
+from pydantic import BaseModel, ConfigDict, Field
+
+
+class RunEventsConfig(BaseModel):
+    model_config = ConfigDict(frozen=True)
+    backend: Literal["memory", "db", "jsonl"] = Field(
+        default="memory",
+        description="Storage backend for run events. 'memory' for development (no persistence), 'db' for production (SQL queries), 'jsonl' for lightweight single-node persistence.",
+    )
+    max_trace_content: int = Field(
+        default=10240,
+        description="Maximum trace content size in bytes before truncation (db backend only).",
+    )
+    track_token_usage: bool = Field(
+        default=True,
+        description="Whether RunJournal should accumulate token counts to RunRow.",
+    )
diff --git a/backend/packages/harness/deerflow/config/sandbox_config.py b/backend/packages/harness/deerflow/config/sandbox_config.py
index d9aac4ab4..314101ce9 100644
--- a/backend/packages/harness/deerflow/config/sandbox_config.py
+++ b/backend/packages/harness/deerflow/config/sandbox_config.py
@@ -4,6 +4,8 @@ from pydantic import BaseModel, ConfigDict, Field
 class VolumeMountConfig(BaseModel):
     """Configuration for a volume mount."""
 
+    model_config = ConfigDict(frozen=True)
+
     host_path: str = Field(..., description="Path on the host machine")
     container_path: str = Field(..., description="Path inside the container")
     read_only: bool = Field(default=False, description="Whether the mount is read-only")
@@ -80,4 +82,4 @@ class SandboxConfig(BaseModel):
         description="Maximum characters to keep from ls tool output. Output exceeding this limit is head-truncated. Set to 0 to disable truncation.",
     )
 
-    model_config = ConfigDict(extra="allow")
+    model_config = ConfigDict(extra="allow", frozen=True)
diff --git a/backend/packages/harness/deerflow/config/skill_evolution_config.py b/backend/packages/harness/deerflow/config/skill_evolution_config.py
index 056117f6c..1170417b6 100644
--- a/backend/packages/harness/deerflow/config/skill_evolution_config.py
+++ b/backend/packages/harness/deerflow/config/skill_evolution_config.py
@@ -1,9 +1,11 @@
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 
 class SkillEvolutionConfig(BaseModel):
     """Configuration for agent-managed skill evolution."""
 
+    model_config = ConfigDict(frozen=True)
+
     enabled: bool = Field(
         default=False,
         description="Whether the agent can create and modify skills under skills/custom.",
diff --git a/backend/packages/harness/deerflow/config/skills_config.py b/backend/packages/harness/deerflow/config/skills_config.py
index 31a6ca902..272d2897a 100644
--- a/backend/packages/harness/deerflow/config/skills_config.py
+++ b/backend/packages/harness/deerflow/config/skills_config.py
@@ -1,6 +1,6 @@
 from pathlib import Path
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 
 def _default_repo_root() -> Path:
@@ -11,6 +11,8 @@ def _default_repo_root() -> Path:
 class SkillsConfig(BaseModel):
     """Configuration for skills system"""
 
+    model_config = ConfigDict(frozen=True)
+
     path: str | None = Field(
         default=None,
         description="Path to skills directory. If not specified, defaults to ../skills relative to backend directory",
diff --git a/backend/packages/harness/deerflow/config/stream_bridge_config.py b/backend/packages/harness/deerflow/config/stream_bridge_config.py
index 895c4639c..9460f9eb4 100644
--- a/backend/packages/harness/deerflow/config/stream_bridge_config.py
+++ b/backend/packages/harness/deerflow/config/stream_bridge_config.py
@@ -2,7 +2,7 @@
 
 from typing import Literal
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 StreamBridgeType = Literal["memory", "redis"]
 
@@ -10,6 +10,8 @@ StreamBridgeType = Literal["memory", "redis"]
 class StreamBridgeConfig(BaseModel):
     """Configuration for the stream bridge that connects agent workers to SSE endpoints."""
 
+    model_config = ConfigDict(frozen=True)
+
     type: StreamBridgeType = Field(
         default="memory",
         description="Stream bridge backend type. 'memory' uses in-process asyncio.Queue (single-process only). 'redis' uses Redis Streams (planned for Phase 2, not yet implemented).",
@@ -22,25 +24,3 @@ class StreamBridgeConfig(BaseModel):
         default=256,
         description="Maximum number of events buffered per run in the memory bridge.",
     )
-
-
-# Global configuration instance — None means no stream bridge is configured
-# (falls back to memory with defaults).
-_stream_bridge_config: StreamBridgeConfig | None = None
-
-
-def get_stream_bridge_config() -> StreamBridgeConfig | None:
-    """Get the current stream bridge configuration, or None if not configured."""
-    return _stream_bridge_config
-
-
-def set_stream_bridge_config(config: StreamBridgeConfig | None) -> None:
-    """Set the stream bridge configuration."""
-    global _stream_bridge_config
-    _stream_bridge_config = config
-
-
-def load_stream_bridge_config_from_dict(config_dict: dict) -> None:
-    """Load stream bridge configuration from a dictionary."""
-    global _stream_bridge_config
-    _stream_bridge_config = StreamBridgeConfig(**config_dict)
diff --git a/backend/packages/harness/deerflow/config/subagents_config.py b/backend/packages/harness/deerflow/config/subagents_config.py
index e7219284d..025a20547 100644
--- a/backend/packages/harness/deerflow/config/subagents_config.py
+++ b/backend/packages/harness/deerflow/config/subagents_config.py
@@ -1,15 +1,13 @@
 """Configuration for the subagent system loaded from config.yaml."""
 
-import logging
-
-from pydantic import BaseModel, Field
-
-logger = logging.getLogger(__name__)
+from pydantic import BaseModel, ConfigDict, Field
 
 
 class SubagentOverrideConfig(BaseModel):
     """Per-agent configuration overrides."""
 
+    model_config = ConfigDict(frozen=True)
+
     timeout_seconds: int | None = Field(
         default=None,
         ge=1,
@@ -71,6 +69,8 @@ class CustomSubagentConfig(BaseModel):
 class SubagentsAppConfig(BaseModel):
     """Configuration for the subagent system."""
 
+    model_config = ConfigDict(frozen=True)
+
     timeout_seconds: int = Field(
         default=900,
         ge=1,
@@ -140,48 +140,3 @@ class SubagentsAppConfig(BaseModel):
         if override is not None and override.skills is not None:
             return override.skills
         return None
-
-
-_subagents_config: SubagentsAppConfig = SubagentsAppConfig()
-
-
-def get_subagents_app_config() -> SubagentsAppConfig:
-    """Get the current subagents configuration."""
-    return _subagents_config
-
-
-def load_subagents_config_from_dict(config_dict: dict) -> None:
-    """Load subagents configuration from a dictionary."""
-    global _subagents_config
-    _subagents_config = SubagentsAppConfig(**config_dict)
-
-    overrides_summary = {}
-    for name, override in _subagents_config.agents.items():
-        parts = []
-        if override.timeout_seconds is not None:
-            parts.append(f"timeout={override.timeout_seconds}s")
-        if override.max_turns is not None:
-            parts.append(f"max_turns={override.max_turns}")
-        if override.model is not None:
-            parts.append(f"model={override.model}")
-        if override.skills is not None:
-            parts.append(f"skills={override.skills}")
-        if parts:
-            overrides_summary[name] = ", ".join(parts)
-
-    custom_agents_names = list(_subagents_config.custom_agents.keys())
-
-    if overrides_summary or custom_agents_names:
-        logger.info(
-            "Subagents config loaded: default timeout=%ss, default max_turns=%s, per-agent overrides=%s, custom_agents=%s",
-            _subagents_config.timeout_seconds,
-            _subagents_config.max_turns,
-            overrides_summary or "none",
-            custom_agents_names or "none",
-        )
-    else:
-        logger.info(
-            "Subagents config loaded: default timeout=%ss, default max_turns=%s, no per-agent overrides",
-            _subagents_config.timeout_seconds,
-            _subagents_config.max_turns,
-        )
diff --git a/backend/packages/harness/deerflow/config/summarization_config.py b/backend/packages/harness/deerflow/config/summarization_config.py
index fab268ec5..d3705e867 100644
--- a/backend/packages/harness/deerflow/config/summarization_config.py
+++ b/backend/packages/harness/deerflow/config/summarization_config.py
@@ -2,7 +2,7 @@
 
 from typing import Literal
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 ContextSizeType = Literal["fraction", "tokens", "messages"]
 
@@ -10,6 +10,8 @@ ContextSizeType = Literal["fraction", "tokens", "messages"]
 class ContextSize(BaseModel):
     """Context size specification for trigger or keep parameters."""
 
+    model_config = ConfigDict(frozen=True)
+
     type: ContextSizeType = Field(description="Type of context size specification")
     value: int | float = Field(description="Value for the context size specification")
 
@@ -21,6 +23,8 @@ class ContextSize(BaseModel):
 class SummarizationConfig(BaseModel):
     """Configuration for automatic conversation summarization."""
 
+    model_config = ConfigDict(frozen=True)
+
     enabled: bool = Field(
         default=False,
         description="Whether to enable automatic conversation summarization",
@@ -70,24 +74,3 @@ class SummarizationConfig(BaseModel):
         default_factory=lambda: ["read_file", "read", "view", "cat"],
         description="Tool names treated as skill file reads when preserving recently-loaded skills across summarization.",
     )
-
-
-# Global configuration instance
-_summarization_config: SummarizationConfig = SummarizationConfig()
-
-
-def get_summarization_config() -> SummarizationConfig:
-    """Get the current summarization configuration."""
-    return _summarization_config
-
-
-def set_summarization_config(config: SummarizationConfig) -> None:
-    """Set the summarization configuration."""
-    global _summarization_config
-    _summarization_config = config
-
-
-def load_summarization_config_from_dict(config_dict: dict) -> None:
-    """Load summarization configuration from a dictionary."""
-    global _summarization_config
-    _summarization_config = SummarizationConfig(**config_dict)
diff --git a/backend/packages/harness/deerflow/config/title_config.py b/backend/packages/harness/deerflow/config/title_config.py
index f335b4952..508bb5c2a 100644
--- a/backend/packages/harness/deerflow/config/title_config.py
+++ b/backend/packages/harness/deerflow/config/title_config.py
@@ -1,11 +1,13 @@
 """Configuration for automatic thread title generation."""
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 
 class TitleConfig(BaseModel):
     """Configuration for automatic thread title generation."""
 
+    model_config = ConfigDict(frozen=True)
+
     enabled: bool = Field(
         default=True,
         description="Whether to enable automatic title generation",
@@ -30,24 +32,3 @@ class TitleConfig(BaseModel):
         default=("Generate a concise title (max {max_words} words) for this conversation.\nUser: {user_msg}\nAssistant: {assistant_msg}\n\nReturn ONLY the title, no quotes, no explanation."),
         description="Prompt template for title generation",
     )
-
-
-# Global configuration instance
-_title_config: TitleConfig = TitleConfig()
-
-
-def get_title_config() -> TitleConfig:
-    """Get the current title configuration."""
-    return _title_config
-
-
-def set_title_config(config: TitleConfig) -> None:
-    """Set the title configuration."""
-    global _title_config
-    _title_config = config
-
-
-def load_title_config_from_dict(config_dict: dict) -> None:
-    """Load title configuration from a dictionary."""
-    global _title_config
-    _title_config = TitleConfig(**config_dict)
diff --git a/backend/packages/harness/deerflow/config/token_usage_config.py b/backend/packages/harness/deerflow/config/token_usage_config.py
index ab1e26294..5818cc44b 100644
--- a/backend/packages/harness/deerflow/config/token_usage_config.py
+++ b/backend/packages/harness/deerflow/config/token_usage_config.py
@@ -1,7 +1,9 @@
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 
 class TokenUsageConfig(BaseModel):
     """Configuration for token usage tracking."""
 
+    model_config = ConfigDict(frozen=True)
+
     enabled: bool = Field(default=False, description="Enable token usage tracking middleware")
diff --git a/backend/packages/harness/deerflow/config/tool_config.py b/backend/packages/harness/deerflow/config/tool_config.py
index e9c0673d8..10ec85893 100644
--- a/backend/packages/harness/deerflow/config/tool_config.py
+++ b/backend/packages/harness/deerflow/config/tool_config.py
@@ -5,7 +5,7 @@ class ToolGroupConfig(BaseModel):
     """Config section for a tool group"""
 
     name: str = Field(..., description="Unique name for the tool group")
-    model_config = ConfigDict(extra="allow")
+    model_config = ConfigDict(extra="allow", frozen=True)
 
 
 class ToolConfig(BaseModel):
@@ -17,4 +17,4 @@ class ToolConfig(BaseModel):
         ...,
         description="Variable name of the tool provider(e.g. deerflow.sandbox.tools:bash_tool)",
     )
-    model_config = ConfigDict(extra="allow")
+    model_config = ConfigDict(extra="allow", frozen=True)
diff --git a/backend/packages/harness/deerflow/config/tool_search_config.py b/backend/packages/harness/deerflow/config/tool_search_config.py
index cdeddabf2..7ea11d9b4 100644
--- a/backend/packages/harness/deerflow/config/tool_search_config.py
+++ b/backend/packages/harness/deerflow/config/tool_search_config.py
@@ -1,6 +1,6 @@
 """Configuration for deferred tool loading via tool_search."""
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 
 class ToolSearchConfig(BaseModel):
@@ -11,25 +11,9 @@ class ToolSearchConfig(BaseModel):
     via the tool_search tool at runtime.
     """
 
+    model_config = ConfigDict(frozen=True)
+
     enabled: bool = Field(
         default=False,
         description="Defer tools and enable tool_search",
     )
-
-
-_tool_search_config: ToolSearchConfig | None = None
-
-
-def get_tool_search_config() -> ToolSearchConfig:
-    """Get the tool search config, loading from AppConfig if needed."""
-    global _tool_search_config
-    if _tool_search_config is None:
-        _tool_search_config = ToolSearchConfig()
-    return _tool_search_config
-
-
-def load_tool_search_config_from_dict(data: dict) -> ToolSearchConfig:
-    """Load tool search config from a dict (called during AppConfig loading)."""
-    global _tool_search_config
-    _tool_search_config = ToolSearchConfig.model_validate(data)
-    return _tool_search_config
diff --git a/backend/packages/harness/deerflow/config/tracing_config.py b/backend/packages/harness/deerflow/config/tracing_config.py
index 1ef5ebeb4..a8d8fa06f 100644
--- a/backend/packages/harness/deerflow/config/tracing_config.py
+++ b/backend/packages/harness/deerflow/config/tracing_config.py
@@ -1,7 +1,7 @@
 import os
 import threading
 
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, ConfigDict, Field
 
 _config_lock = threading.Lock()
 
@@ -9,6 +9,8 @@ _config_lock = threading.Lock()
 class LangSmithTracingConfig(BaseModel):
     """Configuration for LangSmith tracing."""
 
+    model_config = ConfigDict(frozen=True)
+
     enabled: bool = Field(...)
     api_key: str | None = Field(...)
     project: str = Field(...)
@@ -26,6 +28,8 @@ class LangSmithTracingConfig(BaseModel):
 class LangfuseTracingConfig(BaseModel):
     """Configuration for Langfuse tracing."""
 
+    model_config = ConfigDict(frozen=True)
+
     enabled: bool = Field(...)
     public_key: str | None = Field(...)
     secret_key: str | None = Field(...)
@@ -50,6 +54,8 @@ class LangfuseTracingConfig(BaseModel):
 class TracingConfig(BaseModel):
     """Tracing configuration for supported providers."""
 
+    model_config = ConfigDict(frozen=True)
+
     langsmith: LangSmithTracingConfig = Field(...)
     langfuse: LangfuseTracingConfig = Field(...)
 
diff --git a/backend/packages/harness/deerflow/models/factory.py b/backend/packages/harness/deerflow/models/factory.py
index aec9b291a..af23b1fa5 100644
--- a/backend/packages/harness/deerflow/models/factory.py
+++ b/backend/packages/harness/deerflow/models/factory.py
@@ -2,7 +2,7 @@ import logging
 
 from langchain.chat_models import BaseChatModel
 
-from deerflow.config import get_app_config
+from deerflow.config.app_config import AppConfig
 from deerflow.reflection import resolve_class
 from deerflow.tracing import build_tracing_callbacks
 
@@ -46,16 +46,23 @@ def _enable_stream_usage_by_default(model_use_path: str, model_settings_from_con
         model_settings_from_config["stream_usage"] = True
 
 
-def create_chat_model(name: str | None = None, thinking_enabled: bool = False, **kwargs) -> BaseChatModel:
+def create_chat_model(
+    name: str | None = None,
+    thinking_enabled: bool = False,
+    *,
+    app_config: "AppConfig",
+    **kwargs,
+) -> BaseChatModel:
     """Create a chat model instance from the config.
 
     Args:
         name: The name of the model to create. If None, the first model in the config will be used.
+        app_config: Application config — required.
 
     Returns:
         A chat model instance.
     """
-    config = get_app_config()
+    config = app_config
     if name is None:
         name = config.models[0].name
     model_config = config.get_model_config(name)
diff --git a/backend/packages/harness/deerflow/persistence/__init__.py b/backend/packages/harness/deerflow/persistence/__init__.py
new file mode 100644
index 000000000..dfd64be95
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/__init__.py
@@ -0,0 +1,13 @@
+"""DeerFlow application persistence layer (SQLAlchemy 2.0 async ORM).
+
+This module manages DeerFlow's own application data -- runs metadata,
+thread ownership, cron jobs, users. It is completely separate from
+LangGraph's checkpointer, which manages graph execution state.
+
+Usage:
+    from deerflow.persistence import init_engine, close_engine, get_session_factory
+"""
+
+from deerflow.persistence.engine import close_engine, get_engine, get_session_factory, init_engine
+
+__all__ = ["close_engine", "get_engine", "get_session_factory", "init_engine"]
diff --git a/backend/packages/harness/deerflow/persistence/base.py b/backend/packages/harness/deerflow/persistence/base.py
new file mode 100644
index 000000000..fd99d5f74
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/base.py
@@ -0,0 +1,40 @@
+"""SQLAlchemy declarative base with automatic to_dict support.
+
+All DeerFlow ORM models inherit from this Base. It provides a generic
+to_dict() method via SQLAlchemy's inspect() so individual models don't
+need to write their own serialization logic.
+
+LangGraph's checkpointer tables are NOT managed by this Base.
+"""
+
+from __future__ import annotations
+
+from sqlalchemy import inspect as sa_inspect
+from sqlalchemy.orm import DeclarativeBase
+
+
+class Base(DeclarativeBase):
+    """Base class for all DeerFlow ORM models.
+
+    Provides:
+    - Automatic to_dict() via SQLAlchemy column inspection.
+    - Standard __repr__() showing all column values.
+    """
+
+    def to_dict(self, *, exclude: set[str] | None = None) -> dict:
+        """Convert ORM instance to plain dict.
+
+        Uses SQLAlchemy's inspect() to iterate mapped column attributes.
+
+        Args:
+            exclude: Optional set of column keys to omit.
+
+        Returns:
+            Dict of {column_key: value} for all mapped columns.
+        """
+        exclude = exclude or set()
+        return {c.key: getattr(self, c.key) for c in sa_inspect(type(self)).mapper.column_attrs if c.key not in exclude}
+
+    def __repr__(self) -> str:
+        cols = ", ".join(f"{c.key}={getattr(self, c.key)!r}" for c in sa_inspect(type(self)).mapper.column_attrs)
+        return f"{type(self).__name__}({cols})"
diff --git a/backend/packages/harness/deerflow/persistence/engine.py b/backend/packages/harness/deerflow/persistence/engine.py
new file mode 100644
index 000000000..2777c2450
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/engine.py
@@ -0,0 +1,190 @@
+"""Async SQLAlchemy engine lifecycle management.
+
+Initializes at Gateway startup, provides session factory for
+repositories, disposes at shutdown.
+
+When database.backend="memory", init_engine is a no-op and
+get_session_factory() returns None. Repositories must check for
+None and fall back to in-memory implementations.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+
+from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker, create_async_engine
+
+
+def _json_serializer(obj: object) -> str:
+    """JSON serializer with ensure_ascii=False for Chinese character support."""
+    return json.dumps(obj, ensure_ascii=False)
+
+
+logger = logging.getLogger(__name__)
+
+_engine: AsyncEngine | None = None
+_session_factory: async_sessionmaker[AsyncSession] | None = None
+
+
+async def _auto_create_postgres_db(url: str) -> None:
+    """Connect to the ``postgres`` maintenance DB and CREATE DATABASE.
+
+    The target database name is extracted from *url*.  The connection is
+    made to the default ``postgres`` database on the same server using
+    ``AUTOCOMMIT`` isolation (CREATE DATABASE cannot run inside a
+    transaction).
+    """
+    from sqlalchemy import text
+    from sqlalchemy.engine.url import make_url
+
+    parsed = make_url(url)
+    db_name = parsed.database
+    if not db_name:
+        raise ValueError("Cannot auto-create database: no database name in URL")
+
+    # Connect to the default 'postgres' database to issue CREATE DATABASE
+    maint_url = parsed.set(database="postgres")
+    maint_engine = create_async_engine(maint_url, isolation_level="AUTOCOMMIT")
+    try:
+        async with maint_engine.connect() as conn:
+            await conn.execute(text(f'CREATE DATABASE "{db_name}"'))
+        logger.info("Auto-created PostgreSQL database: %s", db_name)
+    finally:
+        await maint_engine.dispose()
+
+
+async def init_engine(
+    backend: str,
+    *,
+    url: str = "",
+    echo: bool = False,
+    pool_size: int = 5,
+    sqlite_dir: str = "",
+) -> None:
+    """Create the async engine and session factory, then auto-create tables.
+
+    Args:
+        backend: "memory", "sqlite", or "postgres".
+        url: SQLAlchemy async URL (for sqlite/postgres).
+        echo: Echo SQL to log.
+        pool_size: Postgres connection pool size.
+        sqlite_dir: Directory to create for SQLite (ensured to exist).
+    """
+    global _engine, _session_factory
+
+    if backend == "memory":
+        logger.info("Persistence backend=memory -- ORM engine not initialized")
+        return
+
+    if backend == "postgres":
+        try:
+            import asyncpg  # noqa: F401
+        except ImportError:
+            raise ImportError("database.backend is set to 'postgres' but asyncpg is not installed.\nInstall it with:\n    uv sync --extra postgres\nOr switch to backend: sqlite in config.yaml for single-node deployment.") from None
+
+    if backend == "sqlite":
+        import os
+
+        from sqlalchemy import event
+
+        os.makedirs(sqlite_dir or ".", exist_ok=True)
+        _engine = create_async_engine(url, echo=echo, json_serializer=_json_serializer)
+
+        # Enable WAL on every new connection. SQLite PRAGMA settings are
+        # per-connection, so we wire the listener instead of running PRAGMA
+        # once at startup. WAL gives concurrent reads + writers without
+        # blocking and is the standard recommendation for any production
+        # SQLite deployment (TC-UPG-06 in AUTH_TEST_PLAN.md). The companion
+        # ``synchronous=NORMAL`` is the safe-and-fast pairing — fsync only
+        # at WAL checkpoint boundaries instead of every commit.
+        # Note: we do not set PRAGMA busy_timeout here — Python's sqlite3
+        # driver already defaults to a 5-second busy timeout (see the
+        # ``timeout`` kwarg of ``sqlite3.connect``), and aiosqlite /
+        # SQLAlchemy's aiosqlite dialect inherit that default.  Setting
+        # it again would be a no-op.
+        @event.listens_for(_engine.sync_engine, "connect")
+        def _enable_sqlite_wal(dbapi_conn, _record):  # noqa: ARG001 — SQLAlchemy contract
+            cursor = dbapi_conn.cursor()
+            try:
+                cursor.execute("PRAGMA journal_mode=WAL;")
+                cursor.execute("PRAGMA synchronous=NORMAL;")
+                cursor.execute("PRAGMA foreign_keys=ON;")
+            finally:
+                cursor.close()
+    elif backend == "postgres":
+        _engine = create_async_engine(
+            url,
+            echo=echo,
+            pool_size=pool_size,
+            pool_pre_ping=True,
+            json_serializer=_json_serializer,
+        )
+    else:
+        raise ValueError(f"Unknown persistence backend: {backend!r}")
+
+    _session_factory = async_sessionmaker(_engine, expire_on_commit=False)
+
+    # Auto-create tables (dev convenience). Production should use Alembic.
+    from deerflow.persistence.base import Base
+
+    # Import all models so Base.metadata discovers them.
+    # When no models exist yet (scaffolding phase), this is a no-op.
+    try:
+        import deerflow.persistence.models  # noqa: F401
+    except ImportError:
+        # Models package not yet available — tables won't be auto-created.
+        # This is expected during initial scaffolding or minimal installs.
+        logger.debug("deerflow.persistence.models not found; skipping auto-create tables")
+
+    try:
+        async with _engine.begin() as conn:
+            await conn.run_sync(Base.metadata.create_all)
+    except Exception as exc:
+        if backend == "postgres" and "does not exist" in str(exc):
+            # Database not yet created — attempt to auto-create it, then retry.
+            await _auto_create_postgres_db(url)
+            # Rebuild engine against the now-existing database
+            await _engine.dispose()
+            _engine = create_async_engine(url, echo=echo, pool_size=pool_size, pool_pre_ping=True, json_serializer=_json_serializer)
+            _session_factory = async_sessionmaker(_engine, expire_on_commit=False)
+            async with _engine.begin() as conn:
+                await conn.run_sync(Base.metadata.create_all)
+        else:
+            raise
+
+    logger.info("Persistence engine initialized: backend=%s", backend)
+
+
+async def init_engine_from_config(config) -> None:
+    """Convenience: init engine from a DatabaseConfig object."""
+    if config.backend == "memory":
+        await init_engine("memory")
+        return
+    await init_engine(
+        backend=config.backend,
+        url=config.app_sqlalchemy_url,
+        echo=config.echo_sql,
+        pool_size=config.pool_size,
+        sqlite_dir=config.sqlite_dir if config.backend == "sqlite" else "",
+    )
+
+
+def get_session_factory() -> async_sessionmaker[AsyncSession] | None:
+    """Return the async session factory, or None if backend=memory."""
+    return _session_factory
+
+
+def get_engine() -> AsyncEngine | None:
+    """Return the async engine, or None if not initialized."""
+    return _engine
+
+
+async def close_engine() -> None:
+    """Dispose the engine, release all connections."""
+    global _engine, _session_factory
+    if _engine is not None:
+        await _engine.dispose()
+        logger.info("Persistence engine closed")
+    _engine = None
+    _session_factory = None
diff --git a/backend/packages/harness/deerflow/persistence/feedback/__init__.py b/backend/packages/harness/deerflow/persistence/feedback/__init__.py
new file mode 100644
index 000000000..ee958b027
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/feedback/__init__.py
@@ -0,0 +1,6 @@
+"""Feedback persistence — ORM and SQL repository."""
+
+from deerflow.persistence.feedback.model import FeedbackRow
+from deerflow.persistence.feedback.sql import FeedbackRepository
+
+__all__ = ["FeedbackRepository", "FeedbackRow"]
diff --git a/backend/packages/harness/deerflow/persistence/feedback/model.py b/backend/packages/harness/deerflow/persistence/feedback/model.py
new file mode 100644
index 000000000..f06bc84e7
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/feedback/model.py
@@ -0,0 +1,34 @@
+"""ORM model for user feedback on runs."""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+
+from sqlalchemy import DateTime, String, Text, UniqueConstraint
+from sqlalchemy.orm import Mapped, mapped_column
+
+from deerflow.persistence.base import Base
+
+
+class FeedbackRow(Base):
+    __tablename__ = "feedback"
+
+    __table_args__ = (
+        UniqueConstraint("thread_id", "run_id", "user_id", name="uq_feedback_thread_run_user"),
+    )
+
+    feedback_id: Mapped[str] = mapped_column(String(64), primary_key=True)
+    run_id: Mapped[str] = mapped_column(String(64), nullable=False, index=True)
+    thread_id: Mapped[str] = mapped_column(String(64), nullable=False, index=True)
+    user_id: Mapped[str | None] = mapped_column(String(64), index=True)
+    message_id: Mapped[str | None] = mapped_column(String(64))
+    # message_id is an optional RunEventStore event identifier —
+    # allows feedback to target a specific message or the entire run
+
+    rating: Mapped[int] = mapped_column(nullable=False)
+    # +1 (thumbs-up) or -1 (thumbs-down)
+
+    comment: Mapped[str | None] = mapped_column(Text)
+    # Optional text feedback from the user
+
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(UTC))
diff --git a/backend/packages/harness/deerflow/persistence/feedback/sql.py b/backend/packages/harness/deerflow/persistence/feedback/sql.py
new file mode 100644
index 000000000..1db74ce84
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/feedback/sql.py
@@ -0,0 +1,217 @@
+"""SQLAlchemy-backed feedback storage.
+
+Each method acquires its own short-lived session.
+"""
+
+from __future__ import annotations
+
+import uuid
+from datetime import UTC, datetime
+
+from sqlalchemy import case, func, select
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+
+from deerflow.persistence.feedback.model import FeedbackRow
+from deerflow.runtime.user_context import AUTO, _AutoSentinel, resolve_user_id
+
+
+class FeedbackRepository:
+    def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
+        self._sf = session_factory
+
+    @staticmethod
+    def _row_to_dict(row: FeedbackRow) -> dict:
+        d = row.to_dict()
+        val = d.get("created_at")
+        if isinstance(val, datetime):
+            d["created_at"] = val.isoformat()
+        return d
+
+    async def create(
+        self,
+        *,
+        run_id: str,
+        thread_id: str,
+        rating: int,
+        user_id: str | None | _AutoSentinel = AUTO,
+        message_id: str | None = None,
+        comment: str | None = None,
+    ) -> dict:
+        """Create a feedback record. rating must be +1 or -1."""
+        if rating not in (1, -1):
+            raise ValueError(f"rating must be +1 or -1, got {rating}")
+        resolved_user_id = resolve_user_id(user_id, method_name="FeedbackRepository.create")
+        row = FeedbackRow(
+            feedback_id=str(uuid.uuid4()),
+            run_id=run_id,
+            thread_id=thread_id,
+            user_id=resolved_user_id,
+            message_id=message_id,
+            rating=rating,
+            comment=comment,
+            created_at=datetime.now(UTC),
+        )
+        async with self._sf() as session:
+            session.add(row)
+            await session.commit()
+            await session.refresh(row)
+            return self._row_to_dict(row)
+
+    async def get(
+        self,
+        feedback_id: str,
+        *,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ) -> dict | None:
+        resolved_user_id = resolve_user_id(user_id, method_name="FeedbackRepository.get")
+        async with self._sf() as session:
+            row = await session.get(FeedbackRow, feedback_id)
+            if row is None:
+                return None
+            if resolved_user_id is not None and row.user_id != resolved_user_id:
+                return None
+            return self._row_to_dict(row)
+
+    async def list_by_run(
+        self,
+        thread_id: str,
+        run_id: str,
+        *,
+        limit: int = 100,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ) -> list[dict]:
+        resolved_user_id = resolve_user_id(user_id, method_name="FeedbackRepository.list_by_run")
+        stmt = select(FeedbackRow).where(FeedbackRow.thread_id == thread_id, FeedbackRow.run_id == run_id)
+        if resolved_user_id is not None:
+            stmt = stmt.where(FeedbackRow.user_id == resolved_user_id)
+        stmt = stmt.order_by(FeedbackRow.created_at.asc()).limit(limit)
+        async with self._sf() as session:
+            result = await session.execute(stmt)
+            return [self._row_to_dict(r) for r in result.scalars()]
+
+    async def list_by_thread(
+        self,
+        thread_id: str,
+        *,
+        limit: int = 100,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ) -> list[dict]:
+        resolved_user_id = resolve_user_id(user_id, method_name="FeedbackRepository.list_by_thread")
+        stmt = select(FeedbackRow).where(FeedbackRow.thread_id == thread_id)
+        if resolved_user_id is not None:
+            stmt = stmt.where(FeedbackRow.user_id == resolved_user_id)
+        stmt = stmt.order_by(FeedbackRow.created_at.asc()).limit(limit)
+        async with self._sf() as session:
+            result = await session.execute(stmt)
+            return [self._row_to_dict(r) for r in result.scalars()]
+
+    async def delete(
+        self,
+        feedback_id: str,
+        *,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ) -> bool:
+        resolved_user_id = resolve_user_id(user_id, method_name="FeedbackRepository.delete")
+        async with self._sf() as session:
+            row = await session.get(FeedbackRow, feedback_id)
+            if row is None:
+                return False
+            if resolved_user_id is not None and row.user_id != resolved_user_id:
+                return False
+            await session.delete(row)
+            await session.commit()
+            return True
+
+    async def upsert(
+        self,
+        *,
+        run_id: str,
+        thread_id: str,
+        rating: int,
+        user_id: str | None | _AutoSentinel = AUTO,
+        comment: str | None = None,
+    ) -> dict:
+        """Create or update feedback for (thread_id, run_id, user_id). rating must be +1 or -1."""
+        if rating not in (1, -1):
+            raise ValueError(f"rating must be +1 or -1, got {rating}")
+        resolved_user_id = resolve_user_id(user_id, method_name="FeedbackRepository.upsert")
+        async with self._sf() as session:
+            stmt = select(FeedbackRow).where(
+                FeedbackRow.thread_id == thread_id,
+                FeedbackRow.run_id == run_id,
+                FeedbackRow.user_id == resolved_user_id,
+            )
+            result = await session.execute(stmt)
+            row = result.scalar_one_or_none()
+            if row is not None:
+                row.rating = rating
+                row.comment = comment
+                row.created_at = datetime.now(UTC)
+            else:
+                row = FeedbackRow(
+                    feedback_id=str(uuid.uuid4()),
+                    run_id=run_id,
+                    thread_id=thread_id,
+                    user_id=resolved_user_id,
+                    rating=rating,
+                    comment=comment,
+                    created_at=datetime.now(UTC),
+                )
+                session.add(row)
+            await session.commit()
+            await session.refresh(row)
+            return self._row_to_dict(row)
+
+    async def delete_by_run(
+        self,
+        *,
+        thread_id: str,
+        run_id: str,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ) -> bool:
+        """Delete the current user's feedback for a run. Returns True if a record was deleted."""
+        resolved_user_id = resolve_user_id(user_id, method_name="FeedbackRepository.delete_by_run")
+        async with self._sf() as session:
+            stmt = select(FeedbackRow).where(
+                FeedbackRow.thread_id == thread_id,
+                FeedbackRow.run_id == run_id,
+                FeedbackRow.user_id == resolved_user_id,
+            )
+            result = await session.execute(stmt)
+            row = result.scalar_one_or_none()
+            if row is None:
+                return False
+            await session.delete(row)
+            await session.commit()
+            return True
+
+    async def list_by_thread_grouped(
+        self,
+        thread_id: str,
+        *,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ) -> dict[str, dict]:
+        """Return feedback grouped by run_id for a thread: {run_id: feedback_dict}."""
+        resolved_user_id = resolve_user_id(user_id, method_name="FeedbackRepository.list_by_thread_grouped")
+        stmt = select(FeedbackRow).where(FeedbackRow.thread_id == thread_id)
+        if resolved_user_id is not None:
+            stmt = stmt.where(FeedbackRow.user_id == resolved_user_id)
+        async with self._sf() as session:
+            result = await session.execute(stmt)
+            return {row.run_id: self._row_to_dict(row) for row in result.scalars()}
+
+    async def aggregate_by_run(self, thread_id: str, run_id: str) -> dict:
+        """Aggregate feedback stats for a run using database-side counting."""
+        stmt = select(
+            func.count().label("total"),
+            func.coalesce(func.sum(case((FeedbackRow.rating == 1, 1), else_=0)), 0).label("positive"),
+            func.coalesce(func.sum(case((FeedbackRow.rating == -1, 1), else_=0)), 0).label("negative"),
+        ).where(FeedbackRow.thread_id == thread_id, FeedbackRow.run_id == run_id)
+        async with self._sf() as session:
+            row = (await session.execute(stmt)).one()
+            return {
+                "run_id": run_id,
+                "total": row.total,
+                "positive": row.positive,
+                "negative": row.negative,
+            }
diff --git a/backend/packages/harness/deerflow/persistence/migrations/alembic.ini b/backend/packages/harness/deerflow/persistence/migrations/alembic.ini
new file mode 100644
index 000000000..71b4b1dc0
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/migrations/alembic.ini
@@ -0,0 +1,38 @@
+[alembic]
+script_location = %(here)s
+# Default URL for offline mode / autogenerate.
+# Runtime uses engine from DeerFlow config.
+sqlalchemy.url = sqlite+aiosqlite:///./data/deerflow.db
+
+[loggers]
+keys = root,sqlalchemy,alembic
+
+[handlers]
+keys = console
+
+[formatters]
+keys = generic
+
+[logger_root]
+level = WARN
+handlers = console
+
+[logger_sqlalchemy]
+level = WARN
+handlers =
+qualname = sqlalchemy.engine
+
+[logger_alembic]
+level = INFO
+handlers =
+qualname = alembic
+
+[handler_console]
+class = StreamHandler
+args = (sys.stderr,)
+level = NOTSET
+formatter = generic
+
+[formatter_generic]
+format = %(levelname)-5.5s [%(name)s] %(message)s
+datefmt = %H:%M:%S
diff --git a/backend/packages/harness/deerflow/persistence/migrations/env.py b/backend/packages/harness/deerflow/persistence/migrations/env.py
new file mode 100644
index 000000000..04c186fa0
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/migrations/env.py
@@ -0,0 +1,65 @@
+"""Alembic environment for DeerFlow application tables.
+
+ONLY manages DeerFlow's tables (runs, threads_meta, cron_jobs, users).
+LangGraph's checkpointer tables are managed by LangGraph itself -- they
+have their own schema lifecycle and must not be touched by Alembic.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+from logging.config import fileConfig
+
+from alembic import context
+from sqlalchemy.ext.asyncio import create_async_engine
+
+from deerflow.persistence.base import Base
+
+# Import all models so metadata is populated.
+try:
+    import deerflow.persistence.models  # noqa: F401 — register ORM models with Base.metadata
+except ImportError:
+    # Models not available — migration will work with existing metadata only.
+    logging.getLogger(__name__).warning("Could not import deerflow.persistence.models; Alembic may not detect all tables")
+
+config = context.config
+if config.config_file_name is not None:
+    fileConfig(config.config_file_name)
+
+target_metadata = Base.metadata
+
+
+def run_migrations_offline() -> None:
+    url = config.get_main_option("sqlalchemy.url")
+    context.configure(
+        url=url,
+        target_metadata=target_metadata,
+        literal_binds=True,
+        render_as_batch=True,
+    )
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+def do_run_migrations(connection):
+    context.configure(
+        connection=connection,
+        target_metadata=target_metadata,
+        render_as_batch=True,  # Required for SQLite ALTER TABLE support
+    )
+    with context.begin_transaction():
+        context.run_migrations()
+
+
+async def run_migrations_online() -> None:
+    connectable = create_async_engine(config.get_main_option("sqlalchemy.url"))
+    async with connectable.connect() as connection:
+        await connection.run_sync(do_run_migrations)
+    await connectable.dispose()
+
+
+if context.is_offline_mode():
+    run_migrations_offline()
+else:
+    asyncio.run(run_migrations_online())
diff --git a/backend/packages/harness/deerflow/persistence/migrations/versions/.gitkeep b/backend/packages/harness/deerflow/persistence/migrations/versions/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/backend/packages/harness/deerflow/persistence/models/__init__.py b/backend/packages/harness/deerflow/persistence/models/__init__.py
new file mode 100644
index 000000000..ab29a3536
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/models/__init__.py
@@ -0,0 +1,23 @@
+"""ORM model registration entry point.
+
+Importing this module ensures all ORM models are registered with
+``Base.metadata`` so Alembic autogenerate detects every table.
+
+The actual ORM classes have moved to entity-specific subpackages:
+- ``deerflow.persistence.thread_meta``
+- ``deerflow.persistence.run``
+- ``deerflow.persistence.feedback``
+- ``deerflow.persistence.user``
+
+``RunEventRow`` remains in ``deerflow.persistence.models.run_event`` because
+its storage implementation lives in ``deerflow.runtime.events.store.db`` and
+there is no matching entity directory.
+"""
+
+from deerflow.persistence.feedback.model import FeedbackRow
+from deerflow.persistence.models.run_event import RunEventRow
+from deerflow.persistence.run.model import RunRow
+from deerflow.persistence.thread_meta.model import ThreadMetaRow
+from deerflow.persistence.user.model import UserRow
+
+__all__ = ["FeedbackRow", "RunEventRow", "RunRow", "ThreadMetaRow", "UserRow"]
diff --git a/backend/packages/harness/deerflow/persistence/models/run_event.py b/backend/packages/harness/deerflow/persistence/models/run_event.py
new file mode 100644
index 000000000..4f22b4616
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/models/run_event.py
@@ -0,0 +1,35 @@
+"""ORM model for run events."""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+
+from sqlalchemy import JSON, DateTime, Index, String, Text, UniqueConstraint
+from sqlalchemy.orm import Mapped, mapped_column
+
+from deerflow.persistence.base import Base
+
+
+class RunEventRow(Base):
+    __tablename__ = "run_events"
+
+    id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True)
+    thread_id: Mapped[str] = mapped_column(String(64), nullable=False)
+    run_id: Mapped[str] = mapped_column(String(64), nullable=False)
+    # Owner of the conversation this event belongs to. Nullable for data
+    # created before auth was introduced; populated by auth middleware on
+    # new writes and by the boot-time orphan migration on existing rows.
+    user_id: Mapped[str | None] = mapped_column(String(64), nullable=True, index=True)
+    event_type: Mapped[str] = mapped_column(String(32), nullable=False)
+    category: Mapped[str] = mapped_column(String(16), nullable=False)
+    # "message" | "trace" | "lifecycle"
+    content: Mapped[str] = mapped_column(Text, default="")
+    event_metadata: Mapped[dict] = mapped_column(JSON, default=dict)
+    seq: Mapped[int] = mapped_column(nullable=False)
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(UTC))
+
+    __table_args__ = (
+        UniqueConstraint("thread_id", "seq", name="uq_events_thread_seq"),
+        Index("ix_events_thread_cat_seq", "thread_id", "category", "seq"),
+        Index("ix_events_run", "thread_id", "run_id", "seq"),
+    )
diff --git a/backend/packages/harness/deerflow/persistence/run/__init__.py b/backend/packages/harness/deerflow/persistence/run/__init__.py
new file mode 100644
index 000000000..0aa01e7ea
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/run/__init__.py
@@ -0,0 +1,6 @@
+"""Run metadata persistence — ORM and SQL repository."""
+
+from deerflow.persistence.run.model import RunRow
+from deerflow.persistence.run.sql import RunRepository
+
+__all__ = ["RunRepository", "RunRow"]
diff --git a/backend/packages/harness/deerflow/persistence/run/model.py b/backend/packages/harness/deerflow/persistence/run/model.py
new file mode 100644
index 000000000..d0dfe4085
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/run/model.py
@@ -0,0 +1,49 @@
+"""ORM model for run metadata."""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+
+from sqlalchemy import JSON, DateTime, Index, String, Text
+from sqlalchemy.orm import Mapped, mapped_column
+
+from deerflow.persistence.base import Base
+
+
+class RunRow(Base):
+    __tablename__ = "runs"
+
+    run_id: Mapped[str] = mapped_column(String(64), primary_key=True)
+    thread_id: Mapped[str] = mapped_column(String(64), nullable=False, index=True)
+    assistant_id: Mapped[str | None] = mapped_column(String(128))
+    user_id: Mapped[str | None] = mapped_column(String(64), index=True)
+    status: Mapped[str] = mapped_column(String(20), default="pending")
+    # "pending" | "running" | "success" | "error" | "timeout" | "interrupted"
+
+    model_name: Mapped[str | None] = mapped_column(String(128))
+    multitask_strategy: Mapped[str] = mapped_column(String(20), default="reject")
+    metadata_json: Mapped[dict] = mapped_column(JSON, default=dict)
+    kwargs_json: Mapped[dict] = mapped_column(JSON, default=dict)
+    error: Mapped[str | None] = mapped_column(Text)
+
+    # Convenience fields (for listing pages without querying RunEventStore)
+    message_count: Mapped[int] = mapped_column(default=0)
+    first_human_message: Mapped[str | None] = mapped_column(Text)
+    last_ai_message: Mapped[str | None] = mapped_column(Text)
+
+    # Token usage (accumulated in-memory by RunJournal, written on run completion)
+    total_input_tokens: Mapped[int] = mapped_column(default=0)
+    total_output_tokens: Mapped[int] = mapped_column(default=0)
+    total_tokens: Mapped[int] = mapped_column(default=0)
+    llm_call_count: Mapped[int] = mapped_column(default=0)
+    lead_agent_tokens: Mapped[int] = mapped_column(default=0)
+    subagent_tokens: Mapped[int] = mapped_column(default=0)
+    middleware_tokens: Mapped[int] = mapped_column(default=0)
+
+    # Follow-up association
+    follow_up_to_run_id: Mapped[str | None] = mapped_column(String(64))
+
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(UTC))
+    updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC))
+
+    __table_args__ = (Index("ix_runs_thread_status", "thread_id", "status"),)
diff --git a/backend/packages/harness/deerflow/persistence/run/sql.py b/backend/packages/harness/deerflow/persistence/run/sql.py
new file mode 100644
index 000000000..fcd1a3411
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/run/sql.py
@@ -0,0 +1,255 @@
+"""SQLAlchemy-backed RunStore implementation.
+
+Each method acquires and releases its own short-lived session.
+Run status updates happen from background workers that may live
+minutes -- we don't hold connections across long execution.
+"""
+
+from __future__ import annotations
+
+import json
+from datetime import UTC, datetime
+from typing import Any
+
+from sqlalchemy import func, select, update
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+
+from deerflow.persistence.run.model import RunRow
+from deerflow.runtime.runs.store.base import RunStore
+from deerflow.runtime.user_context import AUTO, _AutoSentinel, resolve_user_id
+
+
+class RunRepository(RunStore):
+    def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
+        self._sf = session_factory
+
+    @staticmethod
+    def _safe_json(obj: Any) -> Any:
+        """Ensure obj is JSON-serializable. Falls back to model_dump() or str()."""
+        if obj is None:
+            return None
+        if isinstance(obj, (str, int, float, bool)):
+            return obj
+        if isinstance(obj, dict):
+            return {k: RunRepository._safe_json(v) for k, v in obj.items()}
+        if isinstance(obj, (list, tuple)):
+            return [RunRepository._safe_json(v) for v in obj]
+        if hasattr(obj, "model_dump"):
+            try:
+                return obj.model_dump()
+            except Exception:
+                pass
+        if hasattr(obj, "dict"):
+            try:
+                return obj.dict()
+            except Exception:
+                pass
+        try:
+            json.dumps(obj)
+            return obj
+        except (TypeError, ValueError):
+            return str(obj)
+
+    @staticmethod
+    def _row_to_dict(row: RunRow) -> dict[str, Any]:
+        d = row.to_dict()
+        # Remap JSON columns to match RunStore interface
+        d["metadata"] = d.pop("metadata_json", {})
+        d["kwargs"] = d.pop("kwargs_json", {})
+        # Convert datetime to ISO string for consistency with MemoryRunStore
+        for key in ("created_at", "updated_at"):
+            val = d.get(key)
+            if isinstance(val, datetime):
+                d[key] = val.isoformat()
+        return d
+
+    async def put(
+        self,
+        run_id,
+        *,
+        thread_id,
+        assistant_id=None,
+        user_id: str | None | _AutoSentinel = AUTO,
+        status="pending",
+        multitask_strategy="reject",
+        metadata=None,
+        kwargs=None,
+        error=None,
+        created_at=None,
+        follow_up_to_run_id=None,
+    ):
+        resolved_user_id = resolve_user_id(user_id, method_name="RunRepository.put")
+        now = datetime.now(UTC)
+        row = RunRow(
+            run_id=run_id,
+            thread_id=thread_id,
+            assistant_id=assistant_id,
+            user_id=resolved_user_id,
+            status=status,
+            multitask_strategy=multitask_strategy,
+            metadata_json=self._safe_json(metadata) or {},
+            kwargs_json=self._safe_json(kwargs) or {},
+            error=error,
+            follow_up_to_run_id=follow_up_to_run_id,
+            created_at=datetime.fromisoformat(created_at) if created_at else now,
+            updated_at=now,
+        )
+        async with self._sf() as session:
+            session.add(row)
+            await session.commit()
+
+    async def get(
+        self,
+        run_id,
+        *,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ):
+        resolved_user_id = resolve_user_id(user_id, method_name="RunRepository.get")
+        async with self._sf() as session:
+            row = await session.get(RunRow, run_id)
+            if row is None:
+                return None
+            if resolved_user_id is not None and row.user_id != resolved_user_id:
+                return None
+            return self._row_to_dict(row)
+
+    async def list_by_thread(
+        self,
+        thread_id,
+        *,
+        user_id: str | None | _AutoSentinel = AUTO,
+        limit=100,
+    ):
+        resolved_user_id = resolve_user_id(user_id, method_name="RunRepository.list_by_thread")
+        stmt = select(RunRow).where(RunRow.thread_id == thread_id)
+        if resolved_user_id is not None:
+            stmt = stmt.where(RunRow.user_id == resolved_user_id)
+        stmt = stmt.order_by(RunRow.created_at.desc()).limit(limit)
+        async with self._sf() as session:
+            result = await session.execute(stmt)
+            return [self._row_to_dict(r) for r in result.scalars()]
+
+    async def update_status(self, run_id, status, *, error=None):
+        values: dict[str, Any] = {"status": status, "updated_at": datetime.now(UTC)}
+        if error is not None:
+            values["error"] = error
+        async with self._sf() as session:
+            await session.execute(update(RunRow).where(RunRow.run_id == run_id).values(**values))
+            await session.commit()
+
+    async def delete(
+        self,
+        run_id,
+        *,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ):
+        resolved_user_id = resolve_user_id(user_id, method_name="RunRepository.delete")
+        async with self._sf() as session:
+            row = await session.get(RunRow, run_id)
+            if row is None:
+                return
+            if resolved_user_id is not None and row.user_id != resolved_user_id:
+                return
+            await session.delete(row)
+            await session.commit()
+
+    async def list_pending(self, *, before=None):
+        if before is None:
+            before_dt = datetime.now(UTC)
+        elif isinstance(before, datetime):
+            before_dt = before
+        else:
+            before_dt = datetime.fromisoformat(before)
+        stmt = select(RunRow).where(RunRow.status == "pending", RunRow.created_at <= before_dt).order_by(RunRow.created_at.asc())
+        async with self._sf() as session:
+            result = await session.execute(stmt)
+            return [self._row_to_dict(r) for r in result.scalars()]
+
+    async def update_run_completion(
+        self,
+        run_id: str,
+        *,
+        status: str,
+        total_input_tokens: int = 0,
+        total_output_tokens: int = 0,
+        total_tokens: int = 0,
+        llm_call_count: int = 0,
+        lead_agent_tokens: int = 0,
+        subagent_tokens: int = 0,
+        middleware_tokens: int = 0,
+        message_count: int = 0,
+        last_ai_message: str | None = None,
+        first_human_message: str | None = None,
+        error: str | None = None,
+    ) -> None:
+        """Update status + token usage + convenience fields on run completion."""
+        values: dict[str, Any] = {
+            "status": status,
+            "total_input_tokens": total_input_tokens,
+            "total_output_tokens": total_output_tokens,
+            "total_tokens": total_tokens,
+            "llm_call_count": llm_call_count,
+            "lead_agent_tokens": lead_agent_tokens,
+            "subagent_tokens": subagent_tokens,
+            "middleware_tokens": middleware_tokens,
+            "message_count": message_count,
+            "updated_at": datetime.now(UTC),
+        }
+        if last_ai_message is not None:
+            values["last_ai_message"] = last_ai_message[:2000]
+        if first_human_message is not None:
+            values["first_human_message"] = first_human_message[:2000]
+        if error is not None:
+            values["error"] = error
+        async with self._sf() as session:
+            await session.execute(update(RunRow).where(RunRow.run_id == run_id).values(**values))
+            await session.commit()
+
+    async def aggregate_tokens_by_thread(self, thread_id: str) -> dict[str, Any]:
+        """Aggregate token usage via a single SQL GROUP BY query."""
+        _completed = RunRow.status.in_(("success", "error"))
+        _thread = RunRow.thread_id == thread_id
+
+        stmt = (
+            select(
+                func.coalesce(RunRow.model_name, "unknown").label("model"),
+                func.count().label("runs"),
+                func.coalesce(func.sum(RunRow.total_tokens), 0).label("total_tokens"),
+                func.coalesce(func.sum(RunRow.total_input_tokens), 0).label("total_input_tokens"),
+                func.coalesce(func.sum(RunRow.total_output_tokens), 0).label("total_output_tokens"),
+                func.coalesce(func.sum(RunRow.lead_agent_tokens), 0).label("lead_agent"),
+                func.coalesce(func.sum(RunRow.subagent_tokens), 0).label("subagent"),
+                func.coalesce(func.sum(RunRow.middleware_tokens), 0).label("middleware"),
+            )
+            .where(_thread, _completed)
+            .group_by(func.coalesce(RunRow.model_name, "unknown"))
+        )
+
+        async with self._sf() as session:
+            rows = (await session.execute(stmt)).all()
+
+        total_tokens = total_input = total_output = total_runs = 0
+        lead_agent = subagent = middleware = 0
+        by_model: dict[str, dict] = {}
+        for r in rows:
+            by_model[r.model] = {"tokens": r.total_tokens, "runs": r.runs}
+            total_tokens += r.total_tokens
+            total_input += r.total_input_tokens
+            total_output += r.total_output_tokens
+            total_runs += r.runs
+            lead_agent += r.lead_agent
+            subagent += r.subagent
+            middleware += r.middleware
+
+        return {
+            "total_tokens": total_tokens,
+            "total_input_tokens": total_input,
+            "total_output_tokens": total_output,
+            "total_runs": total_runs,
+            "by_model": by_model,
+            "by_caller": {
+                "lead_agent": lead_agent,
+                "subagent": subagent,
+                "middleware": middleware,
+            },
+        }
diff --git a/backend/packages/harness/deerflow/persistence/thread_meta/__init__.py b/backend/packages/harness/deerflow/persistence/thread_meta/__init__.py
new file mode 100644
index 000000000..080ce8093
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/thread_meta/__init__.py
@@ -0,0 +1,38 @@
+"""Thread metadata persistence — ORM, abstract store, and concrete implementations."""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from deerflow.persistence.thread_meta.base import ThreadMetaStore
+from deerflow.persistence.thread_meta.memory import MemoryThreadMetaStore
+from deerflow.persistence.thread_meta.model import ThreadMetaRow
+from deerflow.persistence.thread_meta.sql import ThreadMetaRepository
+
+if TYPE_CHECKING:
+    from langgraph.store.base import BaseStore
+    from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+
+__all__ = [
+    "MemoryThreadMetaStore",
+    "ThreadMetaRepository",
+    "ThreadMetaRow",
+    "ThreadMetaStore",
+    "make_thread_store",
+]
+
+
+def make_thread_store(
+    session_factory: async_sessionmaker[AsyncSession] | None,
+    store: BaseStore | None = None,
+) -> ThreadMetaStore:
+    """Create the appropriate ThreadMetaStore based on available backends.
+
+    Returns a SQL-backed repository when a session factory is available,
+    otherwise falls back to the in-memory LangGraph Store implementation.
+    """
+    if session_factory is not None:
+        return ThreadMetaRepository(session_factory)
+    if store is None:
+        raise ValueError("make_thread_store requires either a session_factory (SQL) or a store (memory)")
+    return MemoryThreadMetaStore(store)
diff --git a/backend/packages/harness/deerflow/persistence/thread_meta/base.py b/backend/packages/harness/deerflow/persistence/thread_meta/base.py
new file mode 100644
index 000000000..c87c10a16
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/thread_meta/base.py
@@ -0,0 +1,76 @@
+"""Abstract interface for thread metadata storage.
+
+Implementations:
+- ThreadMetaRepository: SQL-backed (sqlite / postgres via SQLAlchemy)
+- MemoryThreadMetaStore: wraps LangGraph BaseStore (memory mode)
+
+All mutating and querying methods accept a ``user_id`` parameter with
+three-state semantics (see :mod:`deerflow.runtime.user_context`):
+
+- ``AUTO`` (default): resolve from the request-scoped contextvar.
+- Explicit ``str``: use the provided value verbatim.
+- Explicit ``None``: bypass owner filtering (migration/CLI only).
+"""
+
+from __future__ import annotations
+
+import abc
+
+from deerflow.runtime.user_context import AUTO, _AutoSentinel
+
+
+class ThreadMetaStore(abc.ABC):
+    @abc.abstractmethod
+    async def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str | None = None,
+        user_id: str | None | _AutoSentinel = AUTO,
+        display_name: str | None = None,
+        metadata: dict | None = None,
+    ) -> dict:
+        pass
+
+    @abc.abstractmethod
+    async def get(self, thread_id: str, *, user_id: str | None | _AutoSentinel = AUTO) -> dict | None:
+        pass
+
+    @abc.abstractmethod
+    async def search(
+        self,
+        *,
+        metadata: dict | None = None,
+        status: str | None = None,
+        limit: int = 100,
+        offset: int = 0,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ) -> list[dict]:
+        pass
+
+    @abc.abstractmethod
+    async def update_display_name(self, thread_id: str, display_name: str, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
+        pass
+
+    @abc.abstractmethod
+    async def update_status(self, thread_id: str, status: str, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
+        pass
+
+    @abc.abstractmethod
+    async def update_metadata(self, thread_id: str, metadata: dict, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
+        """Merge ``metadata`` into the thread's metadata field.
+
+        Existing keys are overwritten by the new values; keys absent from
+        ``metadata`` are preserved. No-op if the thread does not exist
+        or the owner check fails.
+        """
+        pass
+
+    @abc.abstractmethod
+    async def check_access(self, thread_id: str, user_id: str, *, require_existing: bool = False) -> bool:
+        """Check if ``user_id`` has access to ``thread_id``."""
+        pass
+
+    @abc.abstractmethod
+    async def delete(self, thread_id: str, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
+        pass
diff --git a/backend/packages/harness/deerflow/persistence/thread_meta/memory.py b/backend/packages/harness/deerflow/persistence/thread_meta/memory.py
new file mode 100644
index 000000000..ccf59ad42
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/thread_meta/memory.py
@@ -0,0 +1,149 @@
+"""In-memory ThreadMetaStore backed by LangGraph BaseStore.
+
+Used when database.backend=memory. Delegates to the LangGraph Store's
+``("threads",)`` namespace — the same namespace used by the Gateway
+router for thread records.
+"""
+
+from __future__ import annotations
+
+import time
+from typing import Any
+
+from langgraph.store.base import BaseStore
+
+from deerflow.persistence.thread_meta.base import ThreadMetaStore
+from deerflow.runtime.user_context import AUTO, _AutoSentinel, resolve_user_id
+
+THREADS_NS: tuple[str, ...] = ("threads",)
+
+
+class MemoryThreadMetaStore(ThreadMetaStore):
+    def __init__(self, store: BaseStore) -> None:
+        self._store = store
+
+    async def _get_owned_record(
+        self,
+        thread_id: str,
+        user_id: str | None | _AutoSentinel,
+        method_name: str,
+    ) -> dict | None:
+        """Fetch a record and verify ownership. Returns a mutable copy, or None."""
+        resolved = resolve_user_id(user_id, method_name=method_name)
+        item = await self._store.aget(THREADS_NS, thread_id)
+        if item is None:
+            return None
+        record = dict(item.value)
+        if resolved is not None and record.get("user_id") != resolved:
+            return None
+        return record
+
+    async def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str | None = None,
+        user_id: str | None | _AutoSentinel = AUTO,
+        display_name: str | None = None,
+        metadata: dict | None = None,
+    ) -> dict:
+        resolved_user_id = resolve_user_id(user_id, method_name="MemoryThreadMetaStore.create")
+        now = time.time()
+        record: dict[str, Any] = {
+            "thread_id": thread_id,
+            "assistant_id": assistant_id,
+            "user_id": resolved_user_id,
+            "display_name": display_name,
+            "status": "idle",
+            "metadata": metadata or {},
+            "values": {},
+            "created_at": now,
+            "updated_at": now,
+        }
+        await self._store.aput(THREADS_NS, thread_id, record)
+        return record
+
+    async def get(self, thread_id: str, *, user_id: str | None | _AutoSentinel = AUTO) -> dict | None:
+        return await self._get_owned_record(thread_id, user_id, "MemoryThreadMetaStore.get")
+
+    async def search(
+        self,
+        *,
+        metadata: dict | None = None,
+        status: str | None = None,
+        limit: int = 100,
+        offset: int = 0,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ) -> list[dict]:
+        resolved_user_id = resolve_user_id(user_id, method_name="MemoryThreadMetaStore.search")
+        filter_dict: dict[str, Any] = {}
+        if metadata:
+            filter_dict.update(metadata)
+        if status:
+            filter_dict["status"] = status
+        if resolved_user_id is not None:
+            filter_dict["user_id"] = resolved_user_id
+
+        items = await self._store.asearch(
+            THREADS_NS,
+            filter=filter_dict or None,
+            limit=limit,
+            offset=offset,
+        )
+        return [self._item_to_dict(item) for item in items]
+
+    async def check_access(self, thread_id: str, user_id: str, *, require_existing: bool = False) -> bool:
+        item = await self._store.aget(THREADS_NS, thread_id)
+        if item is None:
+            return not require_existing
+        record_user_id = item.value.get("user_id")
+        if record_user_id is None:
+            return True
+        return record_user_id == user_id
+
+    async def update_display_name(self, thread_id: str, display_name: str, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
+        record = await self._get_owned_record(thread_id, user_id, "MemoryThreadMetaStore.update_display_name")
+        if record is None:
+            return
+        record["display_name"] = display_name
+        record["updated_at"] = time.time()
+        await self._store.aput(THREADS_NS, thread_id, record)
+
+    async def update_status(self, thread_id: str, status: str, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
+        record = await self._get_owned_record(thread_id, user_id, "MemoryThreadMetaStore.update_status")
+        if record is None:
+            return
+        record["status"] = status
+        record["updated_at"] = time.time()
+        await self._store.aput(THREADS_NS, thread_id, record)
+
+    async def update_metadata(self, thread_id: str, metadata: dict, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
+        record = await self._get_owned_record(thread_id, user_id, "MemoryThreadMetaStore.update_metadata")
+        if record is None:
+            return
+        merged = dict(record.get("metadata") or {})
+        merged.update(metadata)
+        record["metadata"] = merged
+        record["updated_at"] = time.time()
+        await self._store.aput(THREADS_NS, thread_id, record)
+
+    async def delete(self, thread_id: str, *, user_id: str | None | _AutoSentinel = AUTO) -> None:
+        record = await self._get_owned_record(thread_id, user_id, "MemoryThreadMetaStore.delete")
+        if record is None:
+            return
+        await self._store.adelete(THREADS_NS, thread_id)
+
+    @staticmethod
+    def _item_to_dict(item) -> dict[str, Any]:
+        """Convert a Store SearchItem to the dict format expected by callers."""
+        val = item.value
+        return {
+            "thread_id": item.key,
+            "assistant_id": val.get("assistant_id"),
+            "user_id": val.get("user_id"),
+            "display_name": val.get("display_name"),
+            "status": val.get("status", "idle"),
+            "metadata": val.get("metadata", {}),
+            "created_at": str(val.get("created_at", "")),
+            "updated_at": str(val.get("updated_at", "")),
+        }
diff --git a/backend/packages/harness/deerflow/persistence/thread_meta/model.py b/backend/packages/harness/deerflow/persistence/thread_meta/model.py
new file mode 100644
index 000000000..fe15315e1
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/thread_meta/model.py
@@ -0,0 +1,23 @@
+"""ORM model for thread metadata."""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+
+from sqlalchemy import JSON, DateTime, String
+from sqlalchemy.orm import Mapped, mapped_column
+
+from deerflow.persistence.base import Base
+
+
+class ThreadMetaRow(Base):
+    __tablename__ = "threads_meta"
+
+    thread_id: Mapped[str] = mapped_column(String(64), primary_key=True)
+    assistant_id: Mapped[str | None] = mapped_column(String(128), index=True)
+    user_id: Mapped[str | None] = mapped_column(String(64), index=True)
+    display_name: Mapped[str | None] = mapped_column(String(256))
+    status: Mapped[str] = mapped_column(String(20), default="idle")
+    metadata_json: Mapped[dict] = mapped_column(JSON, default=dict)
+    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(UTC))
+    updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC))
diff --git a/backend/packages/harness/deerflow/persistence/thread_meta/sql.py b/backend/packages/harness/deerflow/persistence/thread_meta/sql.py
new file mode 100644
index 000000000..688fbb247
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/thread_meta/sql.py
@@ -0,0 +1,217 @@
+"""SQLAlchemy-backed thread metadata repository."""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+from typing import Any
+
+from sqlalchemy import select, update
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+
+from deerflow.persistence.thread_meta.base import ThreadMetaStore
+from deerflow.persistence.thread_meta.model import ThreadMetaRow
+from deerflow.runtime.user_context import AUTO, _AutoSentinel, resolve_user_id
+
+
+class ThreadMetaRepository(ThreadMetaStore):
+    def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
+        self._sf = session_factory
+
+    @staticmethod
+    def _row_to_dict(row: ThreadMetaRow) -> dict[str, Any]:
+        d = row.to_dict()
+        d["metadata"] = d.pop("metadata_json", {})
+        for key in ("created_at", "updated_at"):
+            val = d.get(key)
+            if isinstance(val, datetime):
+                d[key] = val.isoformat()
+        return d
+
+    async def create(
+        self,
+        thread_id: str,
+        *,
+        assistant_id: str | None = None,
+        user_id: str | None | _AutoSentinel = AUTO,
+        display_name: str | None = None,
+        metadata: dict | None = None,
+    ) -> dict:
+        # Auto-resolve user_id from contextvar when AUTO; explicit None
+        # creates an orphan row (used by migration scripts).
+        resolved_user_id = resolve_user_id(user_id, method_name="ThreadMetaRepository.create")
+        now = datetime.now(UTC)
+        row = ThreadMetaRow(
+            thread_id=thread_id,
+            assistant_id=assistant_id,
+            user_id=resolved_user_id,
+            display_name=display_name,
+            metadata_json=metadata or {},
+            created_at=now,
+            updated_at=now,
+        )
+        async with self._sf() as session:
+            session.add(row)
+            await session.commit()
+            await session.refresh(row)
+            return self._row_to_dict(row)
+
+    async def get(
+        self,
+        thread_id: str,
+        *,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ) -> dict | None:
+        resolved_user_id = resolve_user_id(user_id, method_name="ThreadMetaRepository.get")
+        async with self._sf() as session:
+            row = await session.get(ThreadMetaRow, thread_id)
+            if row is None:
+                return None
+            # Enforce owner filter unless explicitly bypassed (user_id=None).
+            if resolved_user_id is not None and row.user_id != resolved_user_id:
+                return None
+            return self._row_to_dict(row)
+
+    async def check_access(self, thread_id: str, user_id: str, *, require_existing: bool = False) -> bool:
+        """Check if ``user_id`` has access to ``thread_id``.
+
+        Two modes — one row, two distinct semantics depending on what
+        the caller is about to do:
+
+        - ``require_existing=False`` (default, permissive):
+          Returns True for: row missing (untracked legacy thread),
+          ``row.user_id`` is None (shared / pre-auth data),
+          or ``row.user_id == user_id``. Use for **read-style**
+          decorators where treating an untracked thread as accessible
+          preserves backward-compat.
+
+        - ``require_existing=True`` (strict):
+          Returns True **only** when the row exists AND
+          (``row.user_id == user_id`` OR ``row.user_id is None``).
+          Use for **destructive / mutating** decorators (DELETE, PATCH,
+          state-update) so a thread that has *already been deleted*
+          cannot be re-targeted by any caller — closing the
+          delete-idempotence cross-user gap where the row vanishing
+          made every other user appear to "own" it.
+        """
+        async with self._sf() as session:
+            row = await session.get(ThreadMetaRow, thread_id)
+            if row is None:
+                return not require_existing
+            if row.user_id is None:
+                return True
+            return row.user_id == user_id
+
+    async def search(
+        self,
+        *,
+        metadata: dict | None = None,
+        status: str | None = None,
+        limit: int = 100,
+        offset: int = 0,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ) -> list[dict]:
+        """Search threads with optional metadata and status filters.
+
+        Owner filter is enforced by default: caller must be in a user
+        context. Pass ``user_id=None`` to bypass (migration/CLI).
+        """
+        resolved_user_id = resolve_user_id(user_id, method_name="ThreadMetaRepository.search")
+        stmt = select(ThreadMetaRow).order_by(ThreadMetaRow.updated_at.desc())
+        if resolved_user_id is not None:
+            stmt = stmt.where(ThreadMetaRow.user_id == resolved_user_id)
+        if status:
+            stmt = stmt.where(ThreadMetaRow.status == status)
+
+        if metadata:
+            # When metadata filter is active, fetch a larger window and filter
+            # in Python. TODO(Phase 2): use JSON DB operators (Postgres @>,
+            # SQLite json_extract) for server-side filtering.
+            stmt = stmt.limit(limit * 5 + offset)
+            async with self._sf() as session:
+                result = await session.execute(stmt)
+                rows = [self._row_to_dict(r) for r in result.scalars()]
+            rows = [r for r in rows if all(r.get("metadata", {}).get(k) == v for k, v in metadata.items())]
+            return rows[offset : offset + limit]
+        else:
+            stmt = stmt.limit(limit).offset(offset)
+            async with self._sf() as session:
+                result = await session.execute(stmt)
+                return [self._row_to_dict(r) for r in result.scalars()]
+
+    async def _check_ownership(self, session: AsyncSession, thread_id: str, resolved_user_id: str | None) -> bool:
+        """Return True if the row exists and is owned (or filter bypassed)."""
+        if resolved_user_id is None:
+            return True  # explicit bypass
+        row = await session.get(ThreadMetaRow, thread_id)
+        return row is not None and row.user_id == resolved_user_id
+
+    async def update_display_name(
+        self,
+        thread_id: str,
+        display_name: str,
+        *,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ) -> None:
+        """Update the display_name (title) for a thread."""
+        resolved_user_id = resolve_user_id(user_id, method_name="ThreadMetaRepository.update_display_name")
+        async with self._sf() as session:
+            if not await self._check_ownership(session, thread_id, resolved_user_id):
+                return
+            await session.execute(update(ThreadMetaRow).where(ThreadMetaRow.thread_id == thread_id).values(display_name=display_name, updated_at=datetime.now(UTC)))
+            await session.commit()
+
+    async def update_status(
+        self,
+        thread_id: str,
+        status: str,
+        *,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ) -> None:
+        resolved_user_id = resolve_user_id(user_id, method_name="ThreadMetaRepository.update_status")
+        async with self._sf() as session:
+            if not await self._check_ownership(session, thread_id, resolved_user_id):
+                return
+            await session.execute(update(ThreadMetaRow).where(ThreadMetaRow.thread_id == thread_id).values(status=status, updated_at=datetime.now(UTC)))
+            await session.commit()
+
+    async def update_metadata(
+        self,
+        thread_id: str,
+        metadata: dict,
+        *,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ) -> None:
+        """Merge ``metadata`` into ``metadata_json``.
+
+        Read-modify-write inside a single session/transaction so concurrent
+        callers see consistent state. No-op if the row does not exist or
+        the user_id check fails.
+        """
+        resolved_user_id = resolve_user_id(user_id, method_name="ThreadMetaRepository.update_metadata")
+        async with self._sf() as session:
+            row = await session.get(ThreadMetaRow, thread_id)
+            if row is None:
+                return
+            if resolved_user_id is not None and row.user_id != resolved_user_id:
+                return
+            merged = dict(row.metadata_json or {})
+            merged.update(metadata)
+            row.metadata_json = merged
+            row.updated_at = datetime.now(UTC)
+            await session.commit()
+
+    async def delete(
+        self,
+        thread_id: str,
+        *,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ) -> None:
+        resolved_user_id = resolve_user_id(user_id, method_name="ThreadMetaRepository.delete")
+        async with self._sf() as session:
+            row = await session.get(ThreadMetaRow, thread_id)
+            if row is None:
+                return
+            if resolved_user_id is not None and row.user_id != resolved_user_id:
+                return
+            await session.delete(row)
+            await session.commit()
diff --git a/backend/packages/harness/deerflow/persistence/user/__init__.py b/backend/packages/harness/deerflow/persistence/user/__init__.py
new file mode 100644
index 000000000..a60eeef2c
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/user/__init__.py
@@ -0,0 +1,12 @@
+"""User storage subpackage.
+
+Holds the ORM model for the ``users`` table. The concrete repository
+implementation (``SQLiteUserRepository``) lives in the app layer
+(``app.gateway.auth.repositories.sqlite``) because it converts
+between the ORM row and the auth module's pydantic ``User`` class.
+This keeps the harness package free of any dependency on app code.
+"""
+
+from deerflow.persistence.user.model import UserRow
+
+__all__ = ["UserRow"]
diff --git a/backend/packages/harness/deerflow/persistence/user/model.py b/backend/packages/harness/deerflow/persistence/user/model.py
new file mode 100644
index 000000000..130d4bfcb
--- /dev/null
+++ b/backend/packages/harness/deerflow/persistence/user/model.py
@@ -0,0 +1,59 @@
+"""ORM model for the users table.
+
+Lives in the harness persistence package so it is picked up by
+``Base.metadata.create_all()`` alongside ``threads_meta``, ``runs``,
+``run_events``, and ``feedback``. Using the shared engine means:
+
+- One SQLite/Postgres database, one connection pool
+- One schema initialisation codepath
+- Consistent async sessions across auth and persistence reads
+"""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+
+from sqlalchemy import Boolean, DateTime, Index, String, text
+from sqlalchemy.orm import Mapped, mapped_column
+
+from deerflow.persistence.base import Base
+
+
+class UserRow(Base):
+    __tablename__ = "users"
+
+    # UUIDs are stored as 36-char strings for cross-backend portability.
+    id: Mapped[str] = mapped_column(String(36), primary_key=True)
+
+    email: Mapped[str] = mapped_column(String(320), unique=True, nullable=False, index=True)
+    password_hash: Mapped[str | None] = mapped_column(String(128), nullable=True)
+
+    # "admin" | "user" — kept as plain string to avoid ALTER TABLE pain
+    # when new roles are introduced.
+    system_role: Mapped[str] = mapped_column(String(16), nullable=False, default="user")
+
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime(timezone=True),
+        nullable=False,
+        default=lambda: datetime.now(UTC),
+    )
+
+    # OAuth linkage (optional). A partial unique index enforces one
+    # account per (provider, oauth_id) pair, leaving NULL/NULL rows
+    # unconstrained so plain password accounts can coexist.
+    oauth_provider: Mapped[str | None] = mapped_column(String(32), nullable=True)
+    oauth_id: Mapped[str | None] = mapped_column(String(128), nullable=True)
+
+    # Auth lifecycle flags
+    needs_setup: Mapped[bool] = mapped_column(Boolean, nullable=False, default=False)
+    token_version: Mapped[int] = mapped_column(nullable=False, default=0)
+
+    __table_args__ = (
+        Index(
+            "idx_users_oauth_identity",
+            "oauth_provider",
+            "oauth_id",
+            unique=True,
+            sqlite_where=text("oauth_provider IS NOT NULL AND oauth_id IS NOT NULL"),
+        ),
+    )
diff --git a/backend/packages/harness/deerflow/runtime/__init__.py b/backend/packages/harness/deerflow/runtime/__init__.py
index d7eccf101..5a3df2eb6 100644
--- a/backend/packages/harness/deerflow/runtime/__init__.py
+++ b/backend/packages/harness/deerflow/runtime/__init__.py
@@ -5,15 +5,22 @@ Re-exports the public API of :mod:`~deerflow.runtime.runs` and
 directly from ``deerflow.runtime``.
 """
 
-from .runs import ConflictError, DisconnectMode, RunManager, RunRecord, RunStatus, UnsupportedStrategyError, run_agent
+from .checkpointer import checkpointer_context, get_checkpointer, make_checkpointer, reset_checkpointer
+from .runs import ConflictError, DisconnectMode, RunContext, RunManager, RunRecord, RunStatus, UnsupportedStrategyError, run_agent
 from .serialization import serialize, serialize_channel_values, serialize_lc_object, serialize_messages_tuple
 from .store import get_store, make_store, reset_store, store_context
 from .stream_bridge import END_SENTINEL, HEARTBEAT_SENTINEL, MemoryStreamBridge, StreamBridge, StreamEvent, make_stream_bridge
 
 __all__ = [
+    # checkpointer
+    "checkpointer_context",
+    "get_checkpointer",
+    "make_checkpointer",
+    "reset_checkpointer",
     # runs
     "ConflictError",
     "DisconnectMode",
+    "RunContext",
     "RunManager",
     "RunRecord",
     "RunStatus",
diff --git a/backend/packages/harness/deerflow/agents/checkpointer/__init__.py b/backend/packages/harness/deerflow/runtime/checkpointer/__init__.py
similarity index 100%
rename from backend/packages/harness/deerflow/agents/checkpointer/__init__.py
rename to backend/packages/harness/deerflow/runtime/checkpointer/__init__.py
diff --git a/backend/packages/harness/deerflow/agents/checkpointer/async_provider.py b/backend/packages/harness/deerflow/runtime/checkpointer/async_provider.py
similarity index 53%
rename from backend/packages/harness/deerflow/agents/checkpointer/async_provider.py
rename to backend/packages/harness/deerflow/runtime/checkpointer/async_provider.py
index 1129fc6b0..f2453eb54 100644
--- a/backend/packages/harness/deerflow/agents/checkpointer/async_provider.py
+++ b/backend/packages/harness/deerflow/runtime/checkpointer/async_provider.py
@@ -7,12 +7,12 @@ Supported backends: memory, sqlite, postgres.
 
 Usage (e.g. FastAPI lifespan)::
 
-    from deerflow.agents.checkpointer.async_provider import make_checkpointer
+    from deerflow.runtime.checkpointer.async_provider import make_checkpointer
 
     async with make_checkpointer() as checkpointer:
         app.state.checkpointer = checkpointer  # InMemorySaver if not configured
 
-For sync usage see :mod:`deerflow.agents.checkpointer.provider`.
+For sync usage see :mod:`deerflow.runtime.checkpointer.provider`.
 """
 
 from __future__ import annotations
@@ -24,12 +24,12 @@ from collections.abc import AsyncIterator
 
 from langgraph.types import Checkpointer
 
-from deerflow.agents.checkpointer.provider import (
+from deerflow.config.app_config import AppConfig
+from deerflow.runtime.checkpointer.provider import (
     POSTGRES_CONN_REQUIRED,
     POSTGRES_INSTALL,
     SQLITE_INSTALL,
 )
-from deerflow.config.app_config import get_app_config
 from deerflow.runtime.store._sqlite_utils import ensure_sqlite_parent_dir, resolve_sqlite_conn_str
 
 logger = logging.getLogger(__name__)
@@ -84,23 +84,74 @@ async def _async_checkpointer(config) -> AsyncIterator[Checkpointer]:
 
 
 @contextlib.asynccontextmanager
-async def make_checkpointer() -> AsyncIterator[Checkpointer]:
-    """Async context manager that yields a checkpointer for the caller's lifetime.
-    Resources are opened on enter and closed on exit — no global state::
-
-        async with make_checkpointer() as checkpointer:
-            app.state.checkpointer = checkpointer
-
-    Yields an ``InMemorySaver`` when no checkpointer is configured in *config.yaml*.
-    """
-
-    config = get_app_config()
-
-    if config.checkpointer is None:
+async def _async_checkpointer_from_database(db_config) -> AsyncIterator[Checkpointer]:
+    """Async context manager that constructs a checkpointer from unified DatabaseConfig."""
+    if db_config.backend == "memory":
         from langgraph.checkpoint.memory import InMemorySaver
 
         yield InMemorySaver()
         return
 
-    async with _async_checkpointer(config.checkpointer) as saver:
-        yield saver
+    if db_config.backend == "sqlite":
+        try:
+            from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver
+        except ImportError as exc:
+            raise ImportError(SQLITE_INSTALL) from exc
+
+        conn_str = db_config.checkpointer_sqlite_path
+        ensure_sqlite_parent_dir(conn_str)
+        async with AsyncSqliteSaver.from_conn_string(conn_str) as saver:
+            await saver.setup()
+            yield saver
+        return
+
+    if db_config.backend == "postgres":
+        try:
+            from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver
+        except ImportError as exc:
+            raise ImportError(POSTGRES_INSTALL) from exc
+
+        if not db_config.postgres_url:
+            raise ValueError("database.postgres_url is required for the postgres backend")
+
+        async with AsyncPostgresSaver.from_conn_string(db_config.postgres_url) as saver:
+            await saver.setup()
+            yield saver
+        return
+
+    raise ValueError(f"Unknown database backend: {db_config.backend!r}")
+
+
+@contextlib.asynccontextmanager
+async def make_checkpointer(app_config: AppConfig) -> AsyncIterator[Checkpointer]:
+    """Async context manager that yields a checkpointer for the caller's lifetime.
+    Resources are opened on enter and closed on exit -- no global state::
+
+        async with make_checkpointer(app_config) as checkpointer:
+            app.state.checkpointer = checkpointer
+
+    Yields an ``InMemorySaver`` when no checkpointer is configured in *config.yaml*.
+
+    Priority:
+    1. Legacy ``checkpointer:`` config section (backward compatible)
+    2. Unified ``database:`` config section
+    3. Default InMemorySaver
+    """
+
+    # Legacy: standalone checkpointer config takes precedence
+    if app_config.checkpointer is not None:
+        async with _async_checkpointer(app_config.checkpointer) as saver:
+            yield saver
+            return
+
+    # Unified database config
+    db_config = getattr(app_config, "database", None)
+    if db_config is not None and db_config.backend != "memory":
+        async with _async_checkpointer_from_database(db_config) as saver:
+            yield saver
+            return
+
+    # Default: in-memory
+    from langgraph.checkpoint.memory import InMemorySaver
+
+    yield InMemorySaver()
diff --git a/backend/packages/harness/deerflow/agents/checkpointer/provider.py b/backend/packages/harness/deerflow/runtime/checkpointer/provider.py
similarity index 80%
rename from backend/packages/harness/deerflow/agents/checkpointer/provider.py
rename to backend/packages/harness/deerflow/runtime/checkpointer/provider.py
index 252e58be5..73831c482 100644
--- a/backend/packages/harness/deerflow/agents/checkpointer/provider.py
+++ b/backend/packages/harness/deerflow/runtime/checkpointer/provider.py
@@ -7,7 +7,7 @@ Supported backends: memory, sqlite, postgres.
 
 Usage::
 
-    from deerflow.agents.checkpointer.provider import get_checkpointer, checkpointer_context
+    from deerflow.runtime.checkpointer.provider import get_checkpointer, checkpointer_context
 
     # Singleton — reused across calls, closed on process exit
     cp = get_checkpointer()
@@ -25,7 +25,7 @@ from collections.abc import Iterator
 
 from langgraph.types import Checkpointer
 
-from deerflow.config.app_config import get_app_config
+from deerflow.config.app_config import AppConfig
 from deerflow.config.checkpointer_config import CheckpointerConfig
 from deerflow.runtime.store._sqlite_utils import ensure_sqlite_parent_dir, resolve_sqlite_conn_str
 
@@ -100,10 +100,13 @@ _checkpointer: Checkpointer | None = None
 _checkpointer_ctx = None  # open context manager keeping the connection alive
 
 
-def get_checkpointer() -> Checkpointer:
+def get_checkpointer(app_config: AppConfig) -> Checkpointer:
     """Return the global sync checkpointer singleton, creating it on first call.
 
-    Returns an ``InMemorySaver`` when no checkpointer is configured in *config.yaml*.
+    Returns an ``InMemorySaver`` only when ``checkpointer`` is explicitly
+    absent from config.yaml. Any other failure (missing config, invalid
+    backend, connection error) propagates — silent degradation to in-memory
+    would drop persistent-run state on process restart.
 
     Raises:
         ImportError: If the required package for the configured backend is not installed.
@@ -114,25 +117,7 @@ def get_checkpointer() -> Checkpointer:
     if _checkpointer is not None:
         return _checkpointer
 
-    # Ensure app config is loaded before checking checkpointer config
-    # This prevents returning InMemorySaver when config.yaml actually has a checkpointer section
-    # but hasn't been loaded yet
-    from deerflow.config.app_config import _app_config
-    from deerflow.config.checkpointer_config import get_checkpointer_config
-
-    config = get_checkpointer_config()
-
-    if config is None and _app_config is None:
-        # Only load app config lazily when neither the app config nor an explicit
-        # checkpointer config has been initialized yet. This keeps tests that
-        # intentionally set the global checkpointer config isolated from any
-        # ambient config.yaml on disk.
-        try:
-            get_app_config()
-        except FileNotFoundError:
-            # In test environments without config.yaml, this is expected.
-            pass
-        config = get_checkpointer_config()
+    config = app_config.checkpointer
     if config is None:
         from langgraph.checkpoint.memory import InMemorySaver
 
@@ -168,25 +153,23 @@ def reset_checkpointer() -> None:
 
 
 @contextlib.contextmanager
-def checkpointer_context() -> Iterator[Checkpointer]:
+def checkpointer_context(app_config: AppConfig) -> Iterator[Checkpointer]:
     """Sync context manager that yields a checkpointer and cleans up on exit.
 
     Unlike :func:`get_checkpointer`, this does **not** cache the instance —
     each ``with`` block creates and destroys its own connection.  Use it in
     CLI scripts or tests where you want deterministic cleanup::
 
-        with checkpointer_context() as cp:
+        with checkpointer_context(app_config) as cp:
             graph.invoke(input, config={"configurable": {"thread_id": "1"}})
 
     Yields an ``InMemorySaver`` when no checkpointer is configured in *config.yaml*.
     """
-
-    config = get_app_config()
-    if config.checkpointer is None:
+    if app_config.checkpointer is None:
         from langgraph.checkpoint.memory import InMemorySaver
 
         yield InMemorySaver()
         return
 
-    with _sync_checkpointer_cm(config.checkpointer) as saver:
+    with _sync_checkpointer_cm(app_config.checkpointer) as saver:
         yield saver
diff --git a/backend/packages/harness/deerflow/runtime/converters.py b/backend/packages/harness/deerflow/runtime/converters.py
new file mode 100644
index 000000000..811031160
--- /dev/null
+++ b/backend/packages/harness/deerflow/runtime/converters.py
@@ -0,0 +1,134 @@
+"""Pure functions to convert LangChain message objects to OpenAI Chat Completions format.
+
+Used by RunJournal to build content dicts for event storage.
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any
+
+_ROLE_MAP = {
+    "human": "user",
+    "ai": "assistant",
+    "system": "system",
+    "tool": "tool",
+}
+
+
+def langchain_to_openai_message(message: Any) -> dict:
+    """Convert a single LangChain BaseMessage to an OpenAI message dict.
+
+    Handles:
+    - HumanMessage → {"role": "user", "content": "..."}
+    - AIMessage (text only) → {"role": "assistant", "content": "..."}
+    - AIMessage (with tool_calls) → {"role": "assistant", "content": null, "tool_calls": [...]}
+    - AIMessage (text + tool_calls) → both content and tool_calls present
+    - AIMessage (list content / multimodal) → content preserved as list
+    - SystemMessage → {"role": "system", "content": "..."}
+    - ToolMessage → {"role": "tool", "tool_call_id": "...", "content": "..."}
+    """
+    msg_type = getattr(message, "type", "")
+    role = _ROLE_MAP.get(msg_type, msg_type)
+    content = getattr(message, "content", "")
+
+    if role == "tool":
+        return {
+            "role": "tool",
+            "tool_call_id": getattr(message, "tool_call_id", ""),
+            "content": content,
+        }
+
+    if role == "assistant":
+        tool_calls = getattr(message, "tool_calls", None) or []
+        result: dict = {"role": "assistant"}
+
+        if tool_calls:
+            openai_tool_calls = []
+            for tc in tool_calls:
+                args = tc.get("args", {})
+                openai_tool_calls.append(
+                    {
+                        "id": tc.get("id", ""),
+                        "type": "function",
+                        "function": {
+                            "name": tc.get("name", ""),
+                            "arguments": json.dumps(args) if not isinstance(args, str) else args,
+                        },
+                    }
+                )
+            # If no text content, set content to null per OpenAI spec
+            result["content"] = content if (isinstance(content, list) and content) or (isinstance(content, str) and content) else None
+            result["tool_calls"] = openai_tool_calls
+        else:
+            result["content"] = content
+
+        return result
+
+    # user / system / unknown
+    return {"role": role, "content": content}
+
+
+def _infer_finish_reason(message: Any) -> str:
+    """Infer OpenAI finish_reason from an AIMessage.
+
+    Returns "tool_calls" if tool_calls present, else looks in
+    response_metadata.finish_reason, else returns "stop".
+    """
+    tool_calls = getattr(message, "tool_calls", None) or []
+    if tool_calls:
+        return "tool_calls"
+    resp_meta = getattr(message, "response_metadata", None) or {}
+    if isinstance(resp_meta, dict):
+        finish = resp_meta.get("finish_reason")
+        if finish:
+            return finish
+    return "stop"
+
+
+def langchain_to_openai_completion(message: Any) -> dict:
+    """Convert an AIMessage and its metadata to an OpenAI completion response dict.
+
+    Returns:
+        {
+            "id": message.id,
+            "model": message.response_metadata.get("model_name"),
+            "choices": [{"index": 0, "message": <openai_message>, "finish_reason": <inferred>}],
+            "usage": {"prompt_tokens": ..., "completion_tokens": ..., "total_tokens": ...} or None,
+        }
+    """
+    resp_meta = getattr(message, "response_metadata", None) or {}
+    model_name = resp_meta.get("model_name") if isinstance(resp_meta, dict) else None
+
+    openai_msg = langchain_to_openai_message(message)
+    finish_reason = _infer_finish_reason(message)
+
+    usage_metadata = getattr(message, "usage_metadata", None)
+    if usage_metadata is not None:
+        input_tokens = usage_metadata.get("input_tokens", 0) or 0
+        output_tokens = usage_metadata.get("output_tokens", 0) or 0
+        usage: dict | None = {
+            "prompt_tokens": input_tokens,
+            "completion_tokens": output_tokens,
+            "total_tokens": input_tokens + output_tokens,
+        }
+    else:
+        usage = None
+
+    return {
+        "id": getattr(message, "id", None),
+        "model": model_name,
+        "choices": [
+            {
+                "index": 0,
+                "message": openai_msg,
+                "finish_reason": finish_reason,
+            }
+        ],
+        "usage": usage,
+    }
+
+
+def langchain_messages_to_openai(messages: list) -> list[dict]:
+    """Convert a list of LangChain BaseMessages to OpenAI message dicts."""
+    return [langchain_to_openai_message(m) for m in messages]
diff --git a/backend/packages/harness/deerflow/runtime/events/__init__.py b/backend/packages/harness/deerflow/runtime/events/__init__.py
new file mode 100644
index 000000000..0da8fabe5
--- /dev/null
+++ b/backend/packages/harness/deerflow/runtime/events/__init__.py
@@ -0,0 +1,4 @@
+from deerflow.runtime.events.store.base import RunEventStore
+from deerflow.runtime.events.store.memory import MemoryRunEventStore
+
+__all__ = ["MemoryRunEventStore", "RunEventStore"]
diff --git a/backend/packages/harness/deerflow/runtime/events/store/__init__.py b/backend/packages/harness/deerflow/runtime/events/store/__init__.py
new file mode 100644
index 000000000..55f0dd33f
--- /dev/null
+++ b/backend/packages/harness/deerflow/runtime/events/store/__init__.py
@@ -0,0 +1,26 @@
+from deerflow.runtime.events.store.base import RunEventStore
+from deerflow.runtime.events.store.memory import MemoryRunEventStore
+
+
+def make_run_event_store(config=None) -> RunEventStore:
+    """Create a RunEventStore based on run_events.backend configuration."""
+    if config is None or config.backend == "memory":
+        return MemoryRunEventStore()
+    if config.backend == "db":
+        from deerflow.persistence.engine import get_session_factory
+
+        sf = get_session_factory()
+        if sf is None:
+            # database.backend=memory but run_events.backend=db -> fallback
+            return MemoryRunEventStore()
+        from deerflow.runtime.events.store.db import DbRunEventStore
+
+        return DbRunEventStore(sf, max_trace_content=config.max_trace_content)
+    if config.backend == "jsonl":
+        from deerflow.runtime.events.store.jsonl import JsonlRunEventStore
+
+        return JsonlRunEventStore()
+    raise ValueError(f"Unknown run_events backend: {config.backend!r}")
+
+
+__all__ = ["MemoryRunEventStore", "RunEventStore", "make_run_event_store"]
diff --git a/backend/packages/harness/deerflow/runtime/events/store/base.py b/backend/packages/harness/deerflow/runtime/events/store/base.py
new file mode 100644
index 000000000..df5136ba5
--- /dev/null
+++ b/backend/packages/harness/deerflow/runtime/events/store/base.py
@@ -0,0 +1,109 @@
+"""Abstract interface for run event storage.
+
+RunEventStore is the unified storage interface for run event streams.
+Messages (frontend display) and execution traces (debugging/audit) go
+through the same interface, distinguished by the ``category`` field.
+
+Implementations:
+- MemoryRunEventStore: in-memory dict (development, tests)
+- Future: DB-backed store (SQLAlchemy ORM), JSONL file store
+"""
+
+from __future__ import annotations
+
+import abc
+
+
+class RunEventStore(abc.ABC):
+    """Run event stream storage interface.
+
+    All implementations must guarantee:
+    1. put() events are retrievable in subsequent queries
+    2. seq is strictly increasing within the same thread
+    3. list_messages() only returns category="message" events
+    4. list_events() returns all events for the specified run
+    5. Returned dicts match the RunEvent field structure
+    """
+
+    @abc.abstractmethod
+    async def put(
+        self,
+        *,
+        thread_id: str,
+        run_id: str,
+        event_type: str,
+        category: str,
+        content: str | dict = "",
+        metadata: dict | None = None,
+        created_at: str | None = None,
+    ) -> dict:
+        """Write an event, auto-assign seq, return the complete record."""
+
+    @abc.abstractmethod
+    async def put_batch(self, events: list[dict]) -> list[dict]:
+        """Batch-write events. Used by RunJournal flush buffer.
+
+        Each dict's keys match put()'s keyword arguments.
+        Returns complete records with seq assigned.
+        """
+
+    @abc.abstractmethod
+    async def list_messages(
+        self,
+        thread_id: str,
+        *,
+        limit: int = 50,
+        before_seq: int | None = None,
+        after_seq: int | None = None,
+    ) -> list[dict]:
+        """Return displayable messages (category=message) for a thread, ordered by seq ascending.
+
+        Supports bidirectional cursor pagination:
+        - before_seq: return the last ``limit`` records with seq < before_seq (ascending)
+        - after_seq: return the first ``limit`` records with seq > after_seq (ascending)
+        - neither: return the latest ``limit`` records (ascending)
+        """
+
+    @abc.abstractmethod
+    async def list_events(
+        self,
+        thread_id: str,
+        run_id: str,
+        *,
+        event_types: list[str] | None = None,
+        limit: int = 500,
+    ) -> list[dict]:
+        """Return the full event stream for a run, ordered by seq ascending.
+
+        Optionally filter by event_types.
+        """
+
+    @abc.abstractmethod
+    async def list_messages_by_run(
+        self,
+        thread_id: str,
+        run_id: str,
+        *,
+        limit: int = 50,
+        before_seq: int | None = None,
+        after_seq: int | None = None,
+    ) -> list[dict]:
+        """Return displayable messages (category=message) for a specific run, ordered by seq ascending.
+
+        Supports bidirectional cursor pagination:
+        - after_seq: return the first ``limit`` records with seq > after_seq (ascending)
+        - before_seq: return the last ``limit`` records with seq < before_seq (ascending)
+        - neither: return the latest ``limit`` records (ascending)
+        """
+
+    @abc.abstractmethod
+    async def count_messages(self, thread_id: str) -> int:
+        """Count displayable messages (category=message) in a thread."""
+
+    @abc.abstractmethod
+    async def delete_by_thread(self, thread_id: str) -> int:
+        """Delete all events for a thread. Return the number of deleted events."""
+
+    @abc.abstractmethod
+    async def delete_by_run(self, thread_id: str, run_id: str) -> int:
+        """Delete all events for a specific run. Return the number of deleted events."""
diff --git a/backend/packages/harness/deerflow/runtime/events/store/db.py b/backend/packages/harness/deerflow/runtime/events/store/db.py
new file mode 100644
index 000000000..e4a21d006
--- /dev/null
+++ b/backend/packages/harness/deerflow/runtime/events/store/db.py
@@ -0,0 +1,286 @@
+"""SQLAlchemy-backed RunEventStore implementation.
+
+Persists events to the ``run_events`` table. Trace content is truncated
+at ``max_trace_content`` bytes to avoid bloating the database.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from datetime import UTC, datetime
+
+from sqlalchemy import delete, func, select
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+
+from deerflow.persistence.models.run_event import RunEventRow
+from deerflow.runtime.events.store.base import RunEventStore
+from deerflow.runtime.user_context import AUTO, _AutoSentinel, get_current_user, resolve_user_id
+
+logger = logging.getLogger(__name__)
+
+
+class DbRunEventStore(RunEventStore):
+    def __init__(self, session_factory: async_sessionmaker[AsyncSession], *, max_trace_content: int = 10240):
+        self._sf = session_factory
+        self._max_trace_content = max_trace_content
+
+    @staticmethod
+    def _row_to_dict(row: RunEventRow) -> dict:
+        d = row.to_dict()
+        d["metadata"] = d.pop("event_metadata", {})
+        val = d.get("created_at")
+        if isinstance(val, datetime):
+            d["created_at"] = val.isoformat()
+        d.pop("id", None)
+        # Restore dict content that was JSON-serialized on write
+        raw = d.get("content", "")
+        if isinstance(raw, str) and d.get("metadata", {}).get("content_is_dict"):
+            try:
+                d["content"] = json.loads(raw)
+            except (json.JSONDecodeError, ValueError):
+                # Content looked like JSON (content_is_dict flag) but failed to parse;
+                # keep the raw string as-is.
+                logger.debug("Failed to deserialize content as JSON for event seq=%s", d.get("seq"))
+        return d
+
+    def _truncate_trace(self, category: str, content: str | dict, metadata: dict | None) -> tuple[str | dict, dict]:
+        if category == "trace":
+            text = json.dumps(content, default=str, ensure_ascii=False) if isinstance(content, dict) else content
+            encoded = text.encode("utf-8")
+            if len(encoded) > self._max_trace_content:
+                # Truncate by bytes, then decode back (may cut a multi-byte char, so use errors="ignore")
+                content = encoded[: self._max_trace_content].decode("utf-8", errors="ignore")
+                metadata = {**(metadata or {}), "content_truncated": True, "original_byte_length": len(encoded)}
+        return content, metadata or {}
+
+    @staticmethod
+    def _user_id_from_context() -> str | None:
+        """Soft read of user_id from contextvar for write paths.
+
+        Returns ``None`` (no filter / no stamp) if contextvar is unset,
+        which is the expected case for background worker writes. HTTP
+        request writes will have the contextvar set by auth middleware
+        and get their user_id stamped automatically.
+
+        Coerces ``user.id`` to ``str`` at the boundary: ``User.id`` is
+        typed as ``UUID`` by the auth layer, but ``run_events.user_id``
+        is ``VARCHAR(64)`` and aiosqlite cannot bind a raw UUID object
+        to a VARCHAR column ("type 'UUID' is not supported") — the
+        INSERT would silently roll back and the worker would hang.
+        """
+        user = get_current_user()
+        return str(user.id) if user is not None else None
+
+    async def put(self, *, thread_id, run_id, event_type, category, content="", metadata=None, created_at=None):  # noqa: D401
+        """Write a single event — low-frequency path only.
+
+        This opens a dedicated transaction with a FOR UPDATE lock to
+        assign a monotonic *seq*.  For high-throughput writes use
+        :meth:`put_batch`, which acquires the lock once for the whole
+        batch.  Currently the only caller is ``worker.run_agent`` for
+        the initial ``human_message`` event (once per run).
+        """
+        content, metadata = self._truncate_trace(category, content, metadata)
+        if isinstance(content, dict):
+            db_content = json.dumps(content, default=str, ensure_ascii=False)
+            metadata = {**(metadata or {}), "content_is_dict": True}
+        else:
+            db_content = content
+        user_id = self._user_id_from_context()
+        async with self._sf() as session:
+            async with session.begin():
+                # Use FOR UPDATE to serialize seq assignment within a thread.
+                # NOTE: with_for_update() on aggregates is a no-op on SQLite;
+                # the UNIQUE(thread_id, seq) constraint catches races there.
+                max_seq = await session.scalar(select(func.max(RunEventRow.seq)).where(RunEventRow.thread_id == thread_id).with_for_update())
+                seq = (max_seq or 0) + 1
+                row = RunEventRow(
+                    thread_id=thread_id,
+                    run_id=run_id,
+                    user_id=user_id,
+                    event_type=event_type,
+                    category=category,
+                    content=db_content,
+                    event_metadata=metadata,
+                    seq=seq,
+                    created_at=datetime.fromisoformat(created_at) if created_at else datetime.now(UTC),
+                )
+                session.add(row)
+            return self._row_to_dict(row)
+
+    async def put_batch(self, events):
+        if not events:
+            return []
+        user_id = self._user_id_from_context()
+        async with self._sf() as session:
+            async with session.begin():
+                # Get max seq for the thread (assume all events in batch belong to same thread).
+                # NOTE: with_for_update() on aggregates is a no-op on SQLite;
+                # the UNIQUE(thread_id, seq) constraint catches races there.
+                thread_id = events[0]["thread_id"]
+                max_seq = await session.scalar(select(func.max(RunEventRow.seq)).where(RunEventRow.thread_id == thread_id).with_for_update())
+                seq = max_seq or 0
+                rows = []
+                for e in events:
+                    seq += 1
+                    content = e.get("content", "")
+                    category = e.get("category", "trace")
+                    metadata = e.get("metadata")
+                    content, metadata = self._truncate_trace(category, content, metadata)
+                    if isinstance(content, dict):
+                        db_content = json.dumps(content, default=str, ensure_ascii=False)
+                        metadata = {**(metadata or {}), "content_is_dict": True}
+                    else:
+                        db_content = content
+                    row = RunEventRow(
+                        thread_id=e["thread_id"],
+                        run_id=e["run_id"],
+                        user_id=e.get("user_id", user_id),
+                        event_type=e["event_type"],
+                        category=category,
+                        content=db_content,
+                        event_metadata=metadata,
+                        seq=seq,
+                        created_at=datetime.fromisoformat(e["created_at"]) if e.get("created_at") else datetime.now(UTC),
+                    )
+                    session.add(row)
+                    rows.append(row)
+            return [self._row_to_dict(r) for r in rows]
+
+    async def list_messages(
+        self,
+        thread_id,
+        *,
+        limit=50,
+        before_seq=None,
+        after_seq=None,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ):
+        resolved_user_id = resolve_user_id(user_id, method_name="DbRunEventStore.list_messages")
+        stmt = select(RunEventRow).where(RunEventRow.thread_id == thread_id, RunEventRow.category == "message")
+        if resolved_user_id is not None:
+            stmt = stmt.where(RunEventRow.user_id == resolved_user_id)
+        if before_seq is not None:
+            stmt = stmt.where(RunEventRow.seq < before_seq)
+        if after_seq is not None:
+            stmt = stmt.where(RunEventRow.seq > after_seq)
+
+        if after_seq is not None:
+            # Forward pagination: first `limit` records after cursor
+            stmt = stmt.order_by(RunEventRow.seq.asc()).limit(limit)
+            async with self._sf() as session:
+                result = await session.execute(stmt)
+                return [self._row_to_dict(r) for r in result.scalars()]
+        else:
+            # before_seq or default (latest): take last `limit` records, return ascending
+            stmt = stmt.order_by(RunEventRow.seq.desc()).limit(limit)
+            async with self._sf() as session:
+                result = await session.execute(stmt)
+                rows = list(result.scalars())
+                return [self._row_to_dict(r) for r in reversed(rows)]
+
+    async def list_events(
+        self,
+        thread_id,
+        run_id,
+        *,
+        event_types=None,
+        limit=500,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ):
+        resolved_user_id = resolve_user_id(user_id, method_name="DbRunEventStore.list_events")
+        stmt = select(RunEventRow).where(RunEventRow.thread_id == thread_id, RunEventRow.run_id == run_id)
+        if resolved_user_id is not None:
+            stmt = stmt.where(RunEventRow.user_id == resolved_user_id)
+        if event_types:
+            stmt = stmt.where(RunEventRow.event_type.in_(event_types))
+        stmt = stmt.order_by(RunEventRow.seq.asc()).limit(limit)
+        async with self._sf() as session:
+            result = await session.execute(stmt)
+            return [self._row_to_dict(r) for r in result.scalars()]
+
+    async def list_messages_by_run(
+        self,
+        thread_id,
+        run_id,
+        *,
+        limit=50,
+        before_seq=None,
+        after_seq=None,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ):
+        resolved_user_id = resolve_user_id(user_id, method_name="DbRunEventStore.list_messages_by_run")
+        stmt = select(RunEventRow).where(
+            RunEventRow.thread_id == thread_id,
+            RunEventRow.run_id == run_id,
+            RunEventRow.category == "message",
+        )
+        if resolved_user_id is not None:
+            stmt = stmt.where(RunEventRow.user_id == resolved_user_id)
+        if before_seq is not None:
+            stmt = stmt.where(RunEventRow.seq < before_seq)
+        if after_seq is not None:
+            stmt = stmt.where(RunEventRow.seq > after_seq)
+
+        if after_seq is not None:
+            stmt = stmt.order_by(RunEventRow.seq.asc()).limit(limit)
+            async with self._sf() as session:
+                result = await session.execute(stmt)
+                return [self._row_to_dict(r) for r in result.scalars()]
+        else:
+            stmt = stmt.order_by(RunEventRow.seq.desc()).limit(limit)
+            async with self._sf() as session:
+                result = await session.execute(stmt)
+                rows = list(result.scalars())
+                return [self._row_to_dict(r) for r in reversed(rows)]
+
+    async def count_messages(
+        self,
+        thread_id,
+        *,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ):
+        resolved_user_id = resolve_user_id(user_id, method_name="DbRunEventStore.count_messages")
+        stmt = select(func.count()).select_from(RunEventRow).where(RunEventRow.thread_id == thread_id, RunEventRow.category == "message")
+        if resolved_user_id is not None:
+            stmt = stmt.where(RunEventRow.user_id == resolved_user_id)
+        async with self._sf() as session:
+            return await session.scalar(stmt) or 0
+
+    async def delete_by_thread(
+        self,
+        thread_id,
+        *,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ):
+        resolved_user_id = resolve_user_id(user_id, method_name="DbRunEventStore.delete_by_thread")
+        async with self._sf() as session:
+            count_conditions = [RunEventRow.thread_id == thread_id]
+            if resolved_user_id is not None:
+                count_conditions.append(RunEventRow.user_id == resolved_user_id)
+            count_stmt = select(func.count()).select_from(RunEventRow).where(*count_conditions)
+            count = await session.scalar(count_stmt) or 0
+            if count > 0:
+                await session.execute(delete(RunEventRow).where(*count_conditions))
+                await session.commit()
+            return count
+
+    async def delete_by_run(
+        self,
+        thread_id,
+        run_id,
+        *,
+        user_id: str | None | _AutoSentinel = AUTO,
+    ):
+        resolved_user_id = resolve_user_id(user_id, method_name="DbRunEventStore.delete_by_run")
+        async with self._sf() as session:
+            count_conditions = [RunEventRow.thread_id == thread_id, RunEventRow.run_id == run_id]
+            if resolved_user_id is not None:
+                count_conditions.append(RunEventRow.user_id == resolved_user_id)
+            count_stmt = select(func.count()).select_from(RunEventRow).where(*count_conditions)
+            count = await session.scalar(count_stmt) or 0
+            if count > 0:
+                await session.execute(delete(RunEventRow).where(*count_conditions))
+                await session.commit()
+            return count
diff --git a/backend/packages/harness/deerflow/runtime/events/store/jsonl.py b/backend/packages/harness/deerflow/runtime/events/store/jsonl.py
new file mode 100644
index 000000000..378713afc
--- /dev/null
+++ b/backend/packages/harness/deerflow/runtime/events/store/jsonl.py
@@ -0,0 +1,187 @@
+"""JSONL file-backed RunEventStore implementation.
+
+Each run's events are stored in a single file:
+``.deer-flow/threads/{thread_id}/runs/{run_id}.jsonl``
+
+All categories (message, trace, lifecycle) are in the same file.
+This backend is suitable for lightweight single-node deployments.
+
+Known trade-off: ``list_messages()`` must scan all run files for a
+thread since messages from multiple runs need unified seq ordering.
+``list_events()`` reads only one file -- the fast path.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from datetime import UTC, datetime
+from pathlib import Path
+
+from deerflow.runtime.events.store.base import RunEventStore
+
+logger = logging.getLogger(__name__)
+
+_SAFE_ID_PATTERN = re.compile(r"^[A-Za-z0-9_\-]+$")
+
+
+class JsonlRunEventStore(RunEventStore):
+    def __init__(self, base_dir: str | Path | None = None):
+        self._base_dir = Path(base_dir) if base_dir else Path(".deer-flow")
+        self._seq_counters: dict[str, int] = {}  # thread_id -> current max seq
+
+    @staticmethod
+    def _validate_id(value: str, label: str) -> str:
+        """Validate that an ID is safe for use in filesystem paths."""
+        if not value or not _SAFE_ID_PATTERN.match(value):
+            raise ValueError(f"Invalid {label}: must be alphanumeric/dash/underscore, got {value!r}")
+        return value
+
+    def _thread_dir(self, thread_id: str) -> Path:
+        self._validate_id(thread_id, "thread_id")
+        return self._base_dir / "threads" / thread_id / "runs"
+
+    def _run_file(self, thread_id: str, run_id: str) -> Path:
+        self._validate_id(run_id, "run_id")
+        return self._thread_dir(thread_id) / f"{run_id}.jsonl"
+
+    def _next_seq(self, thread_id: str) -> int:
+        self._seq_counters[thread_id] = self._seq_counters.get(thread_id, 0) + 1
+        return self._seq_counters[thread_id]
+
+    def _ensure_seq_loaded(self, thread_id: str) -> None:
+        """Load max seq from existing files if not yet cached."""
+        if thread_id in self._seq_counters:
+            return
+        max_seq = 0
+        thread_dir = self._thread_dir(thread_id)
+        if thread_dir.exists():
+            for f in thread_dir.glob("*.jsonl"):
+                for line in f.read_text(encoding="utf-8").strip().splitlines():
+                    try:
+                        record = json.loads(line)
+                        max_seq = max(max_seq, record.get("seq", 0))
+                    except json.JSONDecodeError:
+                        logger.debug("Skipping malformed JSONL line in %s", f)
+                        continue
+        self._seq_counters[thread_id] = max_seq
+
+    def _write_record(self, record: dict) -> None:
+        path = self._run_file(record["thread_id"], record["run_id"])
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with open(path, "a", encoding="utf-8") as f:
+            f.write(json.dumps(record, default=str, ensure_ascii=False) + "\n")
+
+    def _read_thread_events(self, thread_id: str) -> list[dict]:
+        """Read all events for a thread, sorted by seq."""
+        events = []
+        thread_dir = self._thread_dir(thread_id)
+        if not thread_dir.exists():
+            return events
+        for f in sorted(thread_dir.glob("*.jsonl")):
+            for line in f.read_text(encoding="utf-8").strip().splitlines():
+                if not line:
+                    continue
+                try:
+                    events.append(json.loads(line))
+                except json.JSONDecodeError:
+                    logger.debug("Skipping malformed JSONL line in %s", f)
+                    continue
+        events.sort(key=lambda e: e.get("seq", 0))
+        return events
+
+    def _read_run_events(self, thread_id: str, run_id: str) -> list[dict]:
+        """Read events for a specific run file."""
+        path = self._run_file(thread_id, run_id)
+        if not path.exists():
+            return []
+        events = []
+        for line in path.read_text(encoding="utf-8").strip().splitlines():
+            if not line:
+                continue
+            try:
+                events.append(json.loads(line))
+            except json.JSONDecodeError:
+                logger.debug("Skipping malformed JSONL line in %s", path)
+                continue
+        events.sort(key=lambda e: e.get("seq", 0))
+        return events
+
+    async def put(self, *, thread_id, run_id, event_type, category, content="", metadata=None, created_at=None):
+        self._ensure_seq_loaded(thread_id)
+        seq = self._next_seq(thread_id)
+        record = {
+            "thread_id": thread_id,
+            "run_id": run_id,
+            "event_type": event_type,
+            "category": category,
+            "content": content,
+            "metadata": metadata or {},
+            "seq": seq,
+            "created_at": created_at or datetime.now(UTC).isoformat(),
+        }
+        self._write_record(record)
+        return record
+
+    async def put_batch(self, events):
+        if not events:
+            return []
+        results = []
+        for ev in events:
+            record = await self.put(**ev)
+            results.append(record)
+        return results
+
+    async def list_messages(self, thread_id, *, limit=50, before_seq=None, after_seq=None):
+        all_events = self._read_thread_events(thread_id)
+        messages = [e for e in all_events if e.get("category") == "message"]
+
+        if before_seq is not None:
+            messages = [e for e in messages if e["seq"] < before_seq]
+            return messages[-limit:]
+        elif after_seq is not None:
+            messages = [e for e in messages if e["seq"] > after_seq]
+            return messages[:limit]
+        else:
+            return messages[-limit:]
+
+    async def list_events(self, thread_id, run_id, *, event_types=None, limit=500):
+        events = self._read_run_events(thread_id, run_id)
+        if event_types is not None:
+            events = [e for e in events if e.get("event_type") in event_types]
+        return events[:limit]
+
+    async def list_messages_by_run(self, thread_id, run_id, *, limit=50, before_seq=None, after_seq=None):
+        events = self._read_run_events(thread_id, run_id)
+        filtered = [e for e in events if e.get("category") == "message"]
+        if before_seq is not None:
+            filtered = [e for e in filtered if e.get("seq", 0) < before_seq]
+        if after_seq is not None:
+            filtered = [e for e in filtered if e.get("seq", 0) > after_seq]
+        if after_seq is not None:
+            return filtered[:limit]
+        else:
+            return filtered[-limit:] if len(filtered) > limit else filtered
+
+    async def count_messages(self, thread_id):
+        all_events = self._read_thread_events(thread_id)
+        return sum(1 for e in all_events if e.get("category") == "message")
+
+    async def delete_by_thread(self, thread_id):
+        all_events = self._read_thread_events(thread_id)
+        count = len(all_events)
+        thread_dir = self._thread_dir(thread_id)
+        if thread_dir.exists():
+            for f in thread_dir.glob("*.jsonl"):
+                f.unlink()
+        self._seq_counters.pop(thread_id, None)
+        return count
+
+    async def delete_by_run(self, thread_id, run_id):
+        events = self._read_run_events(thread_id, run_id)
+        count = len(events)
+        path = self._run_file(thread_id, run_id)
+        if path.exists():
+            path.unlink()
+        return count
diff --git a/backend/packages/harness/deerflow/runtime/events/store/memory.py b/backend/packages/harness/deerflow/runtime/events/store/memory.py
new file mode 100644
index 000000000..cf70e1cdf
--- /dev/null
+++ b/backend/packages/harness/deerflow/runtime/events/store/memory.py
@@ -0,0 +1,128 @@
+"""In-memory RunEventStore. Used when run_events.backend=memory (default) and in tests.
+
+Thread-safe for single-process async usage (no threading locks needed
+since all mutations happen within the same event loop).
+"""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+
+from deerflow.runtime.events.store.base import RunEventStore
+
+
+class MemoryRunEventStore(RunEventStore):
+    def __init__(self) -> None:
+        self._events: dict[str, list[dict]] = {}  # thread_id -> sorted event list
+        self._seq_counters: dict[str, int] = {}  # thread_id -> last assigned seq
+
+    def _next_seq(self, thread_id: str) -> int:
+        current = self._seq_counters.get(thread_id, 0)
+        next_val = current + 1
+        self._seq_counters[thread_id] = next_val
+        return next_val
+
+    def _put_one(
+        self,
+        *,
+        thread_id: str,
+        run_id: str,
+        event_type: str,
+        category: str,
+        content: str | dict = "",
+        metadata: dict | None = None,
+        created_at: str | None = None,
+    ) -> dict:
+        seq = self._next_seq(thread_id)
+        record = {
+            "thread_id": thread_id,
+            "run_id": run_id,
+            "event_type": event_type,
+            "category": category,
+            "content": content,
+            "metadata": metadata or {},
+            "seq": seq,
+            "created_at": created_at or datetime.now(UTC).isoformat(),
+        }
+        self._events.setdefault(thread_id, []).append(record)
+        return record
+
+    async def put(
+        self,
+        *,
+        thread_id,
+        run_id,
+        event_type,
+        category,
+        content="",
+        metadata=None,
+        created_at=None,
+    ):
+        return self._put_one(
+            thread_id=thread_id,
+            run_id=run_id,
+            event_type=event_type,
+            category=category,
+            content=content,
+            metadata=metadata,
+            created_at=created_at,
+        )
+
+    async def put_batch(self, events):
+        results = []
+        for ev in events:
+            record = self._put_one(**ev)
+            results.append(record)
+        return results
+
+    async def list_messages(self, thread_id, *, limit=50, before_seq=None, after_seq=None):
+        all_events = self._events.get(thread_id, [])
+        messages = [e for e in all_events if e["category"] == "message"]
+
+        if before_seq is not None:
+            messages = [e for e in messages if e["seq"] < before_seq]
+            # Take the last `limit` records
+            return messages[-limit:]
+        elif after_seq is not None:
+            messages = [e for e in messages if e["seq"] > after_seq]
+            return messages[:limit]
+        else:
+            # Return the latest `limit` records, ascending
+            return messages[-limit:]
+
+    async def list_events(self, thread_id, run_id, *, event_types=None, limit=500):
+        all_events = self._events.get(thread_id, [])
+        filtered = [e for e in all_events if e["run_id"] == run_id]
+        if event_types is not None:
+            filtered = [e for e in filtered if e["event_type"] in event_types]
+        return filtered[:limit]
+
+    async def list_messages_by_run(self, thread_id, run_id, *, limit=50, before_seq=None, after_seq=None):
+        all_events = self._events.get(thread_id, [])
+        filtered = [e for e in all_events if e["run_id"] == run_id and e["category"] == "message"]
+        if before_seq is not None:
+            filtered = [e for e in filtered if e["seq"] < before_seq]
+        if after_seq is not None:
+            filtered = [e for e in filtered if e["seq"] > after_seq]
+        if after_seq is not None:
+            return filtered[:limit]
+        else:
+            return filtered[-limit:] if len(filtered) > limit else filtered
+
+    async def count_messages(self, thread_id):
+        all_events = self._events.get(thread_id, [])
+        return sum(1 for e in all_events if e["category"] == "message")
+
+    async def delete_by_thread(self, thread_id):
+        events = self._events.pop(thread_id, [])
+        self._seq_counters.pop(thread_id, None)
+        return len(events)
+
+    async def delete_by_run(self, thread_id, run_id):
+        all_events = self._events.get(thread_id, [])
+        if not all_events:
+            return 0
+        remaining = [e for e in all_events if e["run_id"] != run_id]
+        removed = len(all_events) - len(remaining)
+        self._events[thread_id] = remaining
+        return removed
diff --git a/backend/packages/harness/deerflow/runtime/journal.py b/backend/packages/harness/deerflow/runtime/journal.py
new file mode 100644
index 000000000..a70404e11
--- /dev/null
+++ b/backend/packages/harness/deerflow/runtime/journal.py
@@ -0,0 +1,497 @@
+"""Run event capture via LangChain callbacks.
+
+RunJournal sits between LangChain's callback mechanism and the pluggable
+RunEventStore. It standardizes callback data into RunEvent records and
+handles token usage accumulation.
+
+Key design decisions:
+- on_llm_new_token is NOT implemented -- only complete messages via on_llm_end
+- on_chat_model_start captures structured prompts as llm_request (OpenAI format)
+- on_llm_end emits llm_response in OpenAI Chat Completions format
+- Token usage accumulated in memory, written to RunRow on run completion
+- Caller identification via tags injection (lead_agent / subagent:{name} / middleware:{name})
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import time
+from datetime import UTC, datetime
+from typing import TYPE_CHECKING, Any
+from uuid import UUID
+
+from langchain_core.callbacks import BaseCallbackHandler
+
+if TYPE_CHECKING:
+    from deerflow.runtime.events.store.base import RunEventStore
+
+logger = logging.getLogger(__name__)
+
+
+class RunJournal(BaseCallbackHandler):
+    """LangChain callback handler that captures events to RunEventStore."""
+
+    def __init__(
+        self,
+        run_id: str,
+        thread_id: str,
+        event_store: RunEventStore,
+        *,
+        track_token_usage: bool = True,
+        flush_threshold: int = 20,
+    ):
+        super().__init__()
+        self.run_id = run_id
+        self.thread_id = thread_id
+        self._store = event_store
+        self._track_tokens = track_token_usage
+        self._flush_threshold = flush_threshold
+
+        # Write buffer
+        self._buffer: list[dict] = []
+        self._pending_flush_tasks: set[asyncio.Task[None]] = set()
+
+        # Token accumulators
+        self._total_input_tokens = 0
+        self._total_output_tokens = 0
+        self._total_tokens = 0
+        self._llm_call_count = 0
+        self._lead_agent_tokens = 0
+        self._subagent_tokens = 0
+        self._middleware_tokens = 0
+
+        # Convenience fields
+        self._last_ai_msg: str | None = None
+        self._first_human_msg: str | None = None
+        self._msg_count = 0
+
+        # Latency tracking
+        self._llm_start_times: dict[str, float] = {}  # langchain run_id -> start time
+
+        # LLM request/response tracking
+        self._llm_call_index = 0
+        self._cached_prompts: dict[str, list[dict]] = {}  # langchain run_id -> OpenAI messages
+        self._cached_models: dict[str, str] = {}  # langchain run_id -> model name
+
+        # Tool call ID cache
+        self._tool_call_ids: dict[str, str] = {}  # langchain run_id -> tool_call_id
+
+    # -- Lifecycle callbacks --
+
+    def on_chain_start(self, serialized: dict, inputs: Any, *, run_id: UUID, **kwargs: Any) -> None:
+        if kwargs.get("parent_run_id") is not None:
+            return
+        self._put(
+            event_type="run_start",
+            category="lifecycle",
+            metadata={"input_preview": str(inputs)[:500]},
+        )
+
+    def on_chain_end(self, outputs: Any, *, run_id: UUID, **kwargs: Any) -> None:
+        if kwargs.get("parent_run_id") is not None:
+            return
+        self._put(event_type="run_end", category="lifecycle", metadata={"status": "success"})
+        self._flush_sync()
+
+    def on_chain_error(self, error: BaseException, *, run_id: UUID, **kwargs: Any) -> None:
+        if kwargs.get("parent_run_id") is not None:
+            return
+        self._put(
+            event_type="run_error",
+            category="lifecycle",
+            content=str(error),
+            metadata={"error_type": type(error).__name__},
+        )
+        self._flush_sync()
+
+    # -- LLM callbacks --
+
+    def on_chat_model_start(self, serialized: dict, messages: list[list], *, run_id: UUID, **kwargs: Any) -> None:
+        """Capture structured prompt messages for llm_request event."""
+        from deerflow.runtime.converters import langchain_messages_to_openai
+
+        rid = str(run_id)
+        self._llm_start_times[rid] = time.monotonic()
+        self._llm_call_index += 1
+
+        model_name = serialized.get("name", "")
+        self._cached_models[rid] = model_name
+
+        # Convert the first message list (LangChain passes list-of-lists)
+        prompt_msgs = messages[0] if messages else []
+        openai_msgs = langchain_messages_to_openai(prompt_msgs)
+        self._cached_prompts[rid] = openai_msgs
+
+        caller = self._identify_caller(kwargs)
+        self._put(
+            event_type="llm_request",
+            category="trace",
+            content={"model": model_name, "messages": openai_msgs},
+            metadata={"caller": caller, "llm_call_index": self._llm_call_index},
+        )
+
+    def on_llm_start(self, serialized: dict, prompts: list[str], *, run_id: UUID, **kwargs: Any) -> None:
+        # Fallback: on_chat_model_start is preferred. This just tracks latency.
+        self._llm_start_times[str(run_id)] = time.monotonic()
+
+    def on_llm_end(self, response: Any, *, run_id: UUID, **kwargs: Any) -> None:
+        from deerflow.runtime.converters import langchain_to_openai_completion
+
+        try:
+            message = response.generations[0][0].message
+        except (IndexError, AttributeError):
+            logger.debug("on_llm_end: could not extract message from response")
+            return
+
+        caller = self._identify_caller(kwargs)
+
+        # Latency
+        rid = str(run_id)
+        start = self._llm_start_times.pop(rid, None)
+        latency_ms = int((time.monotonic() - start) * 1000) if start else None
+
+        # Token usage from message
+        usage = getattr(message, "usage_metadata", None)
+        usage_dict = dict(usage) if usage else {}
+
+        # Resolve call index
+        call_index = self._llm_call_index
+        if rid not in self._cached_prompts:
+            # Fallback: on_chat_model_start was not called
+            self._llm_call_index += 1
+            call_index = self._llm_call_index
+
+        # Clean up caches
+        self._cached_prompts.pop(rid, None)
+        self._cached_models.pop(rid, None)
+
+        # Trace event: llm_response (OpenAI completion format)
+        content = getattr(message, "content", "")
+        self._put(
+            event_type="llm_response",
+            category="trace",
+            content=langchain_to_openai_completion(message),
+            metadata={
+                "caller": caller,
+                "usage": usage_dict,
+                "latency_ms": latency_ms,
+                "llm_call_index": call_index,
+            },
+        )
+
+        # Message events: only lead_agent gets message-category events.
+        # Content uses message.model_dump() to align with checkpoint format.
+        tool_calls = getattr(message, "tool_calls", None) or []
+        if caller == "lead_agent":
+            resp_meta = getattr(message, "response_metadata", None) or {}
+            model_name = resp_meta.get("model_name") if isinstance(resp_meta, dict) else None
+            if tool_calls:
+                # ai_tool_call: agent decided to use tools
+                self._put(
+                    event_type="ai_tool_call",
+                    category="message",
+                    content=message.model_dump(),
+                    metadata={"model_name": model_name, "finish_reason": "tool_calls"},
+                )
+            elif isinstance(content, str) and content:
+                # ai_message: final text reply
+                self._put(
+                    event_type="ai_message",
+                    category="message",
+                    content=message.model_dump(),
+                    metadata={"model_name": model_name, "finish_reason": "stop"},
+                )
+                self._last_ai_msg = content
+                self._msg_count += 1
+
+        # Token accumulation
+        if self._track_tokens:
+            input_tk = usage_dict.get("input_tokens", 0) or 0
+            output_tk = usage_dict.get("output_tokens", 0) or 0
+            total_tk = usage_dict.get("total_tokens", 0) or 0
+            if total_tk == 0:
+                total_tk = input_tk + output_tk
+            if total_tk > 0:
+                self._total_input_tokens += input_tk
+                self._total_output_tokens += output_tk
+                self._total_tokens += total_tk
+                self._llm_call_count += 1
+                if caller.startswith("subagent:"):
+                    self._subagent_tokens += total_tk
+                elif caller.startswith("middleware:"):
+                    self._middleware_tokens += total_tk
+                else:
+                    self._lead_agent_tokens += total_tk
+
+    def on_llm_error(self, error: BaseException, *, run_id: UUID, **kwargs: Any) -> None:
+        self._llm_start_times.pop(str(run_id), None)
+        self._put(event_type="llm_error", category="trace", content=str(error))
+
+    # -- Tool callbacks --
+
+    def on_tool_start(self, serialized: dict, input_str: str, *, run_id: UUID, **kwargs: Any) -> None:
+        tool_call_id = kwargs.get("tool_call_id")
+        if tool_call_id:
+            self._tool_call_ids[str(run_id)] = tool_call_id
+        self._put(
+            event_type="tool_start",
+            category="trace",
+            metadata={
+                "tool_name": serialized.get("name", ""),
+                "tool_call_id": tool_call_id,
+                "args": str(input_str)[:2000],
+            },
+        )
+
+    def on_tool_end(self, output: Any, *, run_id: UUID, **kwargs: Any) -> None:
+        from langchain_core.messages import ToolMessage
+        from langgraph.types import Command
+
+        # Tools that update graph state return a ``Command`` (e.g.
+        # ``present_files``). LangGraph later unwraps the inner ToolMessage
+        # into checkpoint state, so to stay checkpoint-aligned we must
+        # extract it here rather than storing ``str(Command(...))``.
+        if isinstance(output, Command):
+            update = getattr(output, "update", None) or {}
+            inner_msgs = update.get("messages") if isinstance(update, dict) else None
+            if isinstance(inner_msgs, list):
+                inner_tool_msg = next((m for m in inner_msgs if isinstance(m, ToolMessage)), None)
+                if inner_tool_msg is not None:
+                    output = inner_tool_msg
+
+        # Extract fields from ToolMessage object when LangChain provides one.
+        # LangChain's _format_output wraps tool results into a ToolMessage
+        # with tool_call_id, name, status, and artifact — more complete than
+        # what kwargs alone provides.
+        if isinstance(output, ToolMessage):
+            tool_call_id = output.tool_call_id or kwargs.get("tool_call_id") or self._tool_call_ids.pop(str(run_id), None)
+            tool_name = output.name or kwargs.get("name", "")
+            status = getattr(output, "status", "success") or "success"
+            content_str = output.content if isinstance(output.content, str) else str(output.content)
+            # Use model_dump() for checkpoint-aligned message content.
+            # Override tool_call_id if it was resolved from cache.
+            msg_content = output.model_dump()
+            if msg_content.get("tool_call_id") != tool_call_id:
+                msg_content["tool_call_id"] = tool_call_id
+        else:
+            tool_call_id = kwargs.get("tool_call_id") or self._tool_call_ids.pop(str(run_id), None)
+            tool_name = kwargs.get("name", "")
+            status = "success"
+            content_str = str(output)
+            # Construct checkpoint-aligned dict when output is a plain string.
+            msg_content = ToolMessage(
+                content=content_str,
+                tool_call_id=tool_call_id or "",
+                name=tool_name,
+                status=status,
+            ).model_dump()
+
+        # Trace event (always)
+        self._put(
+            event_type="tool_end",
+            category="trace",
+            content=content_str,
+            metadata={
+                "tool_name": tool_name,
+                "tool_call_id": tool_call_id,
+                "status": status,
+            },
+        )
+
+        # Message event: tool_result (checkpoint-aligned model_dump format)
+        self._put(
+            event_type="tool_result",
+            category="message",
+            content=msg_content,
+            metadata={"tool_name": tool_name, "status": status},
+        )
+
+    def on_tool_error(self, error: BaseException, *, run_id: UUID, **kwargs: Any) -> None:
+        from langchain_core.messages import ToolMessage
+
+        tool_call_id = kwargs.get("tool_call_id") or self._tool_call_ids.pop(str(run_id), None)
+        tool_name = kwargs.get("name", "")
+
+        # Trace event
+        self._put(
+            event_type="tool_error",
+            category="trace",
+            content=str(error),
+            metadata={
+                "tool_name": tool_name,
+                "tool_call_id": tool_call_id,
+            },
+        )
+
+        # Message event: tool_result with error status (checkpoint-aligned)
+        msg_content = ToolMessage(
+            content=str(error),
+            tool_call_id=tool_call_id or "",
+            name=tool_name,
+            status="error",
+        ).model_dump()
+        self._put(
+            event_type="tool_result",
+            category="message",
+            content=msg_content,
+            metadata={"tool_name": tool_name, "status": "error"},
+        )
+
+    # -- Custom event callback --
+
+    def on_custom_event(self, name: str, data: Any, *, run_id: UUID, **kwargs: Any) -> None:
+        from deerflow.runtime.serialization import serialize_lc_object
+
+        if name == "summarization":
+            data_dict = data if isinstance(data, dict) else {}
+            self._put(
+                event_type="summarization",
+                category="trace",
+                content=data_dict.get("summary", ""),
+                metadata={
+                    "replaced_message_ids": data_dict.get("replaced_message_ids", []),
+                    "replaced_count": data_dict.get("replaced_count", 0),
+                },
+            )
+            self._put(
+                event_type="middleware:summarize",
+                category="middleware",
+                content={"role": "system", "content": data_dict.get("summary", "")},
+                metadata={"replaced_count": data_dict.get("replaced_count", 0)},
+            )
+        else:
+            event_data = serialize_lc_object(data) if not isinstance(data, dict) else data
+            self._put(
+                event_type=name,
+                category="trace",
+                metadata=event_data if isinstance(event_data, dict) else {"data": event_data},
+            )
+
+    # -- Internal methods --
+
+    def _put(self, *, event_type: str, category: str, content: str | dict = "", metadata: dict | None = None) -> None:
+        self._buffer.append(
+            {
+                "thread_id": self.thread_id,
+                "run_id": self.run_id,
+                "event_type": event_type,
+                "category": category,
+                "content": content,
+                "metadata": metadata or {},
+                "created_at": datetime.now(UTC).isoformat(),
+            }
+        )
+        if len(self._buffer) >= self._flush_threshold:
+            self._flush_sync()
+
+    def _flush_sync(self) -> None:
+        """Best-effort flush of buffer to RunEventStore.
+
+        BaseCallbackHandler methods are synchronous.  If an event loop is
+        running we schedule an async ``put_batch``; otherwise the events
+        stay in the buffer and are flushed later by the async ``flush()``
+        call in the worker's ``finally`` block.
+        """
+        if not self._buffer:
+            return
+        # Skip if a flush is already in flight — avoids concurrent writes
+        # to the same SQLite file from multiple fire-and-forget tasks.
+        if self._pending_flush_tasks:
+            return
+        try:
+            loop = asyncio.get_running_loop()
+        except RuntimeError:
+            # No event loop — keep events in buffer for later async flush.
+            return
+        batch = self._buffer.copy()
+        self._buffer.clear()
+        task = loop.create_task(self._flush_async(batch))
+        self._pending_flush_tasks.add(task)
+        task.add_done_callback(self._on_flush_done)
+
+    async def _flush_async(self, batch: list[dict]) -> None:
+        try:
+            await self._store.put_batch(batch)
+        except Exception:
+            logger.warning(
+                "Failed to flush %d events for run %s — returning to buffer",
+                len(batch),
+                self.run_id,
+                exc_info=True,
+            )
+            # Return failed events to buffer for retry on next flush
+            self._buffer = batch + self._buffer
+
+    def _on_flush_done(self, task: asyncio.Task) -> None:
+        self._pending_flush_tasks.discard(task)
+        if task.cancelled():
+            return
+        exc = task.exception()
+        if exc:
+            logger.warning("Journal flush task failed: %s", exc)
+
+    def _identify_caller(self, kwargs: dict) -> str:
+        for tag in kwargs.get("tags") or []:
+            if isinstance(tag, str) and (tag.startswith("subagent:") or tag.startswith("middleware:") or tag == "lead_agent"):
+                return tag
+        # Default to lead_agent: the main agent graph does not inject
+        # callback tags, while subagents and middleware explicitly tag
+        # themselves.
+        return "lead_agent"
+
+    # -- Public methods (called by worker) --
+
+    def set_first_human_message(self, content: str) -> None:
+        """Record the first human message for convenience fields."""
+        self._first_human_msg = content[:2000] if content else None
+
+    def record_middleware(self, tag: str, *, name: str, hook: str, action: str, changes: dict) -> None:
+        """Record a middleware state-change event.
+
+        Called by middleware implementations when they perform a meaningful
+        state change (e.g., title generation, summarization, HITL approval).
+        Pure-observation middleware should not call this.
+
+        Args:
+            tag: Short identifier for the middleware (e.g., "title", "summarize",
+                 "guardrail"). Used to form event_type="middleware:{tag}".
+            name: Full middleware class name.
+            hook: Lifecycle hook that triggered the action (e.g., "after_model").
+            action: Specific action performed (e.g., "generate_title").
+            changes: Dict describing the state changes made.
+        """
+        self._put(
+            event_type=f"middleware:{tag}",
+            category="middleware",
+            content={"name": name, "hook": hook, "action": action, "changes": changes},
+        )
+
+    async def flush(self) -> None:
+        """Force flush remaining buffer. Called in worker's finally block."""
+        if self._pending_flush_tasks:
+            await asyncio.gather(*tuple(self._pending_flush_tasks), return_exceptions=True)
+
+        while self._buffer:
+            batch = self._buffer[: self._flush_threshold]
+            del self._buffer[: self._flush_threshold]
+            try:
+                await self._store.put_batch(batch)
+            except Exception:
+                self._buffer = batch + self._buffer
+                raise
+
+    def get_completion_data(self) -> dict:
+        """Return accumulated token and message data for run completion."""
+        return {
+            "total_input_tokens": self._total_input_tokens,
+            "total_output_tokens": self._total_output_tokens,
+            "total_tokens": self._total_tokens,
+            "llm_call_count": self._llm_call_count,
+            "lead_agent_tokens": self._lead_agent_tokens,
+            "subagent_tokens": self._subagent_tokens,
+            "middleware_tokens": self._middleware_tokens,
+            "message_count": self._msg_count,
+            "last_ai_message": self._last_ai_msg,
+            "first_human_message": self._first_human_msg,
+        }
diff --git a/backend/packages/harness/deerflow/runtime/runs/__init__.py b/backend/packages/harness/deerflow/runtime/runs/__init__.py
index afed90f48..9faa30c17 100644
--- a/backend/packages/harness/deerflow/runtime/runs/__init__.py
+++ b/backend/packages/harness/deerflow/runtime/runs/__init__.py
@@ -2,11 +2,12 @@
 
 from .manager import ConflictError, RunManager, RunRecord, UnsupportedStrategyError
 from .schemas import DisconnectMode, RunStatus
-from .worker import run_agent
+from .worker import RunContext, run_agent
 
 __all__ = [
     "ConflictError",
     "DisconnectMode",
+    "RunContext",
     "RunManager",
     "RunRecord",
     "RunStatus",
diff --git a/backend/packages/harness/deerflow/runtime/runs/manager.py b/backend/packages/harness/deerflow/runtime/runs/manager.py
index e61a1707f..0a0794d87 100644
--- a/backend/packages/harness/deerflow/runtime/runs/manager.py
+++ b/backend/packages/harness/deerflow/runtime/runs/manager.py
@@ -1,4 +1,4 @@
-"""In-memory run registry."""
+"""In-memory run registry with optional persistent RunStore backing."""
 
 from __future__ import annotations
 
@@ -7,9 +7,13 @@ import logging
 import uuid
 from dataclasses import dataclass, field
 from datetime import UTC, datetime
+from typing import TYPE_CHECKING
 
 from .schemas import DisconnectMode, RunStatus
 
+if TYPE_CHECKING:
+    from deerflow.runtime.runs.store.base import RunStore
+
 logger = logging.getLogger(__name__)
 
 
@@ -38,11 +42,44 @@ class RunRecord:
 
 
 class RunManager:
-    """In-memory run registry.  All mutations are protected by an asyncio lock."""
+    """In-memory run registry with optional persistent RunStore backing.
 
-    def __init__(self) -> None:
+    All mutations are protected by an asyncio lock. When a ``store`` is
+    provided, serializable metadata is also persisted to the store so
+    that run history survives process restarts.
+    """
+
+    def __init__(self, store: RunStore | None = None) -> None:
         self._runs: dict[str, RunRecord] = {}
         self._lock = asyncio.Lock()
+        self._store = store
+
+    async def _persist_to_store(self, record: RunRecord, *, follow_up_to_run_id: str | None = None) -> None:
+        """Best-effort persist run record to backing store."""
+        if self._store is None:
+            return
+        try:
+            await self._store.put(
+                record.run_id,
+                thread_id=record.thread_id,
+                assistant_id=record.assistant_id,
+                status=record.status.value,
+                multitask_strategy=record.multitask_strategy,
+                metadata=record.metadata or {},
+                kwargs=record.kwargs or {},
+                created_at=record.created_at,
+                follow_up_to_run_id=follow_up_to_run_id,
+            )
+        except Exception:
+            logger.warning("Failed to persist run %s to store", record.run_id, exc_info=True)
+
+    async def update_run_completion(self, run_id: str, **kwargs) -> None:
+        """Persist token usage and completion data to the backing store."""
+        if self._store is not None:
+            try:
+                await self._store.update_run_completion(run_id, **kwargs)
+            except Exception:
+                logger.warning("Failed to persist run completion for %s", run_id, exc_info=True)
 
     async def create(
         self,
@@ -53,6 +90,7 @@ class RunManager:
         metadata: dict | None = None,
         kwargs: dict | None = None,
         multitask_strategy: str = "reject",
+        follow_up_to_run_id: str | None = None,
     ) -> RunRecord:
         """Create a new pending run and register it."""
         run_id = str(uuid.uuid4())
@@ -71,6 +109,7 @@ class RunManager:
         )
         async with self._lock:
             self._runs[run_id] = record
+        await self._persist_to_store(record, follow_up_to_run_id=follow_up_to_run_id)
         logger.info("Run created: run_id=%s thread_id=%s", run_id, thread_id)
         return record
 
@@ -96,6 +135,11 @@ class RunManager:
             record.updated_at = _now_iso()
             if error is not None:
                 record.error = error
+        if self._store is not None:
+            try:
+                await self._store.update_status(run_id, status.value, error=error)
+            except Exception:
+                logger.warning("Failed to persist status update for run %s", run_id, exc_info=True)
         logger.info("Run %s -> %s", run_id, status.value)
 
     async def cancel(self, run_id: str, *, action: str = "interrupt") -> bool:
@@ -132,6 +176,7 @@ class RunManager:
         metadata: dict | None = None,
         kwargs: dict | None = None,
         multitask_strategy: str = "reject",
+        follow_up_to_run_id: str | None = None,
     ) -> RunRecord:
         """Atomically check for inflight runs and create a new one.
 
@@ -185,6 +230,7 @@ class RunManager:
             )
             self._runs[run_id] = record
 
+        await self._persist_to_store(record, follow_up_to_run_id=follow_up_to_run_id)
         logger.info("Run created: run_id=%s thread_id=%s", run_id, thread_id)
         return record
 
diff --git a/backend/packages/harness/deerflow/runtime/runs/store/__init__.py b/backend/packages/harness/deerflow/runtime/runs/store/__init__.py
new file mode 100644
index 000000000..265a6fffb
--- /dev/null
+++ b/backend/packages/harness/deerflow/runtime/runs/store/__init__.py
@@ -0,0 +1,4 @@
+from deerflow.runtime.runs.store.base import RunStore
+from deerflow.runtime.runs.store.memory import MemoryRunStore
+
+__all__ = ["MemoryRunStore", "RunStore"]
diff --git a/backend/packages/harness/deerflow/runtime/runs/store/base.py b/backend/packages/harness/deerflow/runtime/runs/store/base.py
new file mode 100644
index 000000000..3212e8ca3
--- /dev/null
+++ b/backend/packages/harness/deerflow/runtime/runs/store/base.py
@@ -0,0 +1,96 @@
+"""Abstract interface for run metadata storage.
+
+RunManager depends on this interface. Implementations:
+- MemoryRunStore: in-memory dict (development, tests)
+- Future: RunRepository backed by SQLAlchemy ORM
+
+All methods accept an optional user_id for user isolation.
+When user_id is None, no user filtering is applied (single-user mode).
+"""
+
+from __future__ import annotations
+
+import abc
+from typing import Any
+
+
+class RunStore(abc.ABC):
+    @abc.abstractmethod
+    async def put(
+        self,
+        run_id: str,
+        *,
+        thread_id: str,
+        assistant_id: str | None = None,
+        user_id: str | None = None,
+        status: str = "pending",
+        multitask_strategy: str = "reject",
+        metadata: dict[str, Any] | None = None,
+        kwargs: dict[str, Any] | None = None,
+        error: str | None = None,
+        created_at: str | None = None,
+        follow_up_to_run_id: str | None = None,
+    ) -> None:
+        pass
+
+    @abc.abstractmethod
+    async def get(self, run_id: str) -> dict[str, Any] | None:
+        pass
+
+    @abc.abstractmethod
+    async def list_by_thread(
+        self,
+        thread_id: str,
+        *,
+        user_id: str | None = None,
+        limit: int = 100,
+    ) -> list[dict[str, Any]]:
+        pass
+
+    @abc.abstractmethod
+    async def update_status(
+        self,
+        run_id: str,
+        status: str,
+        *,
+        error: str | None = None,
+    ) -> None:
+        pass
+
+    @abc.abstractmethod
+    async def delete(self, run_id: str) -> None:
+        pass
+
+    @abc.abstractmethod
+    async def update_run_completion(
+        self,
+        run_id: str,
+        *,
+        status: str,
+        total_input_tokens: int = 0,
+        total_output_tokens: int = 0,
+        total_tokens: int = 0,
+        llm_call_count: int = 0,
+        lead_agent_tokens: int = 0,
+        subagent_tokens: int = 0,
+        middleware_tokens: int = 0,
+        message_count: int = 0,
+        last_ai_message: str | None = None,
+        first_human_message: str | None = None,
+        error: str | None = None,
+    ) -> None:
+        pass
+
+    @abc.abstractmethod
+    async def list_pending(self, *, before: str | None = None) -> list[dict[str, Any]]:
+        pass
+
+    @abc.abstractmethod
+    async def aggregate_tokens_by_thread(self, thread_id: str) -> dict[str, Any]:
+        """Aggregate token usage for completed runs in a thread.
+
+        Returns a dict with keys: total_tokens, total_input_tokens,
+        total_output_tokens, total_runs, by_model (model_name → {tokens, runs}),
+        by_caller ({lead_agent, subagent, middleware}).
+        """
+        pass
diff --git a/backend/packages/harness/deerflow/runtime/runs/store/memory.py b/backend/packages/harness/deerflow/runtime/runs/store/memory.py
new file mode 100644
index 000000000..0b2b05f07
--- /dev/null
+++ b/backend/packages/harness/deerflow/runtime/runs/store/memory.py
@@ -0,0 +1,100 @@
+"""In-memory RunStore. Used when database.backend=memory (default) and in tests.
+
+Equivalent to the original RunManager._runs dict behavior.
+"""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+from typing import Any
+
+from deerflow.runtime.runs.store.base import RunStore
+
+
+class MemoryRunStore(RunStore):
+    def __init__(self) -> None:
+        self._runs: dict[str, dict[str, Any]] = {}
+
+    async def put(
+        self,
+        run_id,
+        *,
+        thread_id,
+        assistant_id=None,
+        user_id=None,
+        status="pending",
+        multitask_strategy="reject",
+        metadata=None,
+        kwargs=None,
+        error=None,
+        created_at=None,
+        follow_up_to_run_id=None,
+    ):
+        now = datetime.now(UTC).isoformat()
+        self._runs[run_id] = {
+            "run_id": run_id,
+            "thread_id": thread_id,
+            "assistant_id": assistant_id,
+            "user_id": user_id,
+            "status": status,
+            "multitask_strategy": multitask_strategy,
+            "metadata": metadata or {},
+            "kwargs": kwargs or {},
+            "error": error,
+            "follow_up_to_run_id": follow_up_to_run_id,
+            "created_at": created_at or now,
+            "updated_at": now,
+        }
+
+    async def get(self, run_id):
+        return self._runs.get(run_id)
+
+    async def list_by_thread(self, thread_id, *, user_id=None, limit=100):
+        results = [r for r in self._runs.values() if r["thread_id"] == thread_id and (user_id is None or r.get("user_id") == user_id)]
+        results.sort(key=lambda r: r["created_at"], reverse=True)
+        return results[:limit]
+
+    async def update_status(self, run_id, status, *, error=None):
+        if run_id in self._runs:
+            self._runs[run_id]["status"] = status
+            if error is not None:
+                self._runs[run_id]["error"] = error
+            self._runs[run_id]["updated_at"] = datetime.now(UTC).isoformat()
+
+    async def delete(self, run_id):
+        self._runs.pop(run_id, None)
+
+    async def update_run_completion(self, run_id, *, status, **kwargs):
+        if run_id in self._runs:
+            self._runs[run_id]["status"] = status
+            for key, value in kwargs.items():
+                if value is not None:
+                    self._runs[run_id][key] = value
+            self._runs[run_id]["updated_at"] = datetime.now(UTC).isoformat()
+
+    async def list_pending(self, *, before=None):
+        now = before or datetime.now(UTC).isoformat()
+        results = [r for r in self._runs.values() if r["status"] == "pending" and r["created_at"] <= now]
+        results.sort(key=lambda r: r["created_at"])
+        return results
+
+    async def aggregate_tokens_by_thread(self, thread_id: str) -> dict[str, Any]:
+        completed = [r for r in self._runs.values() if r["thread_id"] == thread_id and r.get("status") in ("success", "error")]
+        by_model: dict[str, dict] = {}
+        for r in completed:
+            model = r.get("model_name") or "unknown"
+            entry = by_model.setdefault(model, {"tokens": 0, "runs": 0})
+            entry["tokens"] += r.get("total_tokens", 0)
+            entry["runs"] += 1
+        return {
+            "total_tokens": sum(r.get("total_tokens", 0) for r in completed),
+            "total_input_tokens": sum(r.get("total_input_tokens", 0) for r in completed),
+            "total_output_tokens": sum(r.get("total_output_tokens", 0) for r in completed),
+            "total_runs": len(completed),
+            "by_model": by_model,
+            "by_caller": {
+                "lead_agent": sum(r.get("lead_agent_tokens", 0) for r in completed),
+                "subagent": sum(r.get("subagent_tokens", 0) for r in completed),
+                "middleware": sum(r.get("middleware_tokens", 0) for r in completed),
+            },
+        }
diff --git a/backend/packages/harness/deerflow/runtime/runs/worker.py b/backend/packages/harness/deerflow/runtime/runs/worker.py
index c8b074f7a..a7d6c352e 100644
--- a/backend/packages/harness/deerflow/runtime/runs/worker.py
+++ b/backend/packages/harness/deerflow/runtime/runs/worker.py
@@ -19,8 +19,14 @@ import asyncio
 import copy
 import inspect
 import logging
-from typing import Any, Literal
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any, Literal
 
+if TYPE_CHECKING:
+    from langchain_core.messages import HumanMessage
+
+from deerflow.config.app_config import AppConfig
+from deerflow.config.deer_flow_context import DeerFlowContext
 from deerflow.runtime.serialization import serialize
 from deerflow.runtime.stream_bridge import StreamBridge
 
@@ -33,13 +39,30 @@ logger = logging.getLogger(__name__)
 _VALID_LG_MODES = {"values", "updates", "checkpoints", "tasks", "debug", "messages", "custom"}
 
 
+@dataclass(frozen=True)
+class RunContext:
+    """Infrastructure dependencies for a single agent run.
+
+    Groups checkpointer, store, and persistence-related singletons so that
+    ``run_agent`` (and any future callers) receive one object instead of a
+    growing list of keyword arguments.
+    """
+
+    checkpointer: Any
+    store: Any | None = field(default=None)
+    event_store: Any | None = field(default=None)
+    run_events_config: Any | None = field(default=None)
+    thread_store: Any | None = field(default=None)
+    follow_up_to_run_id: str | None = field(default=None)
+    app_config: AppConfig | None = field(default=None)
+
+
 async def run_agent(
     bridge: StreamBridge,
     run_manager: RunManager,
     record: RunRecord,
     *,
-    checkpointer: Any,
-    store: Any | None = None,
+    ctx: RunContext,
     agent_factory: Any,
     graph_input: dict,
     config: dict,
@@ -50,6 +73,14 @@ async def run_agent(
 ) -> None:
     """Execute an agent in the background, publishing events to *bridge*."""
 
+    # Unpack infrastructure dependencies from RunContext.
+    checkpointer = ctx.checkpointer
+    store = ctx.store
+    event_store = ctx.event_store
+    run_events_config = ctx.run_events_config
+    thread_store = ctx.thread_store
+    follow_up_to_run_id = ctx.follow_up_to_run_id
+
     run_id = record.run_id
     thread_id = record.thread_id
     requested_modes: set[str] = set(stream_modes or ["values"])
@@ -57,6 +88,10 @@ async def run_agent(
     pre_run_snapshot: dict[str, Any] | None = None
     snapshot_capture_failed = False
 
+    journal = None
+
+    journal = None
+
     # Track whether "events" was requested but skipped
     if "events" in requested_modes:
         logger.info(
@@ -65,6 +100,38 @@ async def run_agent(
         )
 
     try:
+        # Initialize RunJournal + write human_message event.
+        # These are inside the try block so any exception (e.g. a DB
+        # error writing the event) flows through the except/finally
+        # path that publishes an "end" event to the SSE bridge —
+        # otherwise a failure here would leave the stream hanging
+        # with no terminator.
+        if event_store is not None:
+            from deerflow.runtime.journal import RunJournal
+
+            journal = RunJournal(
+                run_id=run_id,
+                thread_id=thread_id,
+                event_store=event_store,
+                track_token_usage=getattr(run_events_config, "track_token_usage", True),
+            )
+
+            human_msg = _extract_human_message(graph_input)
+            if human_msg is not None:
+                msg_metadata = {}
+                if follow_up_to_run_id:
+                    msg_metadata["follow_up_to_run_id"] = follow_up_to_run_id
+                await event_store.put(
+                    thread_id=thread_id,
+                    run_id=run_id,
+                    event_type="human_message",
+                    category="message",
+                    content=human_msg.model_dump(),
+                    metadata=msg_metadata or None,
+                )
+                content = human_msg.content
+                journal.set_first_human_message(content if isinstance(content, str) else str(content))
+
         # 1. Mark running
         await run_manager.set_status(run_id, RunStatus.running)
 
@@ -98,17 +165,21 @@ async def run_agent(
 
         # 3. Build the agent
         from langchain_core.runnables import RunnableConfig
-        from langgraph.runtime import Runtime
 
-        # Inject runtime context so middlewares can access thread_id
-        # (langgraph-cli does this automatically; we must do it manually)
-        runtime = Runtime(context={"thread_id": thread_id}, store=store)
-        # If the caller already set a ``context`` key (LangGraph >= 0.6.0
-        # prefers it over ``configurable`` for thread-level data), make
-        # sure ``thread_id`` is available there too.
-        if "context" in config and isinstance(config["context"], dict):
-            config["context"].setdefault("thread_id", thread_id)
-        config.setdefault("configurable", {})["__pregel_runtime"] = runtime
+        # Construct typed context for the agent run.
+        # LangGraph's astream(context=...) injects this into Runtime.context
+        # so middleware/tools can access it via resolve_context().
+        if ctx.app_config is None:
+            raise RuntimeError("RunContext.app_config is required — Gateway must populate it via get_run_context")
+        deer_flow_context = DeerFlowContext(
+            app_config=ctx.app_config,
+            thread_id=thread_id,
+        )
+
+        # Inject RunJournal as a LangChain callback handler.
+        # on_llm_end captures token usage; on_chain_start/end captures lifecycle.
+        if journal is not None:
+            config.setdefault("callbacks", []).append(journal)
 
         runnable_config = RunnableConfig(**config)
         agent = agent_factory(config=runnable_config)
@@ -155,7 +226,7 @@ async def run_agent(
         if len(lg_modes) == 1 and not stream_subgraphs:
             # Single mode, no subgraphs: astream yields raw chunks
             single_mode = lg_modes[0]
-            async for chunk in agent.astream(graph_input, config=runnable_config, stream_mode=single_mode):
+            async for chunk in agent.astream(graph_input, config=runnable_config, context=deer_flow_context, stream_mode=single_mode):
                 if record.abort_event.is_set():
                     logger.info("Run %s abort requested — stopping", run_id)
                     break
@@ -166,6 +237,7 @@ async def run_agent(
             async for item in agent.astream(
                 graph_input,
                 config=runnable_config,
+                context=deer_flow_context,
                 stream_mode=lg_modes,
                 subgraphs=stream_subgraphs,
             ):
@@ -236,6 +308,41 @@ async def run_agent(
         )
 
     finally:
+        # Flush any buffered journal events and persist completion data
+        if journal is not None:
+            try:
+                await journal.flush()
+            except Exception:
+                logger.warning("Failed to flush journal for run %s", run_id, exc_info=True)
+
+            try:
+                # Persist token usage + convenience fields to RunStore
+                completion = journal.get_completion_data()
+                await run_manager.update_run_completion(run_id, status=record.status.value, **completion)
+            except Exception:
+                logger.warning("Failed to persist run completion for %s (non-fatal)", run_id, exc_info=True)
+
+        # Sync title from checkpoint to threads_meta.display_name
+        if checkpointer is not None and thread_store is not None:
+            try:
+                ckpt_config = {"configurable": {"thread_id": thread_id, "checkpoint_ns": ""}}
+                ckpt_tuple = await checkpointer.aget_tuple(ckpt_config)
+                if ckpt_tuple is not None:
+                    ckpt = getattr(ckpt_tuple, "checkpoint", {}) or {}
+                    title = ckpt.get("channel_values", {}).get("title")
+                    if title:
+                        await thread_store.update_display_name(thread_id, title)
+            except Exception:
+                logger.debug("Failed to sync title for thread %s (non-fatal)", thread_id)
+
+        # Update threads_meta status based on run outcome
+        if thread_store is not None:
+            try:
+                final_status = "idle" if record.status == RunStatus.success else record.status.value
+                await thread_store.update_status(thread_id, final_status)
+            except Exception:
+                logger.debug("Failed to update thread_meta status for %s (non-fatal)", thread_id)
+
         await bridge.publish_end(run_id)
         asyncio.create_task(bridge.cleanup(run_id, delay=60))
 
@@ -355,6 +462,31 @@ def _lg_mode_to_sse_event(mode: str) -> str:
     return mode
 
 
+def _extract_human_message(graph_input: dict) -> HumanMessage | None:
+    """Extract or construct a HumanMessage from graph_input for event recording.
+
+    Returns a LangChain HumanMessage so callers can use .model_dump() to get
+    the checkpoint-aligned serialization format.
+    """
+    from langchain_core.messages import HumanMessage
+
+    messages = graph_input.get("messages")
+    if not messages:
+        return None
+    last = messages[-1] if isinstance(messages, list) else messages
+    if isinstance(last, HumanMessage):
+        return last
+    if isinstance(last, str):
+        return HumanMessage(content=last) if last else None
+    if hasattr(last, "content"):
+        content = last.content
+        return HumanMessage(content=content)
+    if isinstance(last, dict):
+        content = last.get("content", "")
+        return HumanMessage(content=content) if content else None
+    return None
+
+
 def _unpack_stream_item(
     item: Any,
     lg_modes: list[str],
diff --git a/backend/packages/harness/deerflow/runtime/store/async_provider.py b/backend/packages/harness/deerflow/runtime/store/async_provider.py
index bc7a60559..d7c4a4ae5 100644
--- a/backend/packages/harness/deerflow/runtime/store/async_provider.py
+++ b/backend/packages/harness/deerflow/runtime/store/async_provider.py
@@ -23,7 +23,7 @@ from collections.abc import AsyncIterator
 
 from langgraph.store.base import BaseStore
 
-from deerflow.config.app_config import get_app_config
+from deerflow.config.app_config import AppConfig
 from deerflow.runtime.store.provider import POSTGRES_CONN_REQUIRED, POSTGRES_STORE_INSTALL, SQLITE_STORE_INSTALL, ensure_sqlite_parent_dir, resolve_sqlite_conn_str
 
 logger = logging.getLogger(__name__)
@@ -86,28 +86,26 @@ async def _async_store(config) -> AsyncIterator[BaseStore]:
 
 
 @contextlib.asynccontextmanager
-async def make_store() -> AsyncIterator[BaseStore]:
+async def make_store(app_config: AppConfig) -> AsyncIterator[BaseStore]:
     """Async context manager that yields a Store whose backend matches the
     configured checkpointer.
 
     Reads from the same ``checkpointer`` section of *config.yaml* used by
-    :func:`deerflow.agents.checkpointer.async_provider.make_checkpointer` so
+    :func:`deerflow.runtime.checkpointer.async_provider.make_checkpointer` so
     that both singletons always use the same persistence technology::
 
-        async with make_store() as store:
+        async with make_store(app_config) as store:
             app.state.store = store
 
     Yields an :class:`~langgraph.store.memory.InMemoryStore` when no
     ``checkpointer`` section is configured (emits a WARNING in that case).
     """
-    config = get_app_config()
-
-    if config.checkpointer is None:
+    if app_config.checkpointer is None:
         from langgraph.store.memory import InMemoryStore
 
         logger.warning("No 'checkpointer' section in config.yaml — using InMemoryStore for the store. Thread list will be lost on server restart. Configure a sqlite or postgres backend for persistence.")
         yield InMemoryStore()
         return
 
-    async with _async_store(config.checkpointer) as store:
+    async with _async_store(app_config.checkpointer) as store:
         yield store
diff --git a/backend/packages/harness/deerflow/runtime/store/provider.py b/backend/packages/harness/deerflow/runtime/store/provider.py
index a9394fb9f..b441d5fcf 100644
--- a/backend/packages/harness/deerflow/runtime/store/provider.py
+++ b/backend/packages/harness/deerflow/runtime/store/provider.py
@@ -26,7 +26,7 @@ from collections.abc import Iterator
 
 from langgraph.store.base import BaseStore
 
-from deerflow.config.app_config import get_app_config
+from deerflow.config.app_config import AppConfig
 from deerflow.runtime.store._sqlite_utils import ensure_sqlite_parent_dir, resolve_sqlite_conn_str
 
 logger = logging.getLogger(__name__)
@@ -100,7 +100,7 @@ _store: BaseStore | None = None
 _store_ctx = None  # open context manager keeping the connection alive
 
 
-def get_store() -> BaseStore:
+def get_store(app_config: AppConfig) -> BaseStore:
     """Return the global sync Store singleton, creating it on first call.
 
     Returns an :class:`~langgraph.store.memory.InMemoryStore` when no
@@ -115,19 +115,10 @@ def get_store() -> BaseStore:
     if _store is not None:
         return _store
 
-    # Lazily load app config, mirroring the checkpointer singleton pattern so
-    # that tests that set the global checkpointer config explicitly remain isolated.
-    from deerflow.config.app_config import _app_config
-    from deerflow.config.checkpointer_config import get_checkpointer_config
-
-    config = get_checkpointer_config()
-
-    if config is None and _app_config is None:
-        try:
-            get_app_config()
-        except FileNotFoundError:
-            pass
-        config = get_checkpointer_config()
+    # See matching comment in checkpointer/provider.py: a missing config.yaml
+    # is a deployment error, not a cue to silently pick InMemoryStore. Only
+    # the explicit "no checkpointer section" path falls through to memory.
+    config = app_config.checkpointer
 
     if config is None:
         from langgraph.store.memory import InMemoryStore
@@ -163,26 +154,25 @@ def reset_store() -> None:
 
 
 @contextlib.contextmanager
-def store_context() -> Iterator[BaseStore]:
+def store_context(app_config: AppConfig) -> Iterator[BaseStore]:
     """Sync context manager that yields a Store and cleans up on exit.
 
     Unlike :func:`get_store`, this does **not** cache the instance — each
     ``with`` block creates and destroys its own connection.  Use it in CLI
     scripts or tests where you want deterministic cleanup::
 
-        with store_context() as store:
+        with store_context(app_config) as store:
             store.put(("threads",), thread_id, {...})
 
     Yields an :class:`~langgraph.store.memory.InMemoryStore` when no
     checkpointer is configured in *config.yaml*.
     """
-    config = get_app_config()
-    if config.checkpointer is None:
+    if app_config.checkpointer is None:
         from langgraph.store.memory import InMemoryStore
 
         logger.warning("No 'checkpointer' section in config.yaml — using InMemoryStore for the store. Thread list will be lost on server restart. Configure a sqlite or postgres backend for persistence.")
         yield InMemoryStore()
         return
 
-    with _sync_store_cm(config.checkpointer) as store:
+    with _sync_store_cm(app_config.checkpointer) as store:
         yield store
diff --git a/backend/packages/harness/deerflow/runtime/stream_bridge/async_provider.py b/backend/packages/harness/deerflow/runtime/stream_bridge/async_provider.py
index 891f79fa0..a1297e3bb 100644
--- a/backend/packages/harness/deerflow/runtime/stream_bridge/async_provider.py
+++ b/backend/packages/harness/deerflow/runtime/stream_bridge/async_provider.py
@@ -1,7 +1,7 @@
 """Async stream bridge factory.
 
 Provides an **async context manager** aligned with
-:func:`deerflow.agents.checkpointer.async_provider.make_checkpointer`.
+:func:`deerflow.runtime.checkpointer.async_provider.make_checkpointer`.
 
 Usage (e.g. FastAPI lifespan)::
 
@@ -17,7 +17,7 @@ import contextlib
 import logging
 from collections.abc import AsyncIterator
 
-from deerflow.config.stream_bridge_config import get_stream_bridge_config
+from deerflow.config.app_config import AppConfig
 
 from .base import StreamBridge
 
@@ -25,14 +25,13 @@ logger = logging.getLogger(__name__)
 
 
 @contextlib.asynccontextmanager
-async def make_stream_bridge(config=None) -> AsyncIterator[StreamBridge]:
+async def make_stream_bridge(app_config: AppConfig) -> AsyncIterator[StreamBridge]:
     """Async context manager that yields a :class:`StreamBridge`.
 
-    Falls back to :class:`MemoryStreamBridge` when no configuration is
-    provided and nothing is set globally.
+    Falls back to :class:`MemoryStreamBridge` when no ``stream_bridge``
+    section is configured.
     """
-    if config is None:
-        config = get_stream_bridge_config()
+    config = app_config.stream_bridge
 
     if config is None or config.type == "memory":
         from deerflow.runtime.stream_bridge.memory import MemoryStreamBridge
diff --git a/backend/packages/harness/deerflow/runtime/user_context.py b/backend/packages/harness/deerflow/runtime/user_context.py
new file mode 100644
index 000000000..ffe4be690
--- /dev/null
+++ b/backend/packages/harness/deerflow/runtime/user_context.py
@@ -0,0 +1,167 @@
+"""Request-scoped user context for user-based authorization.
+
+This module holds a :class:`~contextvars.ContextVar` that the gateway's
+auth middleware sets after a successful authentication. Repository
+methods read the contextvar via a sentinel default parameter, letting
+routers stay free of ``user_id`` boilerplate.
+
+Three-state semantics for the repository ``user_id`` parameter (the
+consumer side of this module lives in ``deerflow.persistence.*``):
+
+- ``_AUTO`` (module-private sentinel, default): read from contextvar;
+  raise :class:`RuntimeError` if unset.
+- Explicit ``str``: use the provided value, overriding contextvar.
+- Explicit ``None``: no WHERE clause — used only by migration scripts
+  and admin CLIs that intentionally bypass isolation.
+
+Dependency direction
+--------------------
+``persistence`` (lower layer) reads from this module; ``gateway.auth``
+(higher layer) writes to it. ``CurrentUser`` is defined here as a
+:class:`typing.Protocol` so that ``persistence`` never needs to import
+the concrete ``User`` class from ``gateway.auth.models``. Any object
+with an ``.id: str`` attribute structurally satisfies the protocol.
+
+Asyncio semantics
+-----------------
+``ContextVar`` is task-local under asyncio, not thread-local. Each
+FastAPI request runs in its own task, so the context is naturally
+isolated. ``asyncio.create_task`` and ``asyncio.to_thread`` inherit the
+parent task's context, which is typically the intended behaviour; if
+a background task must *not* see the foreground user, wrap it with
+``contextvars.copy_context()`` to get a clean copy.
+"""
+
+from __future__ import annotations
+
+from contextvars import ContextVar, Token
+from typing import Final, Protocol, runtime_checkable
+
+
+@runtime_checkable
+class CurrentUser(Protocol):
+    """Structural type for the current authenticated user.
+
+    Any object with an ``.id: str`` attribute satisfies this protocol.
+    Concrete implementations live in ``app.gateway.auth.models.User``.
+    """
+
+    id: str
+
+
+_current_user: Final[ContextVar[CurrentUser | None]] = ContextVar("deerflow_current_user", default=None)
+
+
+def set_current_user(user: CurrentUser) -> Token[CurrentUser | None]:
+    """Set the current user for this async task.
+
+    Returns a reset token that should be passed to
+    :func:`reset_current_user` in a ``finally`` block to restore the
+    previous context.
+    """
+    return _current_user.set(user)
+
+
+def reset_current_user(token: Token[CurrentUser | None]) -> None:
+    """Restore the context to the state captured by ``token``."""
+    _current_user.reset(token)
+
+
+def get_current_user() -> CurrentUser | None:
+    """Return the current user, or ``None`` if unset.
+
+    Safe to call in any context. Used by code paths that can proceed
+    without a user (e.g. migration scripts, public endpoints).
+    """
+    return _current_user.get()
+
+
+def require_current_user() -> CurrentUser:
+    """Return the current user, or raise :class:`RuntimeError`.
+
+    Used by repository code that must not be called outside a
+    request-authenticated context. The error message is phrased so
+    that a caller debugging a stack trace can locate the offending
+    code path.
+    """
+    user = _current_user.get()
+    if user is None:
+        raise RuntimeError("repository accessed without user context")
+    return user
+
+
+# ---------------------------------------------------------------------------
+# Effective user_id helpers (filesystem isolation)
+# ---------------------------------------------------------------------------
+
+DEFAULT_USER_ID: Final[str] = "default"
+
+
+def get_effective_user_id() -> str:
+    """Return the current user's id as a string, or DEFAULT_USER_ID if unset.
+
+    Unlike :func:`require_current_user` this never raises — it is designed
+    for filesystem-path resolution where a valid user bucket is always needed.
+    """
+    user = _current_user.get()
+    if user is None:
+        return DEFAULT_USER_ID
+    return str(user.id)
+
+
+# ---------------------------------------------------------------------------
+# Sentinel-based user_id resolution
+# ---------------------------------------------------------------------------
+#
+# Repository methods accept a ``user_id`` keyword-only argument that
+# defaults to ``AUTO``. The three possible values drive distinct
+# behaviours; see the docstring on :func:`resolve_user_id`.
+
+
+class _AutoSentinel:
+    """Singleton marker meaning 'resolve user_id from contextvar'."""
+
+    _instance: _AutoSentinel | None = None
+
+    def __new__(cls) -> _AutoSentinel:
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
+    def __repr__(self) -> str:
+        return "<AUTO>"
+
+
+AUTO: Final[_AutoSentinel] = _AutoSentinel()
+
+
+def resolve_user_id(
+    value: str | None | _AutoSentinel,
+    *,
+    method_name: str = "repository method",
+) -> str | None:
+    """Resolve the user_id parameter passed to a repository method.
+
+    Three-state semantics:
+
+    - :data:`AUTO` (default): read from contextvar; raise
+      :class:`RuntimeError` if no user is in context. This is the
+      common case for request-scoped calls.
+    - Explicit ``str``: use the provided id verbatim, overriding any
+      contextvar value. Useful for tests and admin-override flows.
+    - Explicit ``None``: no filter — the repository should skip the
+      user_id WHERE clause entirely. Reserved for migration scripts
+      and CLI tools that intentionally bypass isolation.
+    """
+    if isinstance(value, _AutoSentinel):
+        user = _current_user.get()
+        if user is None:
+            raise RuntimeError(f"{method_name} called with user_id=AUTO but no user context is set; pass an explicit user_id, set the contextvar via auth middleware, or opt out with user_id=None for migration/CLI paths.")
+        # Coerce to ``str`` at the boundary: ``User.id`` is typed as
+        # ``UUID`` for the API surface, but the persistence layer
+        # stores ``user_id`` as ``String(64)`` and aiosqlite cannot
+        # bind a raw UUID object to a VARCHAR column ("type 'UUID' is
+        # not supported"). Honour the documented return type here
+        # rather than ripple a type change through every caller.
+        return str(user.id)
+    return value
diff --git a/backend/packages/harness/deerflow/sandbox/local/local_sandbox_provider.py b/backend/packages/harness/deerflow/sandbox/local/local_sandbox_provider.py
index 651db11ec..88102b887 100644
--- a/backend/packages/harness/deerflow/sandbox/local/local_sandbox_provider.py
+++ b/backend/packages/harness/deerflow/sandbox/local/local_sandbox_provider.py
@@ -1,10 +1,14 @@
 import logging
 from pathlib import Path
+from typing import TYPE_CHECKING
 
 from deerflow.sandbox.local.local_sandbox import LocalSandbox, PathMapping
 from deerflow.sandbox.sandbox import Sandbox
 from deerflow.sandbox.sandbox_provider import SandboxProvider
 
+if TYPE_CHECKING:
+    from deerflow.config.app_config import AppConfig
+
 logger = logging.getLogger(__name__)
 
 _singleton: LocalSandbox | None = None
@@ -13,8 +17,9 @@ _singleton: LocalSandbox | None = None
 class LocalSandboxProvider(SandboxProvider):
     uses_thread_data_mounts = True
 
-    def __init__(self):
+    def __init__(self, app_config: "AppConfig"):
         """Initialize the local sandbox provider with path mappings."""
+        self._app_config = app_config
         self._path_mappings = self._setup_path_mappings()
 
     def _setup_path_mappings(self) -> list[PathMapping]:
@@ -31,9 +36,7 @@ class LocalSandboxProvider(SandboxProvider):
 
         # Map skills container path to local skills directory
         try:
-            from deerflow.config import get_app_config
-
-            config = get_app_config()
+            config = self._app_config
             skills_path = config.skills.get_skills_path()
             container_path = config.skills.container_path
 
diff --git a/backend/packages/harness/deerflow/sandbox/middleware.py b/backend/packages/harness/deerflow/sandbox/middleware.py
index deefc2397..bf4f6b65e 100644
--- a/backend/packages/harness/deerflow/sandbox/middleware.py
+++ b/backend/packages/harness/deerflow/sandbox/middleware.py
@@ -6,6 +6,7 @@ from langchain.agents.middleware import AgentMiddleware
 from langgraph.runtime import Runtime
 
 from deerflow.agents.thread_state import SandboxState, ThreadDataState
+from deerflow.config.deer_flow_context import DeerFlowContext
 from deerflow.sandbox import get_sandbox_provider
 
 logger = logging.getLogger(__name__)
@@ -42,41 +43,35 @@ class SandboxMiddleware(AgentMiddleware[SandboxMiddlewareState]):
         super().__init__()
         self._lazy_init = lazy_init
 
-    def _acquire_sandbox(self, thread_id: str) -> str:
-        provider = get_sandbox_provider()
+    def _acquire_sandbox(self, thread_id: str, runtime: Runtime[DeerFlowContext]) -> str:
+        provider = get_sandbox_provider(runtime.context.app_config)
         sandbox_id = provider.acquire(thread_id)
         logger.info(f"Acquiring sandbox {sandbox_id}")
         return sandbox_id
 
     @override
-    def before_agent(self, state: SandboxMiddlewareState, runtime: Runtime) -> dict | None:
+    def before_agent(self, state: SandboxMiddlewareState, runtime: Runtime[DeerFlowContext]) -> dict | None:
         # Skip acquisition if lazy_init is enabled
         if self._lazy_init:
             return super().before_agent(state, runtime)
 
         # Eager initialization (original behavior)
         if "sandbox" not in state or state["sandbox"] is None:
-            thread_id = (runtime.context or {}).get("thread_id")
-            if thread_id is None:
+            thread_id = runtime.context.thread_id
+            if not thread_id:
                 return super().before_agent(state, runtime)
-            sandbox_id = self._acquire_sandbox(thread_id)
+            sandbox_id = self._acquire_sandbox(thread_id, runtime)
             logger.info(f"Assigned sandbox {sandbox_id} to thread {thread_id}")
             return {"sandbox": {"sandbox_id": sandbox_id}}
         return super().before_agent(state, runtime)
 
     @override
-    def after_agent(self, state: SandboxMiddlewareState, runtime: Runtime) -> dict | None:
+    def after_agent(self, state: SandboxMiddlewareState, runtime: Runtime[DeerFlowContext]) -> dict | None:
         sandbox = state.get("sandbox")
         if sandbox is not None:
             sandbox_id = sandbox["sandbox_id"]
             logger.info(f"Releasing sandbox {sandbox_id}")
-            get_sandbox_provider().release(sandbox_id)
-            return None
-
-        if (runtime.context or {}).get("sandbox_id") is not None:
-            sandbox_id = runtime.context.get("sandbox_id")
-            logger.info(f"Releasing sandbox {sandbox_id} from context")
-            get_sandbox_provider().release(sandbox_id)
+            get_sandbox_provider(runtime.context.app_config).release(sandbox_id)
             return None
 
         # No sandbox to release
diff --git a/backend/packages/harness/deerflow/sandbox/sandbox_provider.py b/backend/packages/harness/deerflow/sandbox/sandbox_provider.py
index ecb1f7a67..40c26b700 100644
--- a/backend/packages/harness/deerflow/sandbox/sandbox_provider.py
+++ b/backend/packages/harness/deerflow/sandbox/sandbox_provider.py
@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
 
-from deerflow.config import get_app_config
+from deerflow.config.app_config import AppConfig
 from deerflow.reflection import resolve_class
 from deerflow.sandbox.sandbox import Sandbox
 
@@ -41,23 +41,38 @@ class SandboxProvider(ABC):
 _default_sandbox_provider: SandboxProvider | None = None
 
 
-def get_sandbox_provider(**kwargs) -> SandboxProvider:
+def get_sandbox_provider(app_config: AppConfig, **kwargs) -> SandboxProvider:
     """Get the sandbox provider singleton.
 
     Returns a cached singleton instance. Use `reset_sandbox_provider()` to clear
     the cache, or `shutdown_sandbox_provider()` to properly shutdown and clear.
 
+    Args:
+        app_config: Application config used the first time the singleton is built.
+                    Ignored on subsequent calls — the cached instance is returned
+                    regardless of the config passed.
+
     Returns:
         A sandbox provider instance.
     """
     global _default_sandbox_provider
     if _default_sandbox_provider is None:
-        config = get_app_config()
-        cls = resolve_class(config.sandbox.use, SandboxProvider)
-        _default_sandbox_provider = cls(**kwargs)
+        cls = resolve_class(app_config.sandbox.use, SandboxProvider)
+        _default_sandbox_provider = cls(app_config=app_config, **kwargs) if _accepts_app_config(cls) else cls(**kwargs)
     return _default_sandbox_provider
 
 
+def _accepts_app_config(cls: type) -> bool:
+    """Return True when the provider's __init__ accepts an ``app_config`` kwarg."""
+    import inspect
+
+    try:
+        sig = inspect.signature(cls.__init__)
+    except (TypeError, ValueError):
+        return False
+    return "app_config" in sig.parameters
+
+
 def reset_sandbox_provider() -> None:
     """Reset the sandbox provider singleton.
 
diff --git a/backend/packages/harness/deerflow/sandbox/security.py b/backend/packages/harness/deerflow/sandbox/security.py
index 478016ad1..257c90f46 100644
--- a/backend/packages/harness/deerflow/sandbox/security.py
+++ b/backend/packages/harness/deerflow/sandbox/security.py
@@ -1,6 +1,6 @@
 """Security helpers for sandbox capability gating."""
 
-from deerflow.config import get_app_config
+from deerflow.config.app_config import AppConfig
 
 _LOCAL_SANDBOX_PROVIDER_MARKERS = (
     "deerflow.sandbox.local:LocalSandboxProvider",
@@ -20,11 +20,8 @@ LOCAL_BASH_SUBAGENT_DISABLED_MESSAGE = (
 )
 
 
-def uses_local_sandbox_provider(config=None) -> bool:
+def uses_local_sandbox_provider(config: AppConfig) -> bool:
     """Return True when the active sandbox provider is the host-local provider."""
-    if config is None:
-        config = get_app_config()
-
     sandbox_cfg = getattr(config, "sandbox", None)
     sandbox_use = getattr(sandbox_cfg, "use", "")
     if sandbox_use in _LOCAL_SANDBOX_PROVIDER_MARKERS:
@@ -32,11 +29,8 @@ def uses_local_sandbox_provider(config=None) -> bool:
     return sandbox_use.endswith(":LocalSandboxProvider") and "deerflow.sandbox.local" in sandbox_use
 
 
-def is_host_bash_allowed(config=None) -> bool:
+def is_host_bash_allowed(config: AppConfig) -> bool:
     """Return whether host bash execution is explicitly allowed."""
-    if config is None:
-        config = get_app_config()
-
     sandbox_cfg = getattr(config, "sandbox", None)
     if sandbox_cfg is None:
         return False
diff --git a/backend/packages/harness/deerflow/sandbox/tools.py b/backend/packages/harness/deerflow/sandbox/tools.py
index 7b09358e7..64ef712b8 100644
--- a/backend/packages/harness/deerflow/sandbox/tools.py
+++ b/backend/packages/harness/deerflow/sandbox/tools.py
@@ -7,7 +7,8 @@ from langchain.tools import ToolRuntime, tool
 from langgraph.typing import ContextT
 
 from deerflow.agents.thread_state import ThreadDataState, ThreadState
-from deerflow.config import get_app_config
+from deerflow.config.app_config import AppConfig
+from deerflow.config.deer_flow_context import resolve_context
 from deerflow.config.paths import VIRTUAL_PATH_PREFIX
 from deerflow.sandbox.exceptions import (
     SandboxError,
@@ -39,62 +40,43 @@ _DEFAULT_GREP_MAX_RESULTS = 100
 _MAX_GREP_MAX_RESULTS = 500
 
 
-def _get_skills_container_path() -> str:
-    """Get the skills container path from config, with fallback to default.
-
-    Result is cached after the first successful config load.  If config loading
-    fails the default is returned *without* caching so that a later call can
-    pick up the real value once the config is available.
-    """
-    cached = getattr(_get_skills_container_path, "_cached", None)
-    if cached is not None:
-        return cached
-    try:
-        from deerflow.config import get_app_config
-
-        value = get_app_config().skills.container_path
-        _get_skills_container_path._cached = value  # type: ignore[attr-defined]
-        return value
-    except Exception:
+def _get_skills_container_path(app_config: AppConfig) -> str:
+    """Get the skills container path from config, with fallback to default."""
+    skills_cfg = getattr(app_config, "skills", None)
+    if skills_cfg is None:
         return _DEFAULT_SKILLS_CONTAINER_PATH
+    return skills_cfg.container_path
 
 
-def _get_skills_host_path() -> str | None:
+def _get_skills_host_path(app_config: AppConfig) -> str | None:
     """Get the skills host filesystem path from config.
 
-    Returns None if the skills directory does not exist or config cannot be
-    loaded.  Only successful lookups are cached; failures are retried on the
-    next call so that a transiently unavailable skills directory does not
-    permanently disable skills access.
+    Returns None if the skills directory does not exist or is not configured.
     """
-    cached = getattr(_get_skills_host_path, "_cached", None)
-    if cached is not None:
-        return cached
+    skills_cfg = getattr(app_config, "skills", None)
+    if skills_cfg is None:
+        return None
     try:
-        from deerflow.config import get_app_config
-
-        config = get_app_config()
-        skills_path = config.skills.get_skills_path()
-        if skills_path.exists():
-            value = str(skills_path)
-            _get_skills_host_path._cached = value  # type: ignore[attr-defined]
-            return value
+        skills_path = skills_cfg.get_skills_path()
     except Exception:
-        pass
+        return None
+    if skills_path.exists():
+        return str(skills_path)
     return None
 
 
-def _is_skills_path(path: str) -> bool:
+def _is_skills_path(path: str, app_config: AppConfig) -> bool:
     """Check if a path is under the skills container path."""
-    skills_prefix = _get_skills_container_path()
+    skills_prefix = _get_skills_container_path(app_config)
     return path == skills_prefix or path.startswith(f"{skills_prefix}/")
 
 
-def _resolve_skills_path(path: str) -> str:
+def _resolve_skills_path(path: str, app_config: AppConfig) -> str:
     """Resolve a virtual skills path to a host filesystem path.
 
     Args:
         path: Virtual skills path (e.g. /mnt/skills/public/bootstrap/SKILL.md)
+        app_config: Resolved application config.
 
     Returns:
         Resolved host path.
@@ -102,8 +84,8 @@ def _resolve_skills_path(path: str) -> str:
     Raises:
         FileNotFoundError: If skills directory is not configured or doesn't exist.
     """
-    skills_container = _get_skills_container_path()
-    skills_host = _get_skills_host_path()
+    skills_container = _get_skills_container_path(app_config)
+    skills_host = _get_skills_host_path(app_config)
     if skills_host is None:
         raise FileNotFoundError(f"Skills directory not available for path: {path}")
 
@@ -119,48 +101,31 @@ def _is_acp_workspace_path(path: str) -> bool:
     return path == _ACP_WORKSPACE_VIRTUAL_PATH or path.startswith(f"{_ACP_WORKSPACE_VIRTUAL_PATH}/")
 
 
-def _get_custom_mounts():
+def _get_custom_mounts(app_config: AppConfig):
     """Get custom volume mounts from sandbox config.
 
-    Result is cached after the first successful config load.  If config loading
-    fails an empty list is returned *without* caching so that a later call can
-    pick up the real value once the config is available.
+    Only includes mounts whose host_path exists, consistent with
+    ``LocalSandboxProvider._setup_path_mappings()`` which also filters by
+    ``host_path.exists()``.
     """
-    cached = getattr(_get_custom_mounts, "_cached", None)
-    if cached is not None:
-        return cached
-    try:
-        from pathlib import Path
-
-        from deerflow.config import get_app_config
-
-        config = get_app_config()
-        mounts = []
-        if config.sandbox and config.sandbox.mounts:
-            # Only include mounts whose host_path exists, consistent with
-            # LocalSandboxProvider._setup_path_mappings() which also filters
-            # by host_path.exists().
-            mounts = [m for m in config.sandbox.mounts if Path(m.host_path).exists()]
-        _get_custom_mounts._cached = mounts  # type: ignore[attr-defined]
-        return mounts
-    except Exception:
-        # If config loading fails, return an empty list without caching so that
-        # a later call can retry once the config is available.
+    sandbox_cfg = getattr(app_config, "sandbox", None)
+    if sandbox_cfg is None or not sandbox_cfg.mounts:
         return []
+    return [m for m in sandbox_cfg.mounts if Path(m.host_path).exists()]
 
 
-def _is_custom_mount_path(path: str) -> bool:
+def _is_custom_mount_path(path: str, app_config: AppConfig) -> bool:
     """Check if path is under a custom mount container_path."""
-    for mount in _get_custom_mounts():
+    for mount in _get_custom_mounts(app_config):
         if path == mount.container_path or path.startswith(f"{mount.container_path}/"):
             return True
     return False
 
 
-def _get_custom_mount_for_path(path: str):
+def _get_custom_mount_for_path(path: str, app_config: AppConfig):
     """Get the mount config matching this path (longest prefix first)."""
     best = None
-    for mount in _get_custom_mounts():
+    for mount in _get_custom_mounts(app_config):
         if path == mount.container_path or path.startswith(f"{mount.container_path}/"):
             if best is None or len(mount.container_path) > len(best.container_path):
                 best = mount
@@ -200,8 +165,9 @@ def _get_acp_workspace_host_path(thread_id: str | None = None) -> str | None:
     if thread_id is not None:
         try:
             from deerflow.config.paths import get_paths
+            from deerflow.runtime.user_context import get_effective_user_id
 
-            host_path = get_paths().acp_workspace_dir(thread_id)
+            host_path = get_paths().acp_workspace_dir(thread_id, user_id=get_effective_user_id())
             if host_path.exists():
                 return str(host_path)
         except Exception:
@@ -270,44 +236,40 @@ def _resolve_acp_workspace_path(path: str, thread_id: str | None = None) -> str:
     return str(resolved_path)
 
 
-def _get_mcp_allowed_paths() -> list[str]:
+def _get_mcp_allowed_paths(app_config: AppConfig) -> list[str]:
     """Get the list of allowed paths from MCP config for file system server."""
-    allowed_paths = []
-    try:
-        from deerflow.config.extensions_config import get_extensions_config
+    allowed_paths: list[str] = []
+    extensions_config = getattr(app_config, "extensions", None)
+    if extensions_config is None:
+        return allowed_paths
 
-        extensions_config = get_extensions_config()
+    for _, server in extensions_config.mcp_servers.items():
+        if not server.enabled:
+            continue
 
-        for _, server in extensions_config.mcp_servers.items():
-            if not server.enabled:
-                continue
-
-            # Only check the filesystem server
-            args = server.args or []
-            # Check if args has server-filesystem package
-            has_filesystem = any("server-filesystem" in arg for arg in args)
-            if not has_filesystem:
-                continue
-            # Unpack the allowed file system paths in config
-            for arg in args:
-                if not arg.startswith("-") and arg.startswith("/"):
-                    allowed_paths.append(arg.rstrip("/") + "/")
-
-    except Exception:
-        pass
+        # Only check the filesystem server
+        args = server.args or []
+        # Check if args has server-filesystem package
+        has_filesystem = any("server-filesystem" in arg for arg in args)
+        if not has_filesystem:
+            continue
+        # Unpack the allowed file system paths in config
+        for arg in args:
+            if not arg.startswith("-") and arg.startswith("/"):
+                allowed_paths.append(arg.rstrip("/") + "/")
 
     return allowed_paths
 
 
-def _get_tool_config_int(name: str, key: str, default: int) -> int:
+def _get_tool_config_int(app_config: AppConfig, name: str, key: str, default: int) -> int:
     try:
-        tool_config = get_app_config().get_tool_config(name)
-        if tool_config is not None and key in tool_config.model_extra:
-            value = tool_config.model_extra.get(key)
-            if isinstance(value, int):
-                return value
+        tool_config = app_config.get_tool_config(name)
     except Exception:
-        pass
+        return default
+    if tool_config is not None and key in tool_config.model_extra:
+        value = tool_config.model_extra.get(key)
+        if isinstance(value, int):
+            return value
     return default
 
 
@@ -317,23 +279,23 @@ def _clamp_max_results(value: int, *, default: int, upper_bound: int) -> int:
     return min(value, upper_bound)
 
 
-def _resolve_max_results(name: str, requested: int, *, default: int, upper_bound: int) -> int:
+def _resolve_max_results(app_config: AppConfig, name: str, requested: int, *, default: int, upper_bound: int) -> int:
     requested_max_results = _clamp_max_results(requested, default=default, upper_bound=upper_bound)
     configured_max_results = _clamp_max_results(
-        _get_tool_config_int(name, "max_results", default),
+        _get_tool_config_int(app_config, name, "max_results", default),
         default=default,
         upper_bound=upper_bound,
     )
     return min(requested_max_results, configured_max_results)
 
 
-def _resolve_local_read_path(path: str, thread_data: ThreadDataState) -> str:
-    validate_local_tool_path(path, thread_data, read_only=True)
-    if _is_skills_path(path):
-        return _resolve_skills_path(path)
+def _resolve_local_read_path(path: str, thread_data: ThreadDataState, app_config: AppConfig) -> str:
+    validate_local_tool_path(path, thread_data, app_config, read_only=True)
+    if _is_skills_path(path, app_config):
+        return _resolve_skills_path(path, app_config)
     if _is_acp_workspace_path(path):
         return _resolve_acp_workspace_path(path, _extract_thread_id_from_thread_data(thread_data))
-    return _resolve_and_validate_user_data_path(path, thread_data)
+    return _resolve_and_validate_user_data_path(path, thread_data, app_config)
 
 
 def _format_glob_results(root_path: str, matches: list[str], truncated: bool) -> str:
@@ -379,7 +341,11 @@ def _join_path_preserving_style(base: str, relative: str) -> str:
     return f"{stripped_base}{separator}{normalized_relative}"
 
 
-def _sanitize_error(error: Exception, runtime: "ToolRuntime[ContextT, ThreadState] | None" = None) -> str:
+def _sanitize_error(
+    error: Exception,
+    runtime: "ToolRuntime[ContextT, ThreadState] | None" = None,
+    app_config: AppConfig | None = None,
+) -> str:
     """Sanitize an error message to avoid leaking host filesystem paths.
 
     In local-sandbox mode, resolved host paths in the error string are masked
@@ -388,8 +354,12 @@ def _sanitize_error(error: Exception, runtime: "ToolRuntime[ContextT, ThreadStat
     """
     msg = f"{type(error).__name__}: {error}"
     if runtime is not None and is_local_sandbox(runtime):
-        thread_data = get_thread_data(runtime)
-        msg = mask_local_paths_in_output(msg, thread_data)
+        if app_config is None:
+            ctx = getattr(runtime, "context", None)
+            app_config = getattr(ctx, "app_config", None)
+        if app_config is not None:
+            thread_data = get_thread_data(runtime)
+            msg = mask_local_paths_in_output(msg, thread_data, app_config)
     return msg
 
 
@@ -459,7 +429,7 @@ def _thread_actual_to_virtual_mappings(thread_data: ThreadDataState) -> dict[str
     return {actual: virtual for virtual, actual in _thread_virtual_to_actual_mappings(thread_data).items()}
 
 
-def mask_local_paths_in_output(output: str, thread_data: ThreadDataState | None) -> str:
+def mask_local_paths_in_output(output: str, thread_data: ThreadDataState | None, app_config: AppConfig) -> str:
     """Mask host absolute paths from local sandbox output using virtual paths.
 
     Handles user-data paths (per-thread), skills paths, and ACP workspace paths (global).
@@ -467,8 +437,8 @@ def mask_local_paths_in_output(output: str, thread_data: ThreadDataState | None)
     result = output
 
     # Mask skills host paths
-    skills_host = _get_skills_host_path()
-    skills_container = _get_skills_container_path()
+    skills_host = _get_skills_host_path(app_config)
+    skills_container = _get_skills_container_path(app_config)
     if skills_host:
         raw_base = str(Path(skills_host))
         resolved_base = str(Path(skills_host).resolve())
@@ -542,7 +512,13 @@ def _reject_path_traversal(path: str) -> None:
             raise PermissionError("Access denied: path traversal detected")
 
 
-def validate_local_tool_path(path: str, thread_data: ThreadDataState | None, *, read_only: bool = False) -> None:
+def validate_local_tool_path(
+    path: str,
+    thread_data: ThreadDataState | None,
+    app_config: AppConfig,
+    *,
+    read_only: bool = False,
+) -> None:
     """Validate that a virtual path is allowed for local-sandbox access.
 
     This function is a security gate — it checks whether *path* may be
@@ -571,7 +547,7 @@ def validate_local_tool_path(path: str, thread_data: ThreadDataState | None, *,
     _reject_path_traversal(path)
 
     # Skills paths — read-only access only
-    if _is_skills_path(path):
+    if _is_skills_path(path, app_config):
         if not read_only:
             raise PermissionError(f"Write access to skills path is not allowed: {path}")
         return
@@ -587,13 +563,13 @@ def validate_local_tool_path(path: str, thread_data: ThreadDataState | None, *,
         return
 
     # Custom mount paths — respect read_only config
-    if _is_custom_mount_path(path):
-        mount = _get_custom_mount_for_path(path)
+    if _is_custom_mount_path(path, app_config):
+        mount = _get_custom_mount_for_path(path, app_config)
         if mount and mount.read_only and not read_only:
             raise PermissionError(f"Write access to read-only mount is not allowed: {path}")
         return
 
-    raise PermissionError(f"Only paths under {VIRTUAL_PATH_PREFIX}/, {_get_skills_container_path()}/, {_ACP_WORKSPACE_VIRTUAL_PATH}/, or configured mount paths are allowed")
+    raise PermissionError(f"Only paths under {VIRTUAL_PATH_PREFIX}/, {_get_skills_container_path(app_config)}/, {_ACP_WORKSPACE_VIRTUAL_PATH}/, or configured mount paths are allowed")
 
 
 def _validate_resolved_user_data_path(resolved: Path, thread_data: ThreadDataState) -> None:
@@ -624,18 +600,23 @@ def _validate_resolved_user_data_path(resolved: Path, thread_data: ThreadDataSta
     raise PermissionError("Access denied: path traversal detected")
 
 
-def _resolve_and_validate_user_data_path(path: str, thread_data: ThreadDataState) -> str:
+def _resolve_and_validate_user_data_path(path: str, thread_data: ThreadDataState, app_config: AppConfig) -> str:
     """Resolve a /mnt/user-data virtual path and validate it stays in bounds.
 
     Returns the resolved host path string.
+
+    ``app_config`` is accepted for signature symmetry with the other resolver
+    helpers; the user-data resolution path itself is fully derivable from
+    ``thread_data``.
     """
+    _ = app_config  # noqa: F841 — kept for interface symmetry with sibling resolvers
     resolved_str = replace_virtual_path(path, thread_data)
     resolved = Path(resolved_str).resolve()
     _validate_resolved_user_data_path(resolved, thread_data)
     return str(resolved)
 
 
-def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState | None) -> None:
+def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState | None, app_config: AppConfig) -> None:
     """Validate absolute paths in local-sandbox bash commands.
 
     This validation is only a best-effort guard for the explicit
@@ -659,7 +640,7 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState
         raise PermissionError(f"Unsafe file:// URL in command: {file_url_match.group()}. Use paths under {VIRTUAL_PATH_PREFIX}")
 
     unsafe_paths: list[str] = []
-    allowed_paths = _get_mcp_allowed_paths()
+    allowed_paths = _get_mcp_allowed_paths(app_config)
 
     for absolute_path in _ABSOLUTE_PATH_PATTERN.findall(command):
         # Check for MCP filesystem server allowed paths
@@ -672,7 +653,7 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState
             continue
 
         # Allow skills container path (resolved by tools.py before passing to sandbox)
-        if _is_skills_path(absolute_path):
+        if _is_skills_path(absolute_path, app_config):
             _reject_path_traversal(absolute_path)
             continue
 
@@ -682,7 +663,7 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState
             continue
 
         # Allow custom mount container paths
-        if _is_custom_mount_path(absolute_path):
+        if _is_custom_mount_path(absolute_path, app_config):
             _reject_path_traversal(absolute_path)
             continue
 
@@ -696,12 +677,13 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState
         raise PermissionError(f"Unsafe absolute paths in command: {unsafe}. Use paths under {VIRTUAL_PATH_PREFIX}")
 
 
-def replace_virtual_paths_in_command(command: str, thread_data: ThreadDataState | None) -> str:
+def replace_virtual_paths_in_command(command: str, thread_data: ThreadDataState | None, app_config: AppConfig) -> str:
     """Replace all virtual paths (/mnt/user-data, /mnt/skills, /mnt/acp-workspace) in a command string.
 
     Args:
         command: The command string that may contain virtual paths.
         thread_data: The thread data containing actual paths.
+        app_config: Resolved application config.
 
     Returns:
         The command with all virtual paths replaced.
@@ -709,13 +691,13 @@ def replace_virtual_paths_in_command(command: str, thread_data: ThreadDataState
     result = command
 
     # Replace skills paths
-    skills_container = _get_skills_container_path()
-    skills_host = _get_skills_host_path()
+    skills_container = _get_skills_container_path(app_config)
+    skills_host = _get_skills_host_path(app_config)
     if skills_host and skills_container in result:
         skills_pattern = re.compile(rf"{re.escape(skills_container)}(/[^\s\"';&|<>()]*)?")
 
         def replace_skills_match(match: re.Match) -> str:
-            return _resolve_skills_path(match.group(0))
+            return _resolve_skills_path(match.group(0), app_config)
 
         result = skills_pattern.sub(replace_skills_match, result)
 
@@ -805,12 +787,10 @@ def sandbox_from_runtime(runtime: ToolRuntime[ContextT, ThreadState] | None = No
     sandbox_id = sandbox_state.get("sandbox_id")
     if sandbox_id is None:
         raise SandboxRuntimeError("Sandbox ID not found in state")
-    sandbox = get_sandbox_provider().get(sandbox_id)
+    sandbox = get_sandbox_provider(resolve_context(runtime).app_config).get(sandbox_id)
     if sandbox is None:
         raise SandboxNotFoundError(f"Sandbox with ID '{sandbox_id}' not found", sandbox_id=sandbox_id)
 
-    if runtime.context is not None:
-        runtime.context["sandbox_id"] = sandbox_id  # Ensure sandbox_id is in context for downstream use
     return sandbox
 
 
@@ -838,26 +818,24 @@ def ensure_sandbox_initialized(runtime: ToolRuntime[ContextT, ThreadState] | Non
     if runtime.state is None:
         raise SandboxRuntimeError("Tool runtime state not available")
 
+    app_config = runtime.context.app_config
+
     # Check if sandbox already exists in state
     sandbox_state = runtime.state.get("sandbox")
     if sandbox_state is not None:
         sandbox_id = sandbox_state.get("sandbox_id")
         if sandbox_id is not None:
-            sandbox = get_sandbox_provider().get(sandbox_id)
+            sandbox = get_sandbox_provider(app_config).get(sandbox_id)
             if sandbox is not None:
-                if runtime.context is not None:
-                    runtime.context["sandbox_id"] = sandbox_id  # Ensure sandbox_id is in context for releasing in after_agent
                 return sandbox
             # Sandbox was released, fall through to acquire new one
 
     # Lazy acquisition: get thread_id and acquire sandbox
-    thread_id = runtime.context.get("thread_id") if runtime.context else None
-    if thread_id is None:
-        thread_id = runtime.config.get("configurable", {}).get("thread_id") if runtime.config else None
-    if thread_id is None:
+    thread_id = runtime.context.thread_id
+    if not thread_id:
         raise SandboxRuntimeError("Thread ID not available in runtime context")
 
-    provider = get_sandbox_provider()
+    provider = get_sandbox_provider(app_config)
     sandbox_id = provider.acquire(thread_id)
 
     # Update runtime state - this persists across tool calls
@@ -868,8 +846,6 @@ def ensure_sandbox_initialized(runtime: ToolRuntime[ContextT, ThreadState] | Non
     if sandbox is None:
         raise SandboxNotFoundError("Sandbox not found after acquisition", sandbox_id=sandbox_id)
 
-    if runtime.context is not None:
-        runtime.context["sandbox_id"] = sandbox_id  # Ensure sandbox_id is in context for releasing in after_agent
     return sandbox
 
 
@@ -999,40 +975,29 @@ def bash_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, com
         description: Explain why you are running this command in short words. ALWAYS PROVIDE THIS PARAMETER FIRST.
         command: The bash command to execute. Always use absolute paths for files and directories.
     """
+    app_config = resolve_context(runtime).app_config
     try:
         sandbox = ensure_sandbox_initialized(runtime)
+        sandbox_cfg = app_config.sandbox
+        max_chars = sandbox_cfg.bash_output_max_chars if sandbox_cfg else 20000
         if is_local_sandbox(runtime):
-            if not is_host_bash_allowed():
+            if not is_host_bash_allowed(app_config):
                 return f"Error: {LOCAL_HOST_BASH_DISABLED_MESSAGE}"
             ensure_thread_directories_exist(runtime)
             thread_data = get_thread_data(runtime)
-            validate_local_bash_command_paths(command, thread_data)
-            command = replace_virtual_paths_in_command(command, thread_data)
+            validate_local_bash_command_paths(command, thread_data, app_config)
+            command = replace_virtual_paths_in_command(command, thread_data, app_config)
             command = _apply_cwd_prefix(command, thread_data)
             output = sandbox.execute_command(command)
-            try:
-                from deerflow.config.app_config import get_app_config
-
-                sandbox_cfg = get_app_config().sandbox
-                max_chars = sandbox_cfg.bash_output_max_chars if sandbox_cfg else 20000
-            except Exception:
-                max_chars = 20000
-            return _truncate_bash_output(mask_local_paths_in_output(output, thread_data), max_chars)
+            return _truncate_bash_output(mask_local_paths_in_output(output, thread_data, app_config), max_chars)
         ensure_thread_directories_exist(runtime)
-        try:
-            from deerflow.config.app_config import get_app_config
-
-            sandbox_cfg = get_app_config().sandbox
-            max_chars = sandbox_cfg.bash_output_max_chars if sandbox_cfg else 20000
-        except Exception:
-            max_chars = 20000
         return _truncate_bash_output(sandbox.execute_command(command), max_chars)
     except SandboxError as e:
         return f"Error: {e}"
     except PermissionError as e:
         return f"Error: {e}"
     except Exception as e:
-        return f"Error: Unexpected error executing command: {_sanitize_error(e, runtime)}"
+        return f"Error: Unexpected error executing command: {_sanitize_error(e, runtime, app_config)}"
 
 
 @tool("ls", parse_docstring=True)
@@ -1043,6 +1008,7 @@ def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path:
         description: Explain why you are listing this directory in short words. ALWAYS PROVIDE THIS PARAMETER FIRST.
         path: The **absolute** path to the directory to list.
     """
+    app_config = resolve_context(runtime).app_config
     try:
         sandbox = ensure_sandbox_initialized(runtime)
         ensure_thread_directories_exist(runtime)
@@ -1050,13 +1016,13 @@ def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path:
         thread_data = None
         if is_local_sandbox(runtime):
             thread_data = get_thread_data(runtime)
-            validate_local_tool_path(path, thread_data, read_only=True)
-            if _is_skills_path(path):
-                path = _resolve_skills_path(path)
+            validate_local_tool_path(path, thread_data, app_config, read_only=True)
+            if _is_skills_path(path, app_config):
+                path = _resolve_skills_path(path, app_config)
             elif _is_acp_workspace_path(path):
                 path = _resolve_acp_workspace_path(path, _extract_thread_id_from_thread_data(thread_data))
-            elif not _is_custom_mount_path(path):
-                path = _resolve_and_validate_user_data_path(path, thread_data)
+            elif not _is_custom_mount_path(path, app_config):
+                path = _resolve_and_validate_user_data_path(path, thread_data, app_config)
             # Custom mount paths are resolved by LocalSandbox._resolve_path()
         children = sandbox.list_dir(path)
         if not children:
@@ -1064,13 +1030,8 @@ def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path:
         output = "\n".join(children)
         if thread_data is not None:
             output = mask_local_paths_in_output(output, thread_data)
-        try:
-            from deerflow.config.app_config import get_app_config
-
-            sandbox_cfg = get_app_config().sandbox
-            max_chars = sandbox_cfg.ls_output_max_chars if sandbox_cfg else 20000
-        except Exception:
-            max_chars = 20000
+        sandbox_cfg = app_config.sandbox
+        max_chars = sandbox_cfg.ls_output_max_chars if sandbox_cfg else 20000
         return _truncate_ls_output(output, max_chars)
     except SandboxError as e:
         return f"Error: {e}"
@@ -1079,7 +1040,7 @@ def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path:
     except PermissionError:
         return f"Error: Permission denied: {requested_path}"
     except Exception as e:
-        return f"Error: Unexpected error listing directory: {_sanitize_error(e, runtime)}"
+        return f"Error: Unexpected error listing directory: {_sanitize_error(e, runtime, app_config)}"
 
 
 @tool("glob", parse_docstring=True)
@@ -1100,11 +1061,13 @@ def glob_tool(
         include_dirs: Whether matching directories should also be returned. Default is False.
         max_results: Maximum number of paths to return. Default is 200.
     """
+    app_config = resolve_context(runtime).app_config
     try:
         sandbox = ensure_sandbox_initialized(runtime)
         ensure_thread_directories_exist(runtime)
         requested_path = path
         effective_max_results = _resolve_max_results(
+            app_config,
             "glob",
             max_results,
             default=_DEFAULT_GLOB_MAX_RESULTS,
@@ -1115,10 +1078,10 @@ def glob_tool(
             thread_data = get_thread_data(runtime)
             if thread_data is None:
                 raise SandboxRuntimeError("Thread data not available for local sandbox")
-            path = _resolve_local_read_path(path, thread_data)
+            path = _resolve_local_read_path(path, thread_data, app_config)
         matches, truncated = sandbox.glob(path, pattern, include_dirs=include_dirs, max_results=effective_max_results)
         if thread_data is not None:
-            matches = [mask_local_paths_in_output(match, thread_data) for match in matches]
+            matches = [mask_local_paths_in_output(match, thread_data, app_config) for match in matches]
         return _format_glob_results(requested_path, matches, truncated)
     except SandboxError as e:
         return f"Error: {e}"
@@ -1129,7 +1092,7 @@ def glob_tool(
     except PermissionError:
         return f"Error: Permission denied: {requested_path}"
     except Exception as e:
-        return f"Error: Unexpected error searching paths: {_sanitize_error(e, runtime)}"
+        return f"Error: Unexpected error searching paths: {_sanitize_error(e, runtime, app_config)}"
 
 
 @tool("grep", parse_docstring=True)
@@ -1154,11 +1117,13 @@ def grep_tool(
         case_sensitive: Whether matching is case-sensitive. Default is False.
         max_results: Maximum number of matching lines to return. Default is 100.
     """
+    app_config = resolve_context(runtime).app_config
     try:
         sandbox = ensure_sandbox_initialized(runtime)
         ensure_thread_directories_exist(runtime)
         requested_path = path
         effective_max_results = _resolve_max_results(
+            app_config,
             "grep",
             max_results,
             default=_DEFAULT_GREP_MAX_RESULTS,
@@ -1169,7 +1134,7 @@ def grep_tool(
             thread_data = get_thread_data(runtime)
             if thread_data is None:
                 raise SandboxRuntimeError("Thread data not available for local sandbox")
-            path = _resolve_local_read_path(path, thread_data)
+            path = _resolve_local_read_path(path, thread_data, app_config)
         matches, truncated = sandbox.grep(
             path,
             pattern,
@@ -1181,7 +1146,7 @@ def grep_tool(
         if thread_data is not None:
             matches = [
                 GrepMatch(
-                    path=mask_local_paths_in_output(match.path, thread_data),
+                    path=mask_local_paths_in_output(match.path, thread_data, app_config),
                     line_number=match.line_number,
                     line=match.line,
                 )
@@ -1199,7 +1164,7 @@ def grep_tool(
     except PermissionError:
         return f"Error: Permission denied: {requested_path}"
     except Exception as e:
-        return f"Error: Unexpected error searching file contents: {_sanitize_error(e, runtime)}"
+        return f"Error: Unexpected error searching file contents: {_sanitize_error(e, runtime, app_config)}"
 
 
 @tool("read_file", parse_docstring=True)
@@ -1218,32 +1183,28 @@ def read_file_tool(
         start_line: Optional starting line number (1-indexed, inclusive). Use with end_line to read a specific range.
         end_line: Optional ending line number (1-indexed, inclusive). Use with start_line to read a specific range.
     """
+    app_config = resolve_context(runtime).app_config
     try:
         sandbox = ensure_sandbox_initialized(runtime)
         ensure_thread_directories_exist(runtime)
         requested_path = path
         if is_local_sandbox(runtime):
             thread_data = get_thread_data(runtime)
-            validate_local_tool_path(path, thread_data, read_only=True)
-            if _is_skills_path(path):
-                path = _resolve_skills_path(path)
+            validate_local_tool_path(path, thread_data, app_config, read_only=True)
+            if _is_skills_path(path, app_config):
+                path = _resolve_skills_path(path, app_config)
             elif _is_acp_workspace_path(path):
                 path = _resolve_acp_workspace_path(path, _extract_thread_id_from_thread_data(thread_data))
-            elif not _is_custom_mount_path(path):
-                path = _resolve_and_validate_user_data_path(path, thread_data)
+            elif not _is_custom_mount_path(path, app_config):
+                path = _resolve_and_validate_user_data_path(path, thread_data, app_config)
             # Custom mount paths are resolved by LocalSandbox._resolve_path()
         content = sandbox.read_file(path)
         if not content:
             return "(empty)"
         if start_line is not None and end_line is not None:
             content = "\n".join(content.splitlines()[start_line - 1 : end_line])
-        try:
-            from deerflow.config.app_config import get_app_config
-
-            sandbox_cfg = get_app_config().sandbox
-            max_chars = sandbox_cfg.read_file_output_max_chars if sandbox_cfg else 50000
-        except Exception:
-            max_chars = 50000
+        sandbox_cfg = app_config.sandbox
+        max_chars = sandbox_cfg.read_file_output_max_chars if sandbox_cfg else 50000
         return _truncate_read_file_output(content, max_chars)
     except SandboxError as e:
         return f"Error: {e}"
@@ -1254,7 +1215,7 @@ def read_file_tool(
     except IsADirectoryError:
         return f"Error: Path is a directory, not a file: {requested_path}"
     except Exception as e:
-        return f"Error: Unexpected error reading file: {_sanitize_error(e, runtime)}"
+        return f"Error: Unexpected error reading file: {_sanitize_error(e, runtime, app_config)}"
 
 
 @tool("write_file", parse_docstring=True)
@@ -1272,15 +1233,16 @@ def write_file_tool(
         path: The **absolute** path to the file to write to. ALWAYS PROVIDE THIS PARAMETER SECOND.
         content: The content to write to the file. ALWAYS PROVIDE THIS PARAMETER THIRD.
     """
+    app_config = resolve_context(runtime).app_config
     try:
         sandbox = ensure_sandbox_initialized(runtime)
         ensure_thread_directories_exist(runtime)
         requested_path = path
         if is_local_sandbox(runtime):
             thread_data = get_thread_data(runtime)
-            validate_local_tool_path(path, thread_data)
-            if not _is_custom_mount_path(path):
-                path = _resolve_and_validate_user_data_path(path, thread_data)
+            validate_local_tool_path(path, thread_data, app_config)
+            if not _is_custom_mount_path(path, app_config):
+                path = _resolve_and_validate_user_data_path(path, thread_data, app_config)
             # Custom mount paths are resolved by LocalSandbox._resolve_path()
         with get_file_operation_lock(sandbox, path):
             sandbox.write_file(path, content, append)
@@ -1292,9 +1254,9 @@ def write_file_tool(
     except IsADirectoryError:
         return f"Error: Path is a directory, not a file: {requested_path}"
     except OSError as e:
-        return f"Error: Failed to write file '{requested_path}': {_sanitize_error(e, runtime)}"
+        return f"Error: Failed to write file '{requested_path}': {_sanitize_error(e, runtime, app_config)}"
     except Exception as e:
-        return f"Error: Unexpected error writing file: {_sanitize_error(e, runtime)}"
+        return f"Error: Unexpected error writing file: {_sanitize_error(e, runtime, app_config)}"
 
 
 @tool("str_replace", parse_docstring=True)
@@ -1316,15 +1278,16 @@ def str_replace_tool(
         new_str: The new substring. ALWAYS PROVIDE THIS PARAMETER FOURTH.
         replace_all: Whether to replace all occurrences of the substring. If False, only the first occurrence will be replaced. Default is False.
     """
+    app_config = resolve_context(runtime).app_config
     try:
         sandbox = ensure_sandbox_initialized(runtime)
         ensure_thread_directories_exist(runtime)
         requested_path = path
         if is_local_sandbox(runtime):
             thread_data = get_thread_data(runtime)
-            validate_local_tool_path(path, thread_data)
-            if not _is_custom_mount_path(path):
-                path = _resolve_and_validate_user_data_path(path, thread_data)
+            validate_local_tool_path(path, thread_data, app_config)
+            if not _is_custom_mount_path(path, app_config):
+                path = _resolve_and_validate_user_data_path(path, thread_data, app_config)
             # Custom mount paths are resolved by LocalSandbox._resolve_path()
         with get_file_operation_lock(sandbox, path):
             content = sandbox.read_file(path)
@@ -1345,4 +1308,4 @@ def str_replace_tool(
     except PermissionError:
         return f"Error: Permission denied accessing file: {requested_path}"
     except Exception as e:
-        return f"Error: Unexpected error replacing string: {_sanitize_error(e, runtime)}"
+        return f"Error: Unexpected error replacing string: {_sanitize_error(e, runtime, app_config)}"
diff --git a/backend/packages/harness/deerflow/skills/loader.py b/backend/packages/harness/deerflow/skills/loader.py
index 35ffda661..a86b9285d 100644
--- a/backend/packages/harness/deerflow/skills/loader.py
+++ b/backend/packages/harness/deerflow/skills/loader.py
@@ -1,10 +1,14 @@
 import logging
 import os
 from pathlib import Path
+from typing import TYPE_CHECKING
 
 from .parser import parse_skill_file
 from .types import Skill
 
+if TYPE_CHECKING:
+    from deerflow.config.app_config import AppConfig
+
 logger = logging.getLogger(__name__)
 
 
@@ -22,7 +26,12 @@ def get_skills_root_path() -> Path:
     return skills_dir
 
 
-def load_skills(skills_path: Path | None = None, use_config: bool = True, enabled_only: bool = False) -> list[Skill]:
+def load_skills(
+    app_config: "AppConfig | None" = None,
+    *,
+    skills_path: Path | None = None,
+    enabled_only: bool = False,
+) -> list[Skill]:
     """
     Load all skills from the skills directory.
 
@@ -30,25 +39,19 @@ def load_skills(skills_path: Path | None = None, use_config: bool = True, enable
     to extract metadata. The enabled state is determined by the skills_state_config.json file.
 
     Args:
-        skills_path: Optional custom path to skills directory.
-                     If not provided and use_config is True, uses path from config.
-                     Otherwise defaults to deer-flow/skills
-        use_config: Whether to load skills path from config (default: True)
+        app_config: Application config used to resolve the configured skills
+                    directory. Ignored when ``skills_path`` is supplied.
+        skills_path: Explicit override for the skills directory. When both
+                     ``skills_path`` and ``app_config`` are omitted the
+                     default repository layout is used (``deer-flow/skills``).
         enabled_only: If True, only return enabled skills (default: False)
 
     Returns:
         List of Skill objects, sorted by name
     """
     if skills_path is None:
-        if use_config:
-            try:
-                from deerflow.config import get_app_config
-
-                config = get_app_config()
-                skills_path = config.skills.get_skills_path()
-            except Exception:
-                # Fallback to default if config fails
-                skills_path = get_skills_root_path()
+        if app_config is not None:
+            skills_path = app_config.skills.get_skills_path()
         else:
             skills_path = get_skills_root_path()
 
diff --git a/backend/packages/harness/deerflow/skills/manager.py b/backend/packages/harness/deerflow/skills/manager.py
index 77789937a..9b02a52cd 100644
--- a/backend/packages/harness/deerflow/skills/manager.py
+++ b/backend/packages/harness/deerflow/skills/manager.py
@@ -9,7 +9,7 @@ from datetime import UTC, datetime
 from pathlib import Path
 from typing import Any
 
-from deerflow.config import get_app_config
+from deerflow.config.app_config import AppConfig
 from deerflow.skills.loader import load_skills
 from deerflow.skills.validation import _validate_skill_frontmatter
 
@@ -20,16 +20,17 @@ ALLOWED_SUPPORT_SUBDIRS = {"references", "templates", "scripts", "assets"}
 _SKILL_NAME_PATTERN = re.compile(r"^[a-z0-9]+(?:-[a-z0-9]+)*$")
 
 
-def get_skills_root_dir() -> Path:
-    return get_app_config().skills.get_skills_path()
+def get_skills_root_dir(app_config: AppConfig) -> Path:
+    """Return the configured skills root."""
+    return app_config.skills.get_skills_path()
 
 
-def get_public_skills_dir() -> Path:
-    return get_skills_root_dir() / "public"
+def get_public_skills_dir(app_config: AppConfig) -> Path:
+    return get_skills_root_dir(app_config) / "public"
 
 
-def get_custom_skills_dir() -> Path:
-    path = get_skills_root_dir() / "custom"
+def get_custom_skills_dir(app_config: AppConfig) -> Path:
+    path = get_skills_root_dir(app_config) / "custom"
     path.mkdir(parents=True, exist_ok=True)
     return path
 
@@ -43,46 +44,46 @@ def validate_skill_name(name: str) -> str:
     return normalized
 
 
-def get_custom_skill_dir(name: str) -> Path:
-    return get_custom_skills_dir() / validate_skill_name(name)
+def get_custom_skill_dir(name: str, app_config: AppConfig) -> Path:
+    return get_custom_skills_dir(app_config) / validate_skill_name(name)
 
 
-def get_custom_skill_file(name: str) -> Path:
-    return get_custom_skill_dir(name) / SKILL_FILE_NAME
+def get_custom_skill_file(name: str, app_config: AppConfig) -> Path:
+    return get_custom_skill_dir(name, app_config) / SKILL_FILE_NAME
 
 
-def get_custom_skill_history_dir() -> Path:
-    path = get_custom_skills_dir() / HISTORY_DIR_NAME
+def get_custom_skill_history_dir(app_config: AppConfig) -> Path:
+    path = get_custom_skills_dir(app_config) / HISTORY_DIR_NAME
     path.mkdir(parents=True, exist_ok=True)
     return path
 
 
-def get_skill_history_file(name: str) -> Path:
-    return get_custom_skill_history_dir() / f"{validate_skill_name(name)}.jsonl"
+def get_skill_history_file(name: str, app_config: AppConfig) -> Path:
+    return get_custom_skill_history_dir(app_config) / f"{validate_skill_name(name)}.jsonl"
 
 
-def get_public_skill_dir(name: str) -> Path:
-    return get_public_skills_dir() / validate_skill_name(name)
+def get_public_skill_dir(name: str, app_config: AppConfig) -> Path:
+    return get_public_skills_dir(app_config) / validate_skill_name(name)
 
 
-def custom_skill_exists(name: str) -> bool:
-    return get_custom_skill_file(name).exists()
+def custom_skill_exists(name: str, app_config: AppConfig) -> bool:
+    return get_custom_skill_file(name, app_config).exists()
 
 
-def public_skill_exists(name: str) -> bool:
-    return (get_public_skill_dir(name) / SKILL_FILE_NAME).exists()
+def public_skill_exists(name: str, app_config: AppConfig) -> bool:
+    return (get_public_skill_dir(name, app_config) / SKILL_FILE_NAME).exists()
 
 
-def ensure_custom_skill_is_editable(name: str) -> None:
-    if custom_skill_exists(name):
+def ensure_custom_skill_is_editable(name: str, app_config: AppConfig) -> None:
+    if custom_skill_exists(name, app_config):
         return
-    if public_skill_exists(name):
+    if public_skill_exists(name, app_config):
         raise ValueError(f"'{name}' is a built-in skill. To customise it, create a new skill with the same name under skills/custom/.")
     raise FileNotFoundError(f"Custom skill '{name}' not found.")
 
 
-def ensure_safe_support_path(name: str, relative_path: str) -> Path:
-    skill_dir = get_custom_skill_dir(name).resolve()
+def ensure_safe_support_path(name: str, relative_path: str, app_config: AppConfig) -> Path:
+    skill_dir = get_custom_skill_dir(name, app_config).resolve()
     if not relative_path or relative_path.endswith("/"):
         raise ValueError("Supporting file path must include a filename.")
     relative = Path(relative_path)
@@ -124,8 +125,8 @@ def atomic_write(path: Path, content: str) -> None:
     tmp_path.replace(path)
 
 
-def append_history(name: str, record: dict[str, Any]) -> None:
-    history_path = get_skill_history_file(name)
+def append_history(name: str, record: dict[str, Any], app_config: AppConfig) -> None:
+    history_path = get_skill_history_file(name, app_config)
     history_path.parent.mkdir(parents=True, exist_ok=True)
     payload = {
         "ts": datetime.now(UTC).isoformat(),
@@ -136,8 +137,8 @@ def append_history(name: str, record: dict[str, Any]) -> None:
         f.write("\n")
 
 
-def read_history(name: str) -> list[dict[str, Any]]:
-    history_path = get_skill_history_file(name)
+def read_history(name: str, app_config: AppConfig) -> list[dict[str, Any]]:
+    history_path = get_skill_history_file(name, app_config)
     if not history_path.exists():
         return []
     records: list[dict[str, Any]] = []
@@ -148,12 +149,12 @@ def read_history(name: str) -> list[dict[str, Any]]:
     return records
 
 
-def list_custom_skills() -> list:
-    return [skill for skill in load_skills(enabled_only=False) if skill.category == "custom"]
+def list_custom_skills(app_config: AppConfig) -> list:
+    return [skill for skill in load_skills(app_config, enabled_only=False) if skill.category == "custom"]
 
 
-def read_custom_skill_content(name: str) -> str:
-    skill_file = get_custom_skill_file(name)
+def read_custom_skill_content(name: str, app_config: AppConfig) -> str:
+    skill_file = get_custom_skill_file(name, app_config)
     if not skill_file.exists():
         raise FileNotFoundError(f"Custom skill '{name}' not found.")
     return skill_file.read_text(encoding="utf-8")
diff --git a/backend/packages/harness/deerflow/skills/security_scanner.py b/backend/packages/harness/deerflow/skills/security_scanner.py
index a8fc90a4e..957d9cd04 100644
--- a/backend/packages/harness/deerflow/skills/security_scanner.py
+++ b/backend/packages/harness/deerflow/skills/security_scanner.py
@@ -7,7 +7,7 @@ import logging
 import re
 from dataclasses import dataclass
 
-from deerflow.config import get_app_config
+from deerflow.config.app_config import AppConfig
 from deerflow.models import create_chat_model
 
 logger = logging.getLogger(__name__)
@@ -35,7 +35,7 @@ def _extract_json_object(raw: str) -> dict | None:
         return None
 
 
-async def scan_skill_content(content: str, *, executable: bool = False, location: str = "SKILL.md") -> ScanResult:
+async def scan_skill_content(app_config: AppConfig, content: str, *, executable: bool = False, location: str = "SKILL.md") -> ScanResult:
     """Screen skill content before it is written to disk."""
     rubric = (
         "You are a security reviewer for AI agent skills. "
@@ -47,9 +47,12 @@ async def scan_skill_content(content: str, *, executable: bool = False, location
     prompt = f"Location: {location}\nExecutable: {str(executable).lower()}\n\nReview this content:\n-----\n{content}\n-----"
 
     try:
-        config = get_app_config()
-        model_name = config.skill_evolution.moderation_model_name
-        model = create_chat_model(name=model_name, thinking_enabled=False) if model_name else create_chat_model(thinking_enabled=False)
+        model_name = app_config.skill_evolution.moderation_model_name
+        model = (
+            create_chat_model(name=model_name, thinking_enabled=False, app_config=app_config)
+            if model_name
+            else create_chat_model(thinking_enabled=False, app_config=app_config)
+        )
         response = await model.ainvoke(
             [
                 {"role": "system", "content": rubric},
diff --git a/backend/packages/harness/deerflow/subagents/executor.py b/backend/packages/harness/deerflow/subagents/executor.py
index b42cebacf..9177e2b5b 100644
--- a/backend/packages/harness/deerflow/subagents/executor.py
+++ b/backend/packages/harness/deerflow/subagents/executor.py
@@ -17,6 +17,7 @@ from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
 from langchain_core.runnables import RunnableConfig
 
 from deerflow.agents.thread_state import SandboxState, ThreadDataState, ThreadState
+from deerflow.config.app_config import AppConfig
 from deerflow.models import create_chat_model
 from deerflow.subagents.config import SubagentConfig
 
@@ -132,24 +133,16 @@ class SubagentExecutor:
         self,
         config: SubagentConfig,
         tools: list[BaseTool],
+        app_config: AppConfig,
         parent_model: str | None = None,
         sandbox_state: SandboxState | None = None,
         thread_data: ThreadDataState | None = None,
         thread_id: str | None = None,
         trace_id: str | None = None,
     ):
-        """Initialize the executor.
-
-        Args:
-            config: Subagent configuration.
-            tools: List of all available tools (will be filtered).
-            parent_model: The parent agent's model name for inheritance.
-            sandbox_state: Sandbox state from parent agent.
-            thread_data: Thread data from parent agent.
-            thread_id: Thread ID for sandbox operations.
-            trace_id: Trace ID from parent for distributed tracing.
-        """
+        """Initialize the executor."""
         self.config = config
+        self.app_config = app_config
         self.parent_model = parent_model
         self.sandbox_state = sandbox_state
         self.thread_data = thread_data
@@ -169,7 +162,7 @@ class SubagentExecutor:
     def _create_agent(self):
         """Create the agent instance."""
         model_name = _get_model_name(self.config, self.parent_model)
-        model = create_chat_model(name=model_name, thinking_enabled=False)
+        model = create_chat_model(name=model_name, thinking_enabled=False, app_config=self.app_config)
 
         from deerflow.agents.middlewares.tool_error_handling_middleware import build_subagent_runtime_middlewares
 
diff --git a/backend/packages/harness/deerflow/subagents/registry.py b/backend/packages/harness/deerflow/subagents/registry.py
index b34d7e9bd..b04071250 100644
--- a/backend/packages/harness/deerflow/subagents/registry.py
+++ b/backend/packages/harness/deerflow/subagents/registry.py
@@ -3,6 +3,7 @@
 import logging
 from dataclasses import replace
 
+from deerflow.config.app_config import AppConfig
 from deerflow.sandbox.security import is_host_bash_allowed
 from deerflow.subagents.builtins import BUILTIN_SUBAGENTS
 from deerflow.subagents.config import SubagentConfig
@@ -10,19 +11,17 @@ from deerflow.subagents.config import SubagentConfig
 logger = logging.getLogger(__name__)
 
 
-def _build_custom_subagent_config(name: str) -> SubagentConfig | None:
+def _build_custom_subagent_config(name: str, app_config: AppConfig) -> SubagentConfig | None:
     """Build a SubagentConfig from config.yaml custom_agents section.
 
     Args:
         name: The name of the custom subagent.
+        app_config: The resolved application config.
 
     Returns:
         SubagentConfig if found in custom_agents, None otherwise.
     """
-    from deerflow.config.subagents_config import get_subagents_app_config
-
-    app_config = get_subagents_app_config()
-    custom = app_config.custom_agents.get(name)
+    custom = app_config.subagents.custom_agents.get(name)
     if custom is None:
         return None
 
@@ -39,67 +38,44 @@ def _build_custom_subagent_config(name: str) -> SubagentConfig | None:
     )
 
 
-def get_subagent_config(name: str) -> SubagentConfig | None:
+def get_subagent_config(name: str, app_config: AppConfig) -> SubagentConfig | None:
     """Get a subagent configuration by name, with config.yaml overrides applied.
 
     Resolution order (mirrors Codex's config layering):
     1. Built-in subagents (general-purpose, bash)
     2. Custom subagents from config.yaml custom_agents section
     3. Per-agent overrides from config.yaml agents section (timeout, max_turns, model, skills)
-
-    Args:
-        name: The name of the subagent.
-
-    Returns:
-        SubagentConfig if found (with any config.yaml overrides applied), None otherwise.
     """
-    # Step 1: Look up built-in, then fall back to custom_agents
     config = BUILTIN_SUBAGENTS.get(name)
     if config is None:
-        config = _build_custom_subagent_config(name)
+        config = _build_custom_subagent_config(name, app_config)
     if config is None:
         return None
 
-    # Step 2: Apply per-agent overrides from config.yaml agents section.
-    # Only explicit per-agent overrides are applied here. Global defaults
-    # (timeout_seconds, max_turns at the top level) apply to built-in agents
-    # but must NOT override custom agents' own values — custom agents define
-    # their own defaults in the custom_agents section.
-    # Lazy import to avoid circular deps.
-    from deerflow.config.subagents_config import get_subagents_app_config
+    sub_config = app_config.subagents
+    overrides: dict = {}
 
-    app_config = get_subagents_app_config()
-    is_builtin = name in BUILTIN_SUBAGENTS
-    agent_override = app_config.agents.get(name)
+    # Timeout: subagents config supplies effective per-agent override or global default.
+    effective_timeout = sub_config.get_timeout_for(name)
+    if effective_timeout != config.timeout_seconds:
+        logger.debug("Subagent '%s': timeout overridden (%ss -> %ss)", name, config.timeout_seconds, effective_timeout)
+        overrides["timeout_seconds"] = effective_timeout
 
-    overrides = {}
-
-    # Timeout: per-agent override > global default (builtins only) > config's own value
-    if agent_override is not None and agent_override.timeout_seconds is not None:
-        if agent_override.timeout_seconds != config.timeout_seconds:
-            logger.debug("Subagent '%s': timeout overridden (%ss -> %ss)", name, config.timeout_seconds, agent_override.timeout_seconds)
-            overrides["timeout_seconds"] = agent_override.timeout_seconds
-    elif is_builtin and app_config.timeout_seconds != config.timeout_seconds:
-        logger.debug("Subagent '%s': timeout from global default (%ss -> %ss)", name, config.timeout_seconds, app_config.timeout_seconds)
-        overrides["timeout_seconds"] = app_config.timeout_seconds
-
-    # Max turns: per-agent override > global default (builtins only) > config's own value
-    if agent_override is not None and agent_override.max_turns is not None:
-        if agent_override.max_turns != config.max_turns:
-            logger.debug("Subagent '%s': max_turns overridden (%s -> %s)", name, config.max_turns, agent_override.max_turns)
-            overrides["max_turns"] = agent_override.max_turns
-    elif is_builtin and app_config.max_turns is not None and app_config.max_turns != config.max_turns:
-        logger.debug("Subagent '%s': max_turns from global default (%s -> %s)", name, config.max_turns, app_config.max_turns)
-        overrides["max_turns"] = app_config.max_turns
+    # Max turns: subagents config supplies effective per-agent override or global default
+    # (falls back to ``config.max_turns`` when no override is configured).
+    effective_max_turns = sub_config.get_max_turns_for(name, config.max_turns)
+    if effective_max_turns != config.max_turns:
+        logger.debug("Subagent '%s': max_turns overridden (%s -> %s)", name, config.max_turns, effective_max_turns)
+        overrides["max_turns"] = effective_max_turns
 
     # Model: per-agent override only (no global default for model)
-    effective_model = app_config.get_model_for(name)
+    effective_model = sub_config.get_model_for(name)
     if effective_model is not None and effective_model != config.model:
         logger.debug("Subagent '%s': model overridden (%s -> %s)", name, config.model, effective_model)
         overrides["model"] = effective_model
 
     # Skills: per-agent override only (no global default for skills)
-    effective_skills = app_config.get_skills_for(name)
+    effective_skills = sub_config.get_skills_for(name)
     if effective_skills is not None and effective_skills != config.skills:
         logger.debug("Subagent '%s': skills overridden (%s -> %s)", name, config.skills, effective_skills)
         overrides["skills"] = effective_skills
@@ -110,21 +86,21 @@ def get_subagent_config(name: str) -> SubagentConfig | None:
     return config
 
 
-def list_subagents() -> list[SubagentConfig]:
+def list_subagents(app_config: AppConfig) -> list[SubagentConfig]:
     """List all available subagent configurations (with config.yaml overrides applied).
 
     Returns:
         List of all registered SubagentConfig instances (built-in + custom).
     """
-    configs = []
-    for name in get_subagent_names():
-        config = get_subagent_config(name)
+    configs: list[SubagentConfig] = []
+    for name in get_subagent_names(app_config):
+        config = get_subagent_config(name, app_config)
         if config is not None:
             configs.append(config)
     return configs
 
 
-def get_subagent_names() -> list[str]:
+def get_subagent_names(app_config: AppConfig) -> list[str]:
     """Get all available subagent names (built-in + custom).
 
     Returns:
@@ -132,26 +108,22 @@ def get_subagent_names() -> list[str]:
     """
     names = list(BUILTIN_SUBAGENTS.keys())
 
-    # Merge custom_agents from config.yaml
-    from deerflow.config.subagents_config import get_subagents_app_config
-
-    app_config = get_subagents_app_config()
-    for custom_name in app_config.custom_agents:
+    for custom_name in app_config.subagents.custom_agents:
         if custom_name not in names:
             names.append(custom_name)
 
     return names
 
 
-def get_available_subagent_names() -> list[str]:
+def get_available_subagent_names(app_config: AppConfig) -> list[str]:
     """Get subagent names that should be exposed to the active runtime.
 
     Returns:
         List of subagent names visible to the current sandbox configuration.
     """
-    names = get_subagent_names()
+    names = get_subagent_names(app_config)
     try:
-        host_bash_allowed = is_host_bash_allowed()
+        host_bash_allowed = is_host_bash_allowed(app_config)
     except Exception:
         logger.debug("Could not determine host bash availability; exposing all subagents")
         return names
diff --git a/backend/packages/harness/deerflow/tools/builtins/invoke_acp_agent_tool.py b/backend/packages/harness/deerflow/tools/builtins/invoke_acp_agent_tool.py
index baf7f8ff5..618649020 100644
--- a/backend/packages/harness/deerflow/tools/builtins/invoke_acp_agent_tool.py
+++ b/backend/packages/harness/deerflow/tools/builtins/invoke_acp_agent_tool.py
@@ -33,11 +33,12 @@ def _get_work_dir(thread_id: str | None) -> str:
         An absolute physical filesystem path to use as the working directory.
     """
     from deerflow.config.paths import get_paths
+    from deerflow.runtime.user_context import get_effective_user_id
 
     paths = get_paths()
     if thread_id:
         try:
-            work_dir = paths.acp_workspace_dir(thread_id)
+            work_dir = paths.acp_workspace_dir(thread_id, user_id=get_effective_user_id())
         except ValueError:
             logger.warning("Invalid thread_id %r for ACP workspace, falling back to global", thread_id)
             work_dir = paths.base_dir / "acp-workspace"
diff --git a/backend/packages/harness/deerflow/tools/builtins/present_file_tool.py b/backend/packages/harness/deerflow/tools/builtins/present_file_tool.py
index 13ddd247e..211053f1a 100644
--- a/backend/packages/harness/deerflow/tools/builtins/present_file_tool.py
+++ b/backend/packages/harness/deerflow/tools/builtins/present_file_tool.py
@@ -9,6 +9,7 @@ from langgraph.typing import ContextT
 
 from deerflow.agents.thread_state import ThreadState
 from deerflow.config.paths import VIRTUAL_PATH_PREFIX, get_paths
+from deerflow.runtime.user_context import get_effective_user_id
 
 OUTPUTS_VIRTUAL_PREFIX = f"{VIRTUAL_PATH_PREFIX}/outputs"
 
@@ -51,7 +52,7 @@ def _normalize_presented_filepath(
     if runtime.state is None:
         raise ValueError("Thread runtime state is not available")
 
-    thread_id = _get_thread_id(runtime)
+    thread_id = runtime.context.thread_id
     if not thread_id:
         raise ValueError("Thread ID is not available in runtime context or runtime config")
 
@@ -65,7 +66,7 @@ def _normalize_presented_filepath(
     virtual_prefix = VIRTUAL_PATH_PREFIX.lstrip("/")
 
     if stripped == virtual_prefix or stripped.startswith(virtual_prefix + "/"):
-        actual_path = get_paths().resolve_virtual_path(thread_id, filepath)
+        actual_path = get_paths().resolve_virtual_path(thread_id, filepath, user_id=get_effective_user_id())
     else:
         actual_path = Path(filepath).expanduser().resolve()
 
diff --git a/backend/packages/harness/deerflow/tools/builtins/setup_agent_tool.py b/backend/packages/harness/deerflow/tools/builtins/setup_agent_tool.py
index 793ccb13a..32fdc87f5 100644
--- a/backend/packages/harness/deerflow/tools/builtins/setup_agent_tool.py
+++ b/backend/packages/harness/deerflow/tools/builtins/setup_agent_tool.py
@@ -27,7 +27,7 @@ def setup_agent(
         skills: Optional list of skill names this agent should use. None means use all enabled skills, empty list means no skills.
     """
 
-    agent_name: str | None = runtime.context.get("agent_name") if runtime.context else None
+    agent_name: str | None = runtime.context.agent_name
     agent_dir = None
     is_new_dir = False
 
diff --git a/backend/packages/harness/deerflow/tools/builtins/task_tool.py b/backend/packages/harness/deerflow/tools/builtins/task_tool.py
index 59613272c..da356f975 100644
--- a/backend/packages/harness/deerflow/tools/builtins/task_tool.py
+++ b/backend/packages/harness/deerflow/tools/builtins/task_tool.py
@@ -11,6 +11,7 @@ from langgraph.config import get_stream_writer
 from langgraph.typing import ContextT
 
 from deerflow.agents.thread_state import ThreadState
+from deerflow.config.deer_flow_context import resolve_context
 from deerflow.sandbox.security import LOCAL_BASH_SUBAGENT_DISABLED_MESSAGE, is_host_bash_allowed
 from deerflow.subagents import SubagentExecutor, get_available_subagent_names, get_subagent_config
 from deerflow.subagents.executor import SubagentStatus, cleanup_background_task, get_background_task_result, request_cancel_background_task
@@ -74,14 +75,15 @@ async def task_tool(
         subagent_type: The type of subagent to use. ALWAYS PROVIDE THIS PARAMETER THIRD.
         max_turns: Optional maximum number of agent turns. Defaults to subagent's configured max.
     """
-    available_subagent_names = get_available_subagent_names()
+    ctx = resolve_context(runtime)
+    available_subagent_names = get_available_subagent_names(ctx.app_config)
 
     # Get subagent configuration
-    config = get_subagent_config(subagent_type)
+    config = get_subagent_config(subagent_type, ctx.app_config)
     if config is None:
         available = ", ".join(available_subagent_names)
         return f"Error: Unknown subagent type '{subagent_type}'. Available: {available}"
-    if subagent_type == "bash" and not is_host_bash_allowed():
+    if subagent_type == "bash" and not is_host_bash_allowed(ctx.app_config):
         return f"Error: {LOCAL_BASH_SUBAGENT_DISABLED_MESSAGE}"
 
     # Build config overrides
@@ -105,9 +107,7 @@ async def task_tool(
     if runtime is not None:
         sandbox_state = runtime.state.get("sandbox")
         thread_data = runtime.state.get("thread_data")
-        thread_id = runtime.context.get("thread_id") if runtime.context else None
-        if thread_id is None:
-            thread_id = runtime.config.get("configurable", {}).get("thread_id")
+        thread_id = runtime.context.thread_id
 
         # Try to get parent model from configurable
         metadata = runtime.config.get("metadata", {})
@@ -131,12 +131,13 @@ async def task_tool(
     parent_tool_groups = metadata.get("tool_groups")
 
     # Subagents should not have subagent tools enabled (prevent recursive nesting)
-    tools = get_available_tools(model_name=parent_model, groups=parent_tool_groups, subagent_enabled=False)
+    tools = get_available_tools(model_name=parent_model, groups=parent_tool_groups, subagent_enabled=False, app_config=ctx.app_config)
 
     # Create executor
     executor = SubagentExecutor(
         config=config,
         tools=tools,
+        app_config=ctx.app_config,
         parent_model=parent_model,
         sandbox_state=sandbox_state,
         thread_data=thread_data,
diff --git a/backend/packages/harness/deerflow/tools/skill_manage_tool.py b/backend/packages/harness/deerflow/tools/skill_manage_tool.py
index 3b7a109cc..920883e8d 100644
--- a/backend/packages/harness/deerflow/tools/skill_manage_tool.py
+++ b/backend/packages/harness/deerflow/tools/skill_manage_tool.py
@@ -5,7 +5,7 @@ from __future__ import annotations
 import asyncio
 import logging
 import shutil
-from typing import Any
+from typing import TYPE_CHECKING, Any
 from weakref import WeakValueDictionary
 
 from langchain.tools import ToolRuntime, tool
@@ -13,6 +13,9 @@ from langgraph.typing import ContextT
 
 from deerflow.agents.lead_agent.prompt import refresh_skills_system_prompt_cache_async
 from deerflow.agents.thread_state import ThreadState
+
+if TYPE_CHECKING:
+    from deerflow.config.app_config import AppConfig
 from deerflow.mcp.tools import _make_sync_tool_wrapper
 from deerflow.skills.manager import (
     append_history,
@@ -45,9 +48,7 @@ def _get_lock(name: str) -> asyncio.Lock:
 def _get_thread_id(runtime: ToolRuntime[ContextT, ThreadState] | None) -> str | None:
     if runtime is None:
         return None
-    if runtime.context and runtime.context.get("thread_id"):
-        return runtime.context.get("thread_id")
-    return runtime.config.get("configurable", {}).get("thread_id")
+    return runtime.context.thread_id or None
 
 
 def _history_record(*, action: str, file_path: str, prev_content: str | None, new_content: str | None, thread_id: str | None, scanner: dict[str, Any]) -> dict[str, Any]:
@@ -62,8 +63,8 @@ def _history_record(*, action: str, file_path: str, prev_content: str | None, ne
     }
 
 
-async def _scan_or_raise(content: str, *, executable: bool, location: str) -> dict[str, str]:
-    result = await scan_skill_content(content, executable=executable, location=location)
+async def _scan_or_raise(app_config: "AppConfig", content: str, *, executable: bool, location: str) -> dict[str, str]:
+    result = await scan_skill_content(app_config, content, executable=executable, location=location)
     if result.decision == "block":
         raise ValueError(f"Security scan blocked the write: {result.reason}")
     if executable and result.decision != "allow":
@@ -96,50 +97,55 @@ async def _skill_manage_impl(
         replace: Replacement text for patch.
         expected_count: Optional expected number of replacements for patch.
     """
+    from deerflow.config.deer_flow_context import resolve_context
+
     name = validate_skill_name(name)
     lock = _get_lock(name)
     thread_id = _get_thread_id(runtime)
+    app_config = resolve_context(runtime).app_config
 
     async with lock:
         if action == "create":
-            if await _to_thread(custom_skill_exists, name):
+            if await _to_thread(custom_skill_exists, name, app_config):
                 raise ValueError(f"Custom skill '{name}' already exists.")
             if content is None:
                 raise ValueError("content is required for create.")
             await _to_thread(validate_skill_markdown_content, name, content)
-            scan = await _scan_or_raise(content, executable=False, location=f"{name}/SKILL.md")
-            skill_file = await _to_thread(get_custom_skill_file, name)
+            scan = await _scan_or_raise(app_config, content, executable=False, location=f"{name}/SKILL.md")
+            skill_file = await _to_thread(get_custom_skill_file, name, app_config)
             await _to_thread(atomic_write, skill_file, content)
             await _to_thread(
                 append_history,
                 name,
                 _history_record(action="create", file_path="SKILL.md", prev_content=None, new_content=content, thread_id=thread_id, scanner=scan),
+                app_config,
             )
-            await refresh_skills_system_prompt_cache_async()
+            await refresh_skills_system_prompt_cache_async(app_config)
             return f"Created custom skill '{name}'."
 
         if action == "edit":
-            await _to_thread(ensure_custom_skill_is_editable, name)
+            await _to_thread(ensure_custom_skill_is_editable, name, app_config)
             if content is None:
                 raise ValueError("content is required for edit.")
             await _to_thread(validate_skill_markdown_content, name, content)
-            scan = await _scan_or_raise(content, executable=False, location=f"{name}/SKILL.md")
-            skill_file = await _to_thread(get_custom_skill_file, name)
+            scan = await _scan_or_raise(app_config, content, executable=False, location=f"{name}/SKILL.md")
+            skill_file = await _to_thread(get_custom_skill_file, name, app_config)
             prev_content = await _to_thread(skill_file.read_text, encoding="utf-8")
             await _to_thread(atomic_write, skill_file, content)
             await _to_thread(
                 append_history,
                 name,
                 _history_record(action="edit", file_path="SKILL.md", prev_content=prev_content, new_content=content, thread_id=thread_id, scanner=scan),
+                app_config,
             )
-            await refresh_skills_system_prompt_cache_async()
+            await refresh_skills_system_prompt_cache_async(app_config)
             return f"Updated custom skill '{name}'."
 
         if action == "patch":
-            await _to_thread(ensure_custom_skill_is_editable, name)
+            await _to_thread(ensure_custom_skill_is_editable, name, app_config)
             if find is None or replace is None:
                 raise ValueError("find and replace are required for patch.")
-            skill_file = await _to_thread(get_custom_skill_file, name)
+            skill_file = await _to_thread(get_custom_skill_file, name, app_config)
             prev_content = await _to_thread(skill_file.read_text, encoding="utf-8")
             occurrences = prev_content.count(find)
             if occurrences == 0:
@@ -149,51 +155,54 @@ async def _skill_manage_impl(
             replacement_count = expected_count if expected_count is not None else 1
             new_content = prev_content.replace(find, replace, replacement_count)
             await _to_thread(validate_skill_markdown_content, name, new_content)
-            scan = await _scan_or_raise(new_content, executable=False, location=f"{name}/SKILL.md")
+            scan = await _scan_or_raise(app_config, new_content, executable=False, location=f"{name}/SKILL.md")
             await _to_thread(atomic_write, skill_file, new_content)
             await _to_thread(
                 append_history,
                 name,
                 _history_record(action="patch", file_path="SKILL.md", prev_content=prev_content, new_content=new_content, thread_id=thread_id, scanner=scan),
+                app_config,
             )
-            await refresh_skills_system_prompt_cache_async()
+            await refresh_skills_system_prompt_cache_async(app_config)
             return f"Patched custom skill '{name}' ({replacement_count} replacement(s) applied, {occurrences} match(es) found)."
 
         if action == "delete":
-            await _to_thread(ensure_custom_skill_is_editable, name)
-            skill_dir = await _to_thread(get_custom_skill_dir, name)
-            prev_content = await _to_thread(read_custom_skill_content, name)
+            await _to_thread(ensure_custom_skill_is_editable, name, app_config)
+            skill_dir = await _to_thread(get_custom_skill_dir, name, app_config)
+            prev_content = await _to_thread(read_custom_skill_content, name, app_config)
             await _to_thread(
                 append_history,
                 name,
                 _history_record(action="delete", file_path="SKILL.md", prev_content=prev_content, new_content=None, thread_id=thread_id, scanner={"decision": "allow", "reason": "Deletion requested."}),
+                app_config,
             )
             await _to_thread(shutil.rmtree, skill_dir)
-            await refresh_skills_system_prompt_cache_async()
+            await refresh_skills_system_prompt_cache_async(app_config)
             return f"Deleted custom skill '{name}'."
 
         if action == "write_file":
-            await _to_thread(ensure_custom_skill_is_editable, name)
+            await _to_thread(ensure_custom_skill_is_editable, name, app_config)
             if path is None or content is None:
                 raise ValueError("path and content are required for write_file.")
-            target = await _to_thread(ensure_safe_support_path, name, path)
+            target = await _to_thread(ensure_safe_support_path, name, path, app_config)
             exists = await _to_thread(target.exists)
             prev_content = await _to_thread(target.read_text, encoding="utf-8") if exists else None
             executable = "scripts/" in path or path.startswith("scripts/")
-            scan = await _scan_or_raise(content, executable=executable, location=f"{name}/{path}")
+            scan = await _scan_or_raise(app_config, content, executable=executable, location=f"{name}/{path}")
             await _to_thread(atomic_write, target, content)
             await _to_thread(
                 append_history,
                 name,
                 _history_record(action="write_file", file_path=path, prev_content=prev_content, new_content=content, thread_id=thread_id, scanner=scan),
+                app_config,
             )
             return f"Wrote '{path}' for custom skill '{name}'."
 
         if action == "remove_file":
-            await _to_thread(ensure_custom_skill_is_editable, name)
+            await _to_thread(ensure_custom_skill_is_editable, name, app_config)
             if path is None:
                 raise ValueError("path is required for remove_file.")
-            target = await _to_thread(ensure_safe_support_path, name, path)
+            target = await _to_thread(ensure_safe_support_path, name, path, app_config)
             if not await _to_thread(target.exists):
                 raise FileNotFoundError(f"Supporting file '{path}' not found for skill '{name}'.")
             prev_content = await _to_thread(target.read_text, encoding="utf-8")
@@ -202,10 +211,11 @@ async def _skill_manage_impl(
                 append_history,
                 name,
                 _history_record(action="remove_file", file_path=path, prev_content=prev_content, new_content=None, thread_id=thread_id, scanner={"decision": "allow", "reason": "Deletion requested."}),
+                app_config,
             )
             return f"Removed '{path}' from custom skill '{name}'."
 
-        if await _to_thread(public_skill_exists, name):
+        if await _to_thread(public_skill_exists, name, app_config):
             raise ValueError(f"'{name}' is a built-in skill. To customise it, create a new skill with the same name under skills/custom/.")
         raise ValueError(f"Unsupported action '{action}'.")
 
diff --git a/backend/packages/harness/deerflow/tools/tools.py b/backend/packages/harness/deerflow/tools/tools.py
index 6b027e54e..c2c7db599 100644
--- a/backend/packages/harness/deerflow/tools/tools.py
+++ b/backend/packages/harness/deerflow/tools/tools.py
@@ -2,7 +2,7 @@ import logging
 
 from langchain.tools import BaseTool
 
-from deerflow.config import get_app_config
+from deerflow.config.app_config import AppConfig
 from deerflow.reflection import resolve_variable
 from deerflow.sandbox.security import is_host_bash_allowed
 from deerflow.tools.builtins import ask_clarification_tool, present_file_tool, task_tool, view_image_tool
@@ -37,6 +37,8 @@ def get_available_tools(
     include_mcp: bool = True,
     model_name: str | None = None,
     subagent_enabled: bool = False,
+    *,
+    app_config: AppConfig,
 ) -> list[BaseTool]:
     """Get all available tools from config.
 
@@ -48,11 +50,12 @@ def get_available_tools(
         include_mcp: Whether to include tools from MCP servers (default: True).
         model_name: Optional model name to determine if vision tools should be included.
         subagent_enabled: Whether to include subagent tools (task, task_status).
+        app_config: Application config — required.
 
     Returns:
         List of available tools.
     """
-    config = get_app_config()
+    config = app_config
     tool_configs = [tool for tool in config.tools if groups is None or tool.group in groups]
 
     # Do not expose host bash by default when LocalSandboxProvider is active.
@@ -138,10 +141,9 @@ def get_available_tools(
     # Add invoke_acp_agent tool if any ACP agents are configured
     acp_tools: list[BaseTool] = []
     try:
-        from deerflow.config.acp_config import get_acp_agents
         from deerflow.tools.builtins.invoke_acp_agent_tool import build_invoke_acp_agent_tool
 
-        acp_agents = get_acp_agents()
+        acp_agents = config.acp_agents
         if acp_agents:
             acp_tools.append(build_invoke_acp_agent_tool(acp_agents))
             logger.info(f"Including invoke_acp_agent tool ({len(acp_agents)} agent(s): {list(acp_agents.keys())})")
diff --git a/backend/packages/harness/deerflow/uploads/manager.py b/backend/packages/harness/deerflow/uploads/manager.py
index 8c60399e7..c36151b38 100644
--- a/backend/packages/harness/deerflow/uploads/manager.py
+++ b/backend/packages/harness/deerflow/uploads/manager.py
@@ -10,6 +10,7 @@ from pathlib import Path
 from urllib.parse import quote
 
 from deerflow.config.paths import VIRTUAL_PATH_PREFIX, get_paths
+from deerflow.runtime.user_context import get_effective_user_id
 
 
 class PathTraversalError(ValueError):
@@ -33,7 +34,7 @@ def validate_thread_id(thread_id: str) -> None:
 def get_uploads_dir(thread_id: str) -> Path:
     """Return the uploads directory path for a thread (no side effects)."""
     validate_thread_id(thread_id)
-    return get_paths().sandbox_uploads_dir(thread_id)
+    return get_paths().sandbox_uploads_dir(thread_id, user_id=get_effective_user_id())
 
 
 def ensure_uploads_dir(thread_id: str) -> Path:
diff --git a/backend/packages/harness/deerflow/utils/file_conversion.py b/backend/packages/harness/deerflow/utils/file_conversion.py
index f51b47caa..a694c29a8 100644
--- a/backend/packages/harness/deerflow/utils/file_conversion.py
+++ b/backend/packages/harness/deerflow/utils/file_conversion.py
@@ -19,8 +19,6 @@ import logging
 import re
 from pathlib import Path
 
-from deerflow.config.app_config import get_app_config
-
 logger = logging.getLogger(__name__)
 
 # File extensions that should be converted to markdown
@@ -135,7 +133,7 @@ def _do_convert(file_path: Path, pdf_converter: str) -> str:
     return _convert_with_markitdown(file_path)
 
 
-async def convert_file_to_markdown(file_path: Path) -> Path | None:
+async def convert_file_to_markdown(file_path: Path, app_config: object | None = None) -> Path | None:
     """Convert a supported document file to Markdown.
 
     PDF files are handled with a two-converter strategy (see module docstring).
@@ -144,12 +142,14 @@ async def convert_file_to_markdown(file_path: Path) -> Path | None:
 
     Args:
         file_path: Path to the file to convert.
+        app_config: Optional AppConfig (for pdf_converter preference). When
+            omitted, defaults to ``auto``.
 
     Returns:
         Path to the generated .md file, or None if conversion failed.
     """
     try:
-        pdf_converter = _get_pdf_converter()
+        pdf_converter = _get_pdf_converter(app_config)
         file_size = file_path.stat().st_size
 
         if file_size > _ASYNC_THRESHOLD_BYTES:
@@ -288,28 +288,20 @@ def extract_outline(md_path: Path) -> list[dict]:
     return outline
 
 
-def _get_uploads_config_value(key: str, default: object) -> object:
-    """Read a value from the uploads config, supporting dict and attribute access."""
-    cfg = get_app_config()
-    uploads_cfg = getattr(cfg, "uploads", None)
-    if isinstance(uploads_cfg, dict):
-        return uploads_cfg.get(key, default)
-    return getattr(uploads_cfg, key, default)
-
-
-def _get_pdf_converter() -> str:
+def _get_pdf_converter(app_config: object | None) -> str:
     """Read pdf_converter setting from app config, defaulting to 'auto'.
 
     Normalizes the value to lowercase and validates it against the allowed set
     so that values like 'AUTO' or 'MarkItDown' from config.yaml don't silently
     fall through to unexpected behaviour.
     """
-    try:
-        raw = str(_get_uploads_config_value("pdf_converter", "auto")).strip().lower()
-        if raw not in _ALLOWED_PDF_CONVERTERS:
-            logger.warning("Invalid pdf_converter value %r; falling back to 'auto'", raw)
-            return "auto"
-        return raw
-    except Exception:
-        pass
-    return "auto"
+    if app_config is None:
+        return "auto"
+    uploads_cfg = getattr(app_config, "uploads", None)
+    if uploads_cfg is None:
+        return "auto"
+    raw = str(getattr(uploads_cfg, "pdf_converter", "auto")).strip().lower()
+    if raw not in _ALLOWED_PDF_CONVERTERS:
+        logger.warning("Invalid pdf_converter value %r; falling back to 'auto'", raw)
+        return "auto"
+    return raw
diff --git a/backend/packages/harness/pyproject.toml b/backend/packages/harness/pyproject.toml
index e7a81ff7b..87e571dd3 100644
--- a/backend/packages/harness/pyproject.toml
+++ b/backend/packages/harness/pyproject.toml
@@ -33,10 +33,19 @@ dependencies = [
     "langchain-google-genai>=4.2.1",
     "langgraph-checkpoint-sqlite>=3.0.3",
     "langgraph-sdk>=0.1.51",
+    "sqlalchemy[asyncio]>=2.0,<3.0",
+    "aiosqlite>=0.19",
+    "alembic>=1.13",
 ]
 
 [project.optional-dependencies]
 ollama = ["langchain-ollama>=0.3.0"]
+postgres = [
+    "asyncpg>=0.29",
+    "langgraph-checkpoint-postgres>=3.0.5",
+    "psycopg[binary]>=3.3.3",
+    "psycopg-pool>=3.3.0",
+]
 pymupdf = ["pymupdf4llm>=0.0.17"]
 
 [build-system]
diff --git a/backend/pyproject.toml b/backend/pyproject.toml
index fe280d701..fbda138d5 100644
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -17,8 +17,14 @@ dependencies = [
     "langgraph-sdk>=0.1.51",
     "markdown-to-mrkdwn>=0.3.1",
     "wecom-aibot-python-sdk>=0.1.6",
+    "bcrypt>=4.0.0",
+    "pyjwt>=2.9.0",
+    "email-validator>=2.0.0",
 ]
 
+[project.optional-dependencies]
+postgres = ["deerflow-harness[postgres]"]
+
 [dependency-groups]
 dev = [
     "prompt-toolkit>=3.0.0",
@@ -27,6 +33,11 @@ dev = [
     "ruff>=0.14.11",
 ]
 
+[tool.pytest.ini_options]
+markers = [
+    "no_auto_user: disable the conftest autouse contextvar fixture for this test",
+]
+
 [tool.uv.workspace]
 members = ["packages/harness"]
 
diff --git a/backend/scripts/migrate_user_isolation.py b/backend/scripts/migrate_user_isolation.py
new file mode 100644
index 000000000..4d37a0d1e
--- /dev/null
+++ b/backend/scripts/migrate_user_isolation.py
@@ -0,0 +1,160 @@
+"""One-time migration: move legacy thread dirs and memory into per-user layout.
+
+Usage:
+    PYTHONPATH=. python scripts/migrate_user_isolation.py [--dry-run]
+
+The script is idempotent — re-running it after a successful migration is a no-op.
+"""
+import argparse
+import json
+import logging
+import shutil
+from pathlib import Path
+
+from deerflow.config.paths import Paths, get_paths
+
+logger = logging.getLogger(__name__)
+
+
+def migrate_thread_dirs(
+    paths: Paths,
+    thread_owner_map: dict[str, str],
+    *,
+    dry_run: bool = False,
+) -> list[dict]:
+    """Move legacy thread directories into per-user layout.
+
+    Args:
+        paths: Paths instance.
+        thread_owner_map: Mapping of thread_id -> user_id from threads_meta table.
+        dry_run: If True, only log what would happen.
+
+    Returns:
+        List of migration report entries.
+    """
+    report: list[dict] = []
+    legacy_threads = paths.base_dir / "threads"
+    if not legacy_threads.exists():
+        logger.info("No legacy threads directory found — nothing to migrate.")
+        return report
+
+    for thread_dir in sorted(legacy_threads.iterdir()):
+        if not thread_dir.is_dir():
+            continue
+        thread_id = thread_dir.name
+        user_id = thread_owner_map.get(thread_id, "default")
+        dest = paths.base_dir / "users" / user_id / "threads" / thread_id
+
+        entry = {"thread_id": thread_id, "user_id": user_id, "action": ""}
+
+        if dest.exists():
+            conflicts_dir = paths.base_dir / "migration-conflicts" / thread_id
+            entry["action"] = f"conflict -> {conflicts_dir}"
+            if not dry_run:
+                conflicts_dir.parent.mkdir(parents=True, exist_ok=True)
+                shutil.move(str(thread_dir), str(conflicts_dir))
+            logger.warning("Conflict for thread %s: moved to %s", thread_id, conflicts_dir)
+        else:
+            entry["action"] = f"moved -> {dest}"
+            if not dry_run:
+                dest.parent.mkdir(parents=True, exist_ok=True)
+                shutil.move(str(thread_dir), str(dest))
+            logger.info("Migrated thread %s -> user %s", thread_id, user_id)
+
+        report.append(entry)
+
+    # Clean up empty legacy threads dir
+    if not dry_run and legacy_threads.exists() and not any(legacy_threads.iterdir()):
+        legacy_threads.rmdir()
+
+    return report
+
+
+def migrate_memory(
+    paths: Paths,
+    user_id: str = "default",
+    *,
+    dry_run: bool = False,
+) -> None:
+    """Move legacy global memory.json into per-user layout.
+
+    Args:
+        paths: Paths instance.
+        user_id: Target user to receive the legacy memory.
+        dry_run: If True, only log.
+    """
+    legacy_mem = paths.base_dir / "memory.json"
+    if not legacy_mem.exists():
+        logger.info("No legacy memory.json found — nothing to migrate.")
+        return
+
+    dest = paths.user_memory_file(user_id)
+    if dest.exists():
+        legacy_backup = paths.base_dir / "memory.legacy.json"
+        logger.warning("Destination %s exists; renaming legacy to %s", dest, legacy_backup)
+        if not dry_run:
+            legacy_mem.rename(legacy_backup)
+        return
+
+    logger.info("Migrating memory.json -> %s", dest)
+    if not dry_run:
+        dest.parent.mkdir(parents=True, exist_ok=True)
+        shutil.move(str(legacy_mem), str(dest))
+
+
+def _build_owner_map_from_db(paths: Paths) -> dict[str, str]:
+    """Query threads_meta table for thread_id -> user_id mapping.
+
+    Uses raw sqlite3 to avoid async dependencies.
+    """
+    import sqlite3
+
+    db_path = paths.base_dir / "deer-flow.db"
+    if not db_path.exists():
+        logger.info("No database found at %s — using empty owner map.", db_path)
+        return {}
+
+    conn = sqlite3.connect(str(db_path))
+    try:
+        cursor = conn.execute("SELECT thread_id, user_id FROM threads_meta WHERE user_id IS NOT NULL")
+        return {row[0]: row[1] for row in cursor.fetchall()}
+    except sqlite3.OperationalError as e:
+        logger.warning("Failed to query threads_meta: %s", e)
+        return {}
+    finally:
+        conn.close()
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(description="Migrate DeerFlow data to per-user layout")
+    parser.add_argument("--dry-run", action="store_true", help="Log actions without making changes")
+    args = parser.parse_args()
+
+    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
+
+    paths = get_paths()
+    logger.info("Base directory: %s", paths.base_dir)
+    logger.info("Dry run: %s", args.dry_run)
+
+    owner_map = _build_owner_map_from_db(paths)
+    logger.info("Found %d thread ownership records in DB", len(owner_map))
+
+    report = migrate_thread_dirs(paths, owner_map, dry_run=args.dry_run)
+    migrate_memory(paths, user_id="default", dry_run=args.dry_run)
+
+    if report:
+        logger.info("Migration report:")
+        for entry in report:
+            logger.info("  thread=%s user=%s action=%s", entry["thread_id"], entry["user_id"], entry["action"])
+    else:
+        logger.info("No threads to migrate.")
+
+    unowned = [e for e in report if e["user_id"] == "default"]
+    if unowned:
+        logger.warning("%d thread(s) had no owner and were assigned to 'default':", len(unowned))
+        for e in unowned:
+            logger.warning("  %s", e["thread_id"])
+
+
+if __name__ == "__main__":
+    main()
diff --git a/backend/tests/_router_auth_helpers.py b/backend/tests/_router_auth_helpers.py
new file mode 100644
index 000000000..a7ce60468
--- /dev/null
+++ b/backend/tests/_router_auth_helpers.py
@@ -0,0 +1,134 @@
+"""Helpers for router-level tests that need a stubbed auth context.
+
+The production gateway runs ``AuthMiddleware`` (validates the JWT cookie)
+ahead of every router, plus ``@require_permission(owner_check=True)``
+decorators that read ``request.state.auth`` and call
+``thread_store.check_access``. Router-level unit tests construct
+**bare** FastAPI apps that include only one router — they have neither
+the auth middleware nor a real thread_store, so the decorators raise
+401 (TestClient path) or ValueError (direct-call path).
+
+This module provides two surfaces:
+
+1. :func:`make_authed_test_app` — wraps ``FastAPI()`` with a tiny
+   ``BaseHTTPMiddleware`` that stamps a fake user / AuthContext on every
+   request, plus a permissive ``thread_store`` mock on
+   ``app.state``. Use from TestClient-based router tests.
+
+2. :func:`call_unwrapped` — invokes the underlying function bypassing
+   the ``@require_permission`` decorator chain by walking ``__wrapped__``.
+   Use from direct-call tests that previously imported the route
+   function and called it positionally.
+
+Both helpers are deliberately permissive: they never deny a request.
+Tests that want to verify the *auth boundary itself* (e.g.
+``test_auth_middleware``, ``test_auth_type_system``) build their own
+apps with the real middleware — those should not use this module.
+"""
+
+from __future__ import annotations
+
+from collections.abc import Callable
+from typing import ParamSpec, TypeVar
+from unittest.mock import AsyncMock, MagicMock
+from uuid import uuid4
+
+from fastapi import FastAPI, Request, Response
+from starlette.middleware.base import BaseHTTPMiddleware
+from starlette.types import ASGIApp
+
+from app.gateway.auth.models import User
+from app.gateway.authz import AuthContext, Permissions
+
+# Default permission set granted to the stub user. Mirrors `_ALL_PERMISSIONS`
+# in authz.py — kept inline so the tests don't import a private symbol.
+_STUB_PERMISSIONS: list[str] = [
+    Permissions.THREADS_READ,
+    Permissions.THREADS_WRITE,
+    Permissions.THREADS_DELETE,
+    Permissions.RUNS_CREATE,
+    Permissions.RUNS_READ,
+    Permissions.RUNS_CANCEL,
+]
+
+
+def _make_stub_user() -> User:
+    """A deterministic test user — same shape as production, fresh UUID."""
+    return User(
+        email="router-test@example.com",
+        password_hash="x",
+        system_role="user",
+        id=uuid4(),
+    )
+
+
+class _StubAuthMiddleware(BaseHTTPMiddleware):
+    """Stamp a fake user / AuthContext onto every request.
+
+    Mirrors what production ``AuthMiddleware`` does after the JWT decode
+    + DB lookup short-circuit, so ``@require_permission`` finds an
+    authenticated context and skips its own re-authentication path.
+    """
+
+    def __init__(self, app: ASGIApp, user_factory: Callable[[], User]) -> None:
+        super().__init__(app)
+        self._user_factory = user_factory
+
+    async def dispatch(self, request: Request, call_next: Callable) -> Response:
+        user = self._user_factory()
+        request.state.user = user
+        request.state.auth = AuthContext(user=user, permissions=list(_STUB_PERMISSIONS))
+        return await call_next(request)
+
+
+def make_authed_test_app(
+    *,
+    user_factory: Callable[[], User] | None = None,
+    owner_check_passes: bool = True,
+) -> FastAPI:
+    """Build a FastAPI test app with stub auth + permissive thread_store.
+
+    Args:
+        user_factory: Override the default test user. Must return a fully
+            populated :class:`User`. Useful for cross-user isolation tests
+            that need a stable id across requests.
+        owner_check_passes: When True (default), ``thread_store.check_access``
+            returns True for every call so ``@require_permission(owner_check=True)``
+            never blocks the route under test. Pass False to verify that
+            permission failures surface correctly.
+
+    Returns:
+        A ``FastAPI`` app with the stub middleware installed and
+        ``app.state.thread_store`` set to a permissive mock. The
+        caller is still responsible for ``app.include_router(...)``.
+    """
+    factory = user_factory or _make_stub_user
+    app = FastAPI()
+    app.add_middleware(_StubAuthMiddleware, user_factory=factory)
+
+    repo = MagicMock()
+    repo.check_access = AsyncMock(return_value=owner_check_passes)
+    app.state.thread_store = repo
+
+    return app
+
+
+_P = ParamSpec("_P")
+_R = TypeVar("_R")
+
+
+def call_unwrapped(decorated: Callable[_P, _R], /, *args: _P.args, **kwargs: _P.kwargs) -> _R:
+    """Invoke the underlying function of a ``@require_permission``-decorated route.
+
+    ``functools.wraps`` sets ``__wrapped__`` on each layer; we walk all
+    the way down to the original handler, bypassing every authz +
+    require_auth wrapper. Use from tests that need to call route
+    functions directly (without TestClient) and don't want to construct
+    a fake ``Request`` just to satisfy the decorator. The ``ParamSpec``
+    propagates the wrapped route's signature so call sites still get
+    parameter checking despite the unwrapping.
+    """
+    fn: Callable = decorated
+    while hasattr(fn, "__wrapped__"):
+        fn = fn.__wrapped__  # type: ignore[attr-defined]
+    return fn(*args, **kwargs)
diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py
index 997f42577..63d23824b 100644
--- a/backend/tests/conftest.py
+++ b/backend/tests/conftest.py
@@ -7,6 +7,7 @@ issues when unit-testing lightweight config/registry code in isolation.
 import importlib.util
 import sys
 from pathlib import Path
+from types import SimpleNamespace
 from unittest.mock import MagicMock
 
 import pytest
@@ -53,3 +54,71 @@ def provisioner_module():
     module = importlib.util.module_from_spec(spec)
     spec.loader.exec_module(module)
     return module
+
+
+# ---------------------------------------------------------------------------
+# Auto-set user context for every test unless marked no_auto_user
+# ---------------------------------------------------------------------------
+#
+# Repository methods read ``user_id`` from a contextvar by default
+# (see ``deerflow.runtime.user_context``). Without this fixture, every
+# pre-existing persistence test would raise RuntimeError because the
+# contextvar is unset. The fixture sets a default test user on every
+# test; tests that explicitly want to verify behaviour *without* a user
+# context should mark themselves ``@pytest.mark.no_auto_user``.
+
+
+@pytest.fixture(autouse=True)
+def _auto_app_config_from_file(monkeypatch, request):
+    """Replace ``AppConfig.from_file`` with a minimal factory so tests that
+    (directly or indirectly, e.g. via the LangGraph Server bootstrap path in
+    ``make_lead_agent``) load AppConfig from disk do not need a real
+    ``config.yaml`` on the filesystem.
+
+    Tests that want to verify the real ``from_file`` behaviour should mark
+    themselves with ``@pytest.mark.real_from_file``.
+    """
+    if request.node.get_closest_marker("real_from_file"):
+        yield
+        return
+    try:
+        from deerflow.config.app_config import AppConfig
+        from deerflow.config.sandbox_config import SandboxConfig
+    except ImportError:
+        yield
+        return
+
+    def _fake_from_file(config_path: str | None = None) -> AppConfig:  # noqa: ARG001
+        return AppConfig(sandbox=SandboxConfig(use="test"))
+
+    monkeypatch.setattr(AppConfig, "from_file", _fake_from_file)
+    yield
+
+
+@pytest.fixture(autouse=True)
+def _auto_user_context(request):
+    """Inject a default ``test-user-autouse`` into the contextvar.
+
+    Opt-out via ``@pytest.mark.no_auto_user``. Uses lazy import so that
+    tests which don't touch the persistence layer never pay the cost
+    of importing runtime.user_context.
+    """
+    if request.node.get_closest_marker("no_auto_user"):
+        yield
+        return
+
+    try:
+        from deerflow.runtime.user_context import (
+            reset_current_user,
+            set_current_user,
+        )
+    except ImportError:
+        yield
+        return
+
+    user = SimpleNamespace(id="test-user-autouse", email="test@local")
+    token = set_current_user(user)
+    try:
+        yield
+    finally:
+        reset_current_user(token)
diff --git a/backend/tests/test_acp_config.py b/backend/tests/test_acp_config.py
index 16fbfad16..f958fa047 100644
--- a/backend/tests/test_acp_config.py
+++ b/backend/tests/test_acp_config.py
@@ -2,21 +2,27 @@
 
 import json
 
+import pytest
 import pytest
 import yaml
+
+pytestmark = pytest.mark.real_from_file
 from pydantic import ValidationError
 
-from deerflow.config.acp_config import ACPAgentConfig, get_acp_agents, load_acp_config_from_dict
+from deerflow.config.acp_config import ACPAgentConfig
 from deerflow.config.app_config import AppConfig
+from deerflow.config.sandbox_config import SandboxConfig
 
 
-def setup_function():
-    """Reset ACP config before each test."""
-    load_acp_config_from_dict({})
+def _make_config(acp_agents: dict | None = None) -> AppConfig:
+    return AppConfig(
+        sandbox=SandboxConfig(use="test"),
+        acp_agents={name: ACPAgentConfig(**cfg) for name, cfg in (acp_agents or {}).items()},
+    )
 
 
-def test_load_acp_config_sets_agents():
-    load_acp_config_from_dict(
+def test_acp_agents_via_app_config():
+    cfg = _make_config(
         {
             "claude_code": {
                 "command": "claude-code-acp",
@@ -26,39 +32,33 @@ def test_load_acp_config_sets_agents():
             }
         }
     )
-    agents = get_acp_agents()
+    agents = cfg.acp_agents
     assert "claude_code" in agents
     assert agents["claude_code"].command == "claude-code-acp"
     assert agents["claude_code"].description == "Claude Code for coding tasks"
     assert agents["claude_code"].model is None
 
 
-def test_load_acp_config_multiple_agents():
-    load_acp_config_from_dict(
+def test_multiple_agents():
+    cfg = _make_config(
         {
             "claude_code": {"command": "claude-code-acp", "args": [], "description": "Claude Code"},
             "codex": {"command": "codex-acp", "args": ["--flag"], "description": "Codex CLI"},
         }
     )
-    agents = get_acp_agents()
+    agents = cfg.acp_agents
     assert len(agents) == 2
     assert agents["codex"].args == ["--flag"]
 
 
-def test_load_acp_config_empty_clears_agents():
-    load_acp_config_from_dict({"agent": {"command": "cmd", "args": [], "description": "desc"}})
-    assert len(get_acp_agents()) == 1
-
-    load_acp_config_from_dict({})
-    assert len(get_acp_agents()) == 0
+def test_empty_acp_agents():
+    cfg = _make_config({})
+    assert cfg.acp_agents == {}
 
 
-def test_load_acp_config_none_clears_agents():
-    load_acp_config_from_dict({"agent": {"command": "cmd", "args": [], "description": "desc"}})
-    assert len(get_acp_agents()) == 1
-
-    load_acp_config_from_dict(None)
-    assert get_acp_agents() == {}
+def test_default_acp_agents_empty():
+    cfg = AppConfig(sandbox=SandboxConfig(use="test"))
+    assert cfg.acp_agents == {}
 
 
 def test_acp_agent_config_defaults():
@@ -79,8 +79,8 @@ def test_acp_agent_config_env_default_is_empty():
     assert cfg.env == {}
 
 
-def test_load_acp_config_preserves_env():
-    load_acp_config_from_dict(
+def test_acp_agent_preserves_env():
+    cfg = _make_config(
         {
             "codex": {
                 "command": "codex-acp",
@@ -90,8 +90,7 @@ def test_load_acp_config_preserves_env():
             }
         }
     )
-    cfg = get_acp_agents()["codex"]
-    assert cfg.env == {"OPENAI_API_KEY": "$OPENAI_API_KEY", "FOO": "bar"}
+    assert cfg.acp_agents["codex"].env == {"OPENAI_API_KEY": "$OPENAI_API_KEY", "FOO": "bar"}
 
 
 def test_acp_agent_config_with_model():
@@ -115,13 +114,7 @@ def test_acp_agent_config_missing_description_raises():
         ACPAgentConfig(command="my-agent")
 
 
-def test_get_acp_agents_returns_empty_by_default():
-    """After clearing, should return empty dict."""
-    load_acp_config_from_dict({})
-    assert get_acp_agents() == {}
-
-
-def test_app_config_reload_without_acp_agents_clears_previous_state(tmp_path, monkeypatch):
+def test_app_config_from_file_with_acp_agents(tmp_path, monkeypatch):
     config_path = tmp_path / "config.yaml"
     extensions_path = tmp_path / "extensions_config.json"
     extensions_path.write_text(json.dumps({"mcpServers": {}, "skills": {}}), encoding="utf-8")
@@ -157,9 +150,9 @@ def test_app_config_reload_without_acp_agents_clears_previous_state(tmp_path, mo
     monkeypatch.setenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH", str(extensions_path))
 
     config_path.write_text(yaml.safe_dump(config_with_acp), encoding="utf-8")
-    AppConfig.from_file(str(config_path))
-    assert set(get_acp_agents()) == {"codex"}
+    app = AppConfig.from_file(str(config_path))
+    assert set(app.acp_agents) == {"codex"}
 
     config_path.write_text(yaml.safe_dump(config_without_acp), encoding="utf-8")
-    AppConfig.from_file(str(config_path))
-    assert get_acp_agents() == {}
+    app = AppConfig.from_file(str(config_path))
+    assert app.acp_agents == {}
diff --git a/backend/tests/test_aio_sandbox_provider.py b/backend/tests/test_aio_sandbox_provider.py
index e797cf7e3..c7984531f 100644
--- a/backend/tests/test_aio_sandbox_provider.py
+++ b/backend/tests/test_aio_sandbox_provider.py
@@ -57,6 +57,7 @@ def test_get_thread_mounts_includes_acp_workspace(tmp_path, monkeypatch):
     """_get_thread_mounts must include /mnt/acp-workspace (read-only) for docker sandbox."""
     aio_mod = importlib.import_module("deerflow.community.aio_sandbox.aio_sandbox_provider")
     monkeypatch.setattr(aio_mod, "get_paths", lambda: Paths(base_dir=tmp_path))
+    monkeypatch.setattr(aio_mod, "get_effective_user_id", lambda: None)
 
     mounts = aio_mod.AioSandboxProvider._get_thread_mounts("thread-3")
 
@@ -95,6 +96,7 @@ def test_get_thread_mounts_preserves_windows_host_path_style(tmp_path, monkeypat
     aio_mod = importlib.import_module("deerflow.community.aio_sandbox.aio_sandbox_provider")
     monkeypatch.setenv("DEER_FLOW_HOST_BASE_DIR", r"C:\Users\demo\deer-flow\backend\.deer-flow")
     monkeypatch.setattr(aio_mod, "get_paths", lambda: Paths(base_dir=tmp_path))
+    monkeypatch.setattr(aio_mod, "get_effective_user_id", lambda: None)
 
     mounts = aio_mod.AioSandboxProvider._get_thread_mounts("thread-10")
 
diff --git a/backend/tests/test_app_config_reload.py b/backend/tests/test_app_config_reload.py
index 9e865f142..716dcac05 100644
--- a/backend/tests/test_app_config_reload.py
+++ b/backend/tests/test_app_config_reload.py
@@ -1,13 +1,14 @@
 from __future__ import annotations
 
 import json
-import os
 from pathlib import Path
 
+import pytest
 import yaml
 
-from deerflow.config.agents_api_config import get_agents_api_config
-from deerflow.config.app_config import get_app_config, reset_app_config
+from deerflow.config.app_config import AppConfig
+
+pytestmark = pytest.mark.real_from_file
 
 
 def _write_config(path: Path, *, model_name: str, supports_thinking: bool) -> None:
@@ -29,113 +30,66 @@ def _write_config(path: Path, *, model_name: str, supports_thinking: bool) -> No
     )
 
 
-def _write_config_with_agents_api(
-    path: Path,
-    *,
-    model_name: str,
-    supports_thinking: bool,
-    agents_api: dict | None = None,
-) -> None:
-    config = {
-        "sandbox": {"use": "deerflow.sandbox.local:LocalSandboxProvider"},
-        "models": [
-            {
-                "name": model_name,
-                "use": "langchain_openai:ChatOpenAI",
-                "model": "gpt-test",
-                "supports_thinking": supports_thinking,
-            }
-        ],
-    }
-    if agents_api is not None:
-        config["agents_api"] = agents_api
-
-    path.write_text(yaml.safe_dump(config), encoding="utf-8")
-
-
 def _write_extensions_config(path: Path) -> None:
     path.write_text(json.dumps({"mcpServers": {}, "skills": {}}), encoding="utf-8")
 
 
-def test_get_app_config_reloads_when_file_changes(tmp_path, monkeypatch):
+def test_from_file_reads_model_name(tmp_path, monkeypatch):
+    """``AppConfig.from_file`` is the only lifecycle method now; there is no
+    process-global ``init/current``. Each consumer holds its own captured
+    AppConfig instance.
+    """
     config_path = tmp_path / "config.yaml"
     extensions_path = tmp_path / "extensions_config.json"
     _write_extensions_config(extensions_path)
-    _write_config(config_path, model_name="first-model", supports_thinking=False)
+    _write_config(config_path, model_name="test-model", supports_thinking=False)
 
     monkeypatch.setenv("DEER_FLOW_CONFIG_PATH", str(config_path))
     monkeypatch.setenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH", str(extensions_path))
-    reset_app_config()
 
-    try:
-        initial = get_app_config()
-        assert initial.models[0].supports_thinking is False
-
-        _write_config(config_path, model_name="first-model", supports_thinking=True)
-        next_mtime = config_path.stat().st_mtime + 5
-        os.utime(config_path, (next_mtime, next_mtime))
-
-        reloaded = get_app_config()
-        assert reloaded.models[0].supports_thinking is True
-        assert reloaded is not initial
-    finally:
-        reset_app_config()
+    config = AppConfig.from_file(str(config_path))
+    assert config.models[0].name == "test-model"
 
 
-def test_get_app_config_reloads_when_config_path_changes(tmp_path, monkeypatch):
-    config_a = tmp_path / "config-a.yaml"
-    config_b = tmp_path / "config-b.yaml"
-    extensions_path = tmp_path / "extensions_config.json"
-    _write_extensions_config(extensions_path)
-    _write_config(config_a, model_name="model-a", supports_thinking=False)
-    _write_config(config_b, model_name="model-b", supports_thinking=True)
-
-    monkeypatch.setenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH", str(extensions_path))
-    monkeypatch.setenv("DEER_FLOW_CONFIG_PATH", str(config_a))
-    reset_app_config()
-
-    try:
-        first = get_app_config()
-        assert first.models[0].name == "model-a"
-
-        monkeypatch.setenv("DEER_FLOW_CONFIG_PATH", str(config_b))
-        second = get_app_config()
-        assert second.models[0].name == "model-b"
-        assert second is not first
-    finally:
-        reset_app_config()
-
-
-def test_get_app_config_resets_agents_api_config_when_section_removed(tmp_path, monkeypatch):
+def test_from_file_each_call_returns_fresh_instance(tmp_path, monkeypatch):
+    """Two reads of the same file produce separate AppConfig instances —
+    no hidden singleton, no memoization. Callers decide when to re-read.
+    """
     config_path = tmp_path / "config.yaml"
     extensions_path = tmp_path / "extensions_config.json"
     _write_extensions_config(extensions_path)
-    _write_config_with_agents_api(
-        config_path,
-        model_name="first-model",
-        supports_thinking=False,
-        agents_api={"enabled": True},
+    _write_config(config_path, model_name="model-a", supports_thinking=False)
+
+    monkeypatch.setenv("DEER_FLOW_CONFIG_PATH", str(config_path))
+    monkeypatch.setenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH", str(extensions_path))
+
+    config_a = AppConfig.from_file(str(config_path))
+    assert config_a.models[0].name == "model-a"
+
+    _write_config(config_path, model_name="model-b", supports_thinking=True)
+    config_b = AppConfig.from_file(str(config_path))
+    assert config_b.models[0].name == "model-b"
+    assert config_a is not config_b
+
+
+def test_config_version_check(tmp_path, monkeypatch):
+    config_path = tmp_path / "config.yaml"
+    extensions_path = tmp_path / "extensions_config.json"
+    _write_extensions_config(extensions_path)
+
+    config_path.write_text(
+        yaml.safe_dump(
+            {
+                "config_version": 1,
+                "sandbox": {"use": "deerflow.sandbox.local:LocalSandboxProvider"},
+                "models": [],
+            }
+        ),
+        encoding="utf-8",
     )
 
     monkeypatch.setenv("DEER_FLOW_CONFIG_PATH", str(config_path))
     monkeypatch.setenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH", str(extensions_path))
-    reset_app_config()
 
-    try:
-        initial = get_app_config()
-        assert initial.models[0].name == "first-model"
-        assert get_agents_api_config().enabled is True
-
-        _write_config_with_agents_api(
-            config_path,
-            model_name="first-model",
-            supports_thinking=False,
-        )
-        next_mtime = config_path.stat().st_mtime + 5
-        os.utime(config_path, (next_mtime, next_mtime))
-
-        reloaded = get_app_config()
-        assert reloaded is not initial
-        assert get_agents_api_config().enabled is False
-    finally:
-        reset_app_config()
+    config = AppConfig.from_file(str(config_path))
+    assert config is not None
diff --git a/backend/tests/test_artifacts_router.py b/backend/tests/test_artifacts_router.py
index 9a30ff44e..df32e45dc 100644
--- a/backend/tests/test_artifacts_router.py
+++ b/backend/tests/test_artifacts_router.py
@@ -3,7 +3,7 @@ import zipfile
 from pathlib import Path
 
 import pytest
-from fastapi import FastAPI
+from _router_auth_helpers import call_unwrapped, make_authed_test_app
 from fastapi.testclient import TestClient
 from starlette.requests import Request
 from starlette.responses import FileResponse
@@ -36,7 +36,7 @@ def test_get_artifact_reads_utf8_text_file_on_windows_locale(tmp_path, monkeypat
     monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: artifact_path)
 
     request = _make_request()
-    response = asyncio.run(artifacts_router.get_artifact("thread-1", "mnt/user-data/outputs/note.txt", request))
+    response = asyncio.run(call_unwrapped(artifacts_router.get_artifact, "thread-1", "mnt/user-data/outputs/note.txt", request))
 
     assert bytes(response.body).decode("utf-8") == text
     assert response.media_type == "text/plain"
@@ -49,7 +49,7 @@ def test_get_artifact_forces_download_for_active_content(tmp_path, monkeypatch,
 
     monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: artifact_path)
 
-    response = asyncio.run(artifacts_router.get_artifact("thread-1", f"mnt/user-data/outputs/{filename}", _make_request()))
+    response = asyncio.run(call_unwrapped(artifacts_router.get_artifact, "thread-1", f"mnt/user-data/outputs/{filename}", _make_request()))
 
     assert isinstance(response, FileResponse)
     assert response.headers.get("content-disposition", "").startswith("attachment;")
@@ -63,7 +63,7 @@ def test_get_artifact_forces_download_for_active_content_in_skill_archive(tmp_pa
 
     monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: skill_path)
 
-    response = asyncio.run(artifacts_router.get_artifact("thread-1", f"mnt/user-data/outputs/sample.skill/{filename}", _make_request()))
+    response = asyncio.run(call_unwrapped(artifacts_router.get_artifact, "thread-1", f"mnt/user-data/outputs/sample.skill/{filename}", _make_request()))
 
     assert response.headers.get("content-disposition", "").startswith("attachment;")
     assert bytes(response.body) == content.encode("utf-8")
@@ -75,7 +75,7 @@ def test_get_artifact_download_false_does_not_force_attachment(tmp_path, monkeyp
 
     monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: artifact_path)
 
-    app = FastAPI()
+    app = make_authed_test_app()
     app.include_router(artifacts_router.router)
 
     with TestClient(app) as client:
@@ -93,7 +93,7 @@ def test_get_artifact_download_true_forces_attachment_for_skill_archive(tmp_path
 
     monkeypatch.setattr(artifacts_router, "resolve_thread_virtual_path", lambda _thread_id, _path: skill_path)
 
-    app = FastAPI()
+    app = make_authed_test_app()
     app.include_router(artifacts_router.router)
 
     with TestClient(app) as client:
diff --git a/backend/tests/test_auth.py b/backend/tests/test_auth.py
new file mode 100644
index 000000000..ea4c5733a
--- /dev/null
+++ b/backend/tests/test_auth.py
@@ -0,0 +1,654 @@
+"""Tests for authentication module: JWT, password hashing, AuthContext, and authz decorators."""
+
+from datetime import timedelta
+from unittest.mock import AsyncMock, MagicMock, patch
+from uuid import uuid4
+
+import pytest
+from fastapi import FastAPI, HTTPException
+from fastapi.testclient import TestClient
+
+from app.gateway.auth import create_access_token, decode_token, hash_password, verify_password
+from app.gateway.auth.models import User
+from app.gateway.authz import (
+    AuthContext,
+    Permissions,
+    get_auth_context,
+    require_auth,
+    require_permission,
+)
+
+# ── Password Hashing ────────────────────────────────────────────────────────
+
+
+def test_hash_password_and_verify():
+    """Hashing and verification round-trip."""
+    password = "s3cr3tP@ssw0rd!"
+    hashed = hash_password(password)
+    assert hashed != password
+    assert verify_password(password, hashed) is True
+    assert verify_password("wrongpassword", hashed) is False
+
+
+def test_hash_password_different_each_time():
+    """bcrypt generates unique salts, so same password has different hashes."""
+    password = "testpassword"
+    h1 = hash_password(password)
+    h2 = hash_password(password)
+    assert h1 != h2  # Different salts
+    # But both verify correctly
+    assert verify_password(password, h1) is True
+    assert verify_password(password, h2) is True
+
+
+def test_verify_password_rejects_empty():
+    """Empty password should not verify."""
+    hashed = hash_password("nonempty")
+    assert verify_password("", hashed) is False
+
+
+# ── JWT ─────────────────────────────────────────────────────────────────────
+
+
+def test_create_and_decode_token():
+    """JWT creation and decoding round-trip."""
+    user_id = str(uuid4())
+    # Set a valid JWT secret for this test
+    import os
+
+    os.environ["AUTH_JWT_SECRET"] = "test-secret-key-for-jwt-testing-minimum-32-chars"
+    token = create_access_token(user_id)
+    assert isinstance(token, str)
+
+    payload = decode_token(token)
+    assert payload is not None
+    assert payload.sub == user_id
+
+
+def test_decode_token_expired():
+    """Expired token returns TokenError.EXPIRED."""
+    from app.gateway.auth.errors import TokenError
+
+    user_id = str(uuid4())
+    # Create token that expires immediately
+    token = create_access_token(user_id, expires_delta=timedelta(seconds=-1))
+    payload = decode_token(token)
+    assert payload == TokenError.EXPIRED
+
+
+def test_decode_token_invalid():
+    """Invalid token returns TokenError."""
+    from app.gateway.auth.errors import TokenError
+
+    assert isinstance(decode_token("not.a.valid.token"), TokenError)
+    assert isinstance(decode_token(""), TokenError)
+    assert isinstance(decode_token("completely-wrong"), TokenError)
+
+
+def test_create_token_custom_expiry():
+    """Custom expiry is respected."""
+    user_id = str(uuid4())
+    token = create_access_token(user_id, expires_delta=timedelta(hours=1))
+    payload = decode_token(token)
+    assert payload is not None
+    assert payload.sub == user_id
+
+
+# ── AuthContext ────────────────────────────────────────────────────────────
+
+
+def test_auth_context_unauthenticated():
+    """AuthContext with no user."""
+    ctx = AuthContext(user=None, permissions=[])
+    assert ctx.is_authenticated is False
+    assert ctx.has_permission("threads", "read") is False
+
+
+def test_auth_context_authenticated_no_perms():
+    """AuthContext with user but no permissions."""
+    user = User(id=uuid4(), email="test@example.com", password_hash="hash")
+    ctx = AuthContext(user=user, permissions=[])
+    assert ctx.is_authenticated is True
+    assert ctx.has_permission("threads", "read") is False
+
+
+def test_auth_context_has_permission():
+    """AuthContext permission checking."""
+    user = User(id=uuid4(), email="test@example.com", password_hash="hash")
+    perms = [Permissions.THREADS_READ, Permissions.THREADS_WRITE]
+    ctx = AuthContext(user=user, permissions=perms)
+    assert ctx.has_permission("threads", "read") is True
+    assert ctx.has_permission("threads", "write") is True
+    assert ctx.has_permission("threads", "delete") is False
+    assert ctx.has_permission("runs", "read") is False
+
+
+def test_auth_context_require_user_raises():
+    """require_user raises 401 when not authenticated."""
+    ctx = AuthContext(user=None, permissions=[])
+    with pytest.raises(HTTPException) as exc_info:
+        ctx.require_user()
+    assert exc_info.value.status_code == 401
+
+
+def test_auth_context_require_user_returns_user():
+    """require_user returns user when authenticated."""
+    user = User(id=uuid4(), email="test@example.com", password_hash="hash")
+    ctx = AuthContext(user=user, permissions=[])
+    returned = ctx.require_user()
+    assert returned == user
+
+
+# ── get_auth_context helper ─────────────────────────────────────────────────
+
+
+def test_get_auth_context_not_set():
+    """get_auth_context returns None when auth not set on request."""
+    mock_request = MagicMock()
+    # Make getattr return None (simulating attribute not set)
+    mock_request.state = MagicMock()
+    del mock_request.state.auth
+    assert get_auth_context(mock_request) is None
+
+
+def test_get_auth_context_set():
+    """get_auth_context returns the AuthContext from request."""
+    user = User(id=uuid4(), email="test@example.com", password_hash="hash")
+    ctx = AuthContext(user=user, permissions=[Permissions.THREADS_READ])
+
+    mock_request = MagicMock()
+    mock_request.state.auth = ctx
+
+    assert get_auth_context(mock_request) == ctx
+
+
+# ── require_auth decorator ──────────────────────────────────────────────────
+
+
+def test_require_auth_sets_auth_context():
+    """require_auth sets auth context on request from cookie."""
+    from fastapi import Request
+
+    app = FastAPI()
+
+    @app.get("/test")
+    @require_auth
+    async def endpoint(request: Request):
+        ctx = get_auth_context(request)
+        return {"authenticated": ctx.is_authenticated}
+
+    with TestClient(app) as client:
+        # No cookie → anonymous
+        response = client.get("/test")
+        assert response.status_code == 200
+        assert response.json()["authenticated"] is False
+
+
+def test_require_auth_requires_request_param():
+    """require_auth raises ValueError if request parameter is missing."""
+    import asyncio
+
+    @require_auth
+    async def bad_endpoint():  # Missing `request` parameter
+        pass
+
+    with pytest.raises(ValueError, match="require_auth decorator requires 'request' parameter"):
+        asyncio.run(bad_endpoint())
+
+
+# ── require_permission decorator ─────────────────────────────────────────────
+
+
+def test_require_permission_requires_auth():
+    """require_permission raises 401 when not authenticated."""
+    from fastapi import Request
+
+    app = FastAPI()
+
+    @app.get("/test")
+    @require_permission("threads", "read")
+    async def endpoint(request: Request):
+        return {"ok": True}
+
+    with TestClient(app) as client:
+        response = client.get("/test")
+        assert response.status_code == 401
+        assert "Authentication required" in response.json()["detail"]
+
+
+def test_require_permission_denies_wrong_permission():
+    """User without required permission gets 403."""
+    from fastapi import Request
+
+    app = FastAPI()
+    user = User(id=uuid4(), email="test@example.com", password_hash="hash")
+
+    @app.get("/test")
+    @require_permission("threads", "delete")
+    async def endpoint(request: Request):
+        return {"ok": True}
+
+    mock_auth = AuthContext(user=user, permissions=[Permissions.THREADS_READ])
+
+    with patch("app.gateway.authz._authenticate", return_value=mock_auth):
+        with TestClient(app) as client:
+            response = client.get("/test")
+            assert response.status_code == 403
+            assert "Permission denied" in response.json()["detail"]
+
+
+# ── Weak JWT secret warning ──────────────────────────────────────────────────
+
+
+# ── User Model Fields ──────────────────────────────────────────────────────
+
+
+def test_user_model_has_needs_setup_default_false():
+    """New users default to needs_setup=False."""
+    user = User(email="test@example.com", password_hash="hash")
+    assert user.needs_setup is False
+
+
+def test_user_model_has_token_version_default_zero():
+    """New users default to token_version=0."""
+    user = User(email="test@example.com", password_hash="hash")
+    assert user.token_version == 0
+
+
+def test_user_model_needs_setup_true():
+    """Auto-created admin has needs_setup=True."""
+    user = User(email="admin@example.com", password_hash="hash", needs_setup=True)
+    assert user.needs_setup is True
+
+
+def test_sqlite_round_trip_new_fields():
+    """needs_setup and token_version survive create → read round-trip.
+
+    Uses the shared persistence engine (same one threads_meta, runs,
+    run_events, and feedback use). The old separate .deer-flow/users.db
+    file is gone.
+    """
+    import asyncio
+    import tempfile
+
+    from app.gateway.auth.repositories.sqlite import SQLiteUserRepository
+
+    async def _run() -> None:
+        from deerflow.persistence.engine import (
+            close_engine,
+            get_session_factory,
+            init_engine,
+        )
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            url = f"sqlite+aiosqlite:///{tmpdir}/scratch.db"
+            await init_engine("sqlite", url=url, sqlite_dir=tmpdir)
+            try:
+                repo = SQLiteUserRepository(get_session_factory())
+                user = User(
+                    email="setup@test.com",
+                    password_hash="fakehash",
+                    system_role="admin",
+                    needs_setup=True,
+                    token_version=3,
+                )
+                created = await repo.create_user(user)
+                assert created.needs_setup is True
+                assert created.token_version == 3
+
+                fetched = await repo.get_user_by_email("setup@test.com")
+                assert fetched is not None
+                assert fetched.needs_setup is True
+                assert fetched.token_version == 3
+
+                fetched.needs_setup = False
+                fetched.token_version = 4
+                await repo.update_user(fetched)
+                refetched = await repo.get_user_by_id(str(fetched.id))
+                assert refetched is not None
+                assert refetched.needs_setup is False
+                assert refetched.token_version == 4
+            finally:
+                await close_engine()
+
+    asyncio.run(_run())
+
+
+def test_update_user_raises_when_row_concurrently_deleted(tmp_path):
+    """Concurrent-delete during update_user must hard-fail, not silently no-op.
+
+    Earlier the SQLite repo returned the input unchanged when the row was
+    missing, making a phantom success path that admin password reset
+    callers (`reset_admin`, `_ensure_admin_user`) would happily log as
+    'password reset'. The new contract: raise ``UserNotFoundError`` so
+    a vanished row never looks like a successful update.
+    """
+    import asyncio
+    import tempfile
+
+    from app.gateway.auth.repositories.base import UserNotFoundError
+    from app.gateway.auth.repositories.sqlite import SQLiteUserRepository
+
+    async def _run() -> None:
+        from deerflow.persistence.engine import (
+            close_engine,
+            get_session_factory,
+            init_engine,
+        )
+        from deerflow.persistence.user.model import UserRow
+
+        with tempfile.TemporaryDirectory() as d:
+            url = f"sqlite+aiosqlite:///{d}/scratch.db"
+            await init_engine("sqlite", url=url, sqlite_dir=d)
+            try:
+                sf = get_session_factory()
+                repo = SQLiteUserRepository(sf)
+                user = User(
+                    email="ghost@test.com",
+                    password_hash="fakehash",
+                    system_role="user",
+                )
+                created = await repo.create_user(user)
+
+                # Simulate "row vanished underneath us" by deleting the row
+                # via the raw ORM session, then attempt to update.
+                async with sf() as session:
+                    row = await session.get(UserRow, str(created.id))
+                    assert row is not None
+                    await session.delete(row)
+                    await session.commit()
+
+                created.needs_setup = True
+                with pytest.raises(UserNotFoundError):
+                    await repo.update_user(created)
+            finally:
+                await close_engine()
+
+    asyncio.run(_run())
+
+
+# ── Token Versioning ───────────────────────────────────────────────────────
+
+
+def test_jwt_encodes_ver():
+    """JWT payload includes ver field."""
+    import os
+
+    from app.gateway.auth.errors import TokenError
+
+    os.environ["AUTH_JWT_SECRET"] = "test-secret-key-for-jwt-testing-minimum-32-chars"
+    token = create_access_token(str(uuid4()), token_version=3)
+    payload = decode_token(token)
+    assert not isinstance(payload, TokenError)
+    assert payload.ver == 3
+
+
+def test_jwt_default_ver_zero():
+    """JWT ver defaults to 0."""
+    import os
+
+    from app.gateway.auth.errors import TokenError
+
+    os.environ["AUTH_JWT_SECRET"] = "test-secret-key-for-jwt-testing-minimum-32-chars"
+    token = create_access_token(str(uuid4()))
+    payload = decode_token(token)
+    assert not isinstance(payload, TokenError)
+    assert payload.ver == 0
+
+
+def test_token_version_mismatch_rejects():
+    """Token with stale ver is rejected by get_current_user_from_request."""
+    import asyncio
+    import os
+
+    os.environ["AUTH_JWT_SECRET"] = "test-secret-key-for-jwt-testing-minimum-32-chars"
+
+    user_id = str(uuid4())
+    token = create_access_token(user_id, token_version=0)
+
+    mock_user = User(id=user_id, email="test@example.com", password_hash="hash", token_version=1)
+
+    mock_request = MagicMock()
+    mock_request.cookies = {"access_token": token}
+
+    with patch("app.gateway.deps.get_local_provider") as mock_provider_fn:
+        mock_provider = MagicMock()
+        mock_provider.get_user = AsyncMock(return_value=mock_user)
+        mock_provider_fn.return_value = mock_provider
+
+        from app.gateway.deps import get_current_user_from_request
+
+        with pytest.raises(HTTPException) as exc_info:
+            asyncio.run(get_current_user_from_request(mock_request))
+        assert exc_info.value.status_code == 401
+        assert "revoked" in str(exc_info.value.detail).lower()
+
+
+# ── change-password extension ──────────────────────────────────────────────
+
+
+def test_change_password_request_accepts_new_email():
+    """ChangePasswordRequest model accepts optional new_email."""
+    from app.gateway.routers.auth import ChangePasswordRequest
+
+    req = ChangePasswordRequest(
+        current_password="old",
+        new_password="newpassword",
+        new_email="new@example.com",
+    )
+    assert req.new_email == "new@example.com"
+
+
+def test_change_password_request_new_email_optional():
+    """ChangePasswordRequest model works without new_email."""
+    from app.gateway.routers.auth import ChangePasswordRequest
+
+    req = ChangePasswordRequest(current_password="old", new_password="newpassword")
+    assert req.new_email is None
+
+
+def test_login_response_includes_needs_setup():
+    """LoginResponse includes needs_setup field."""
+    from app.gateway.routers.auth import LoginResponse
+
+    resp = LoginResponse(expires_in=3600, needs_setup=True)
+    assert resp.needs_setup is True
+    resp2 = LoginResponse(expires_in=3600)
+    assert resp2.needs_setup is False
+
+
+# ── Rate Limiting ──────────────────────────────────────────────────────────
+
+
+def test_rate_limiter_allows_under_limit():
+    """Requests under the limit are allowed."""
+    from app.gateway.routers.auth import _check_rate_limit, _login_attempts
+
+    _login_attempts.clear()
+    _check_rate_limit("192.168.1.1")  # Should not raise
+
+
+def test_rate_limiter_blocks_after_max_failures():
+    """IP is blocked after 5 consecutive failures."""
+    from app.gateway.routers.auth import _check_rate_limit, _login_attempts, _record_login_failure
+
+    _login_attempts.clear()
+    ip = "10.0.0.1"
+    for _ in range(5):
+        _record_login_failure(ip)
+    with pytest.raises(HTTPException) as exc_info:
+        _check_rate_limit(ip)
+    assert exc_info.value.status_code == 429
+
+
+def test_rate_limiter_resets_on_success():
+    """Successful login clears the failure counter."""
+    from app.gateway.routers.auth import _check_rate_limit, _login_attempts, _record_login_failure, _record_login_success
+
+    _login_attempts.clear()
+    ip = "10.0.0.2"
+    for _ in range(4):
+        _record_login_failure(ip)
+    _record_login_success(ip)
+    _check_rate_limit(ip)  # Should not raise
+
+
+# ── Client IP extraction ─────────────────────────────────────────────────
+
+
+def test_get_client_ip_direct_connection_no_proxy(monkeypatch):
+    """Direct mode (no AUTH_TRUSTED_PROXIES): use TCP peer regardless of X-Real-IP."""
+    monkeypatch.delenv("AUTH_TRUSTED_PROXIES", raising=False)
+    from app.gateway.routers.auth import _get_client_ip
+
+    req = MagicMock()
+    req.client.host = "203.0.113.42"
+    req.headers = {}
+    assert _get_client_ip(req) == "203.0.113.42"
+
+
+def test_get_client_ip_x_real_ip_ignored_when_no_trusted_proxy(monkeypatch):
+    """X-Real-IP is silently ignored if AUTH_TRUSTED_PROXIES is unset.
+
+    This closes the bypass where any client could rotate X-Real-IP per
+    request to dodge per-IP rate limits in dev / direct mode.
+    """
+    monkeypatch.delenv("AUTH_TRUSTED_PROXIES", raising=False)
+    from app.gateway.routers.auth import _get_client_ip
+
+    req = MagicMock()
+    req.client.host = "127.0.0.1"
+    req.headers = {"x-real-ip": "203.0.113.42"}
+    assert _get_client_ip(req) == "127.0.0.1"
+
+
+def test_get_client_ip_x_real_ip_honored_from_trusted_proxy(monkeypatch):
+    """X-Real-IP is honored when the TCP peer matches AUTH_TRUSTED_PROXIES."""
+    monkeypatch.setenv("AUTH_TRUSTED_PROXIES", "10.0.0.0/8")
+    from app.gateway.routers.auth import _get_client_ip
+
+    req = MagicMock()
+    req.client.host = "10.5.6.7"  # in trusted CIDR
+    req.headers = {"x-real-ip": "203.0.113.42"}
+    assert _get_client_ip(req) == "203.0.113.42"
+
+
+def test_get_client_ip_x_real_ip_rejected_from_untrusted_peer(monkeypatch):
+    """X-Real-IP is rejected when the TCP peer is NOT in the trusted list."""
+    monkeypatch.setenv("AUTH_TRUSTED_PROXIES", "10.0.0.0/8")
+    from app.gateway.routers.auth import _get_client_ip
+
+    req = MagicMock()
+    req.client.host = "8.8.8.8"  # NOT in trusted CIDR
+    req.headers = {"x-real-ip": "203.0.113.42"}  # client trying to spoof
+    assert _get_client_ip(req) == "8.8.8.8"
+
+
+def test_get_client_ip_xff_never_honored(monkeypatch):
+    """X-Forwarded-For is never used; only X-Real-IP from a trusted peer."""
+    monkeypatch.setenv("AUTH_TRUSTED_PROXIES", "10.0.0.0/8")
+    from app.gateway.routers.auth import _get_client_ip
+
+    req = MagicMock()
+    req.client.host = "10.0.0.1"
+    req.headers = {"x-forwarded-for": "198.51.100.5"}  # no x-real-ip
+    assert _get_client_ip(req) == "10.0.0.1"
+
+
+def test_get_client_ip_invalid_trusted_proxy_entry_skipped(monkeypatch, caplog):
+    """Garbage entries in AUTH_TRUSTED_PROXIES are warned and skipped."""
+    monkeypatch.setenv("AUTH_TRUSTED_PROXIES", "not-an-ip,10.0.0.0/8")
+    from app.gateway.routers.auth import _get_client_ip
+
+    req = MagicMock()
+    req.client.host = "10.5.6.7"
+    req.headers = {"x-real-ip": "203.0.113.42"}
+    assert _get_client_ip(req) == "203.0.113.42"  # valid entry still works
+
+
+def test_get_client_ip_no_client_returns_unknown(monkeypatch):
+    """No request.client → 'unknown' marker (no crash)."""
+    monkeypatch.delenv("AUTH_TRUSTED_PROXIES", raising=False)
+    from app.gateway.routers.auth import _get_client_ip
+
+    req = MagicMock()
+    req.client = None
+    req.headers = {}
+    assert _get_client_ip(req) == "unknown"
+
+
+# ── Common-password blocklist ────────────────────────────────────────────────
+
+
+def test_register_rejects_literal_password():
+    """Pydantic validator rejects 'password' as a registration password."""
+    from pydantic import ValidationError
+
+    from app.gateway.routers.auth import RegisterRequest
+
+    with pytest.raises(ValidationError) as exc:
+        RegisterRequest(email="x@example.com", password="password")
+    assert "too common" in str(exc.value)
+
+
+def test_register_rejects_common_password_case_insensitive():
+    """Case variants of common passwords are also rejected."""
+    from pydantic import ValidationError
+
+    from app.gateway.routers.auth import RegisterRequest
+
+    for variant in ["PASSWORD", "Password1", "qwerty123", "letmein1"]:
+        with pytest.raises(ValidationError):
+            RegisterRequest(email="x@example.com", password=variant)
+
+
+def test_register_accepts_strong_password():
+    """A non-blocklisted password of length >=8 is accepted."""
+    from app.gateway.routers.auth import RegisterRequest
+
+    req = RegisterRequest(email="x@example.com", password="Tr0ub4dor&3-Horse")
+    assert req.password == "Tr0ub4dor&3-Horse"
+
+
+def test_change_password_rejects_common_password():
+    """The same blocklist applies to change-password."""
+    from pydantic import ValidationError
+
+    from app.gateway.routers.auth import ChangePasswordRequest
+
+    with pytest.raises(ValidationError):
+        ChangePasswordRequest(current_password="anything", new_password="iloveyou")
+
+
+def test_password_blocklist_keeps_short_passwords_for_length_check():
+    """Short passwords still fail the min_length check (not the blocklist)."""
+    from pydantic import ValidationError
+
+    from app.gateway.routers.auth import RegisterRequest
+
+    with pytest.raises(ValidationError) as exc:
+        RegisterRequest(email="x@example.com", password="abc")
+    # the length check should fire, not the blocklist
+    assert "at least 8 characters" in str(exc.value)
+
+
+# ── Weak JWT secret warning ──────────────────────────────────────────────────
+
+
+def test_missing_jwt_secret_generates_ephemeral(monkeypatch, caplog):
+    """get_auth_config() auto-generates an ephemeral secret when AUTH_JWT_SECRET is unset."""
+    import logging
+
+    import app.gateway.auth.config as config_module
+
+    config_module._auth_config = None
+    monkeypatch.delenv("AUTH_JWT_SECRET", raising=False)
+
+    with caplog.at_level(logging.WARNING):
+        config = config_module.get_auth_config()
+
+    assert config.jwt_secret  # non-empty ephemeral secret
+    assert any("AUTH_JWT_SECRET" in msg for msg in caplog.messages)
+
+    # Cleanup
+    config_module._auth_config = None
diff --git a/backend/tests/test_auth_config.py b/backend/tests/test_auth_config.py
new file mode 100644
index 000000000..21b8bd81b
--- /dev/null
+++ b/backend/tests/test_auth_config.py
@@ -0,0 +1,54 @@
+"""Tests for AuthConfig typed configuration."""
+
+import os
+from unittest.mock import patch
+
+import pytest
+
+from app.gateway.auth.config import AuthConfig
+
+
+def test_auth_config_defaults():
+    config = AuthConfig(jwt_secret="test-secret-key-123")
+    assert config.token_expiry_days == 7
+
+
+def test_auth_config_token_expiry_range():
+    AuthConfig(jwt_secret="s", token_expiry_days=1)
+    AuthConfig(jwt_secret="s", token_expiry_days=30)
+    with pytest.raises(Exception):
+        AuthConfig(jwt_secret="s", token_expiry_days=0)
+    with pytest.raises(Exception):
+        AuthConfig(jwt_secret="s", token_expiry_days=31)
+
+
+def test_auth_config_from_env():
+    env = {"AUTH_JWT_SECRET": "test-jwt-secret-from-env"}
+    with patch.dict(os.environ, env, clear=False):
+        import app.gateway.auth.config as cfg
+
+        old = cfg._auth_config
+        cfg._auth_config = None
+        try:
+            config = cfg.get_auth_config()
+            assert config.jwt_secret == "test-jwt-secret-from-env"
+        finally:
+            cfg._auth_config = old
+
+
+def test_auth_config_missing_secret_generates_ephemeral(caplog):
+    import logging
+
+    import app.gateway.auth.config as cfg
+
+    old = cfg._auth_config
+    cfg._auth_config = None
+    try:
+        with patch.dict(os.environ, {}, clear=True):
+            os.environ.pop("AUTH_JWT_SECRET", None)
+            with caplog.at_level(logging.WARNING):
+                config = cfg.get_auth_config()
+            assert config.jwt_secret
+            assert any("AUTH_JWT_SECRET" in msg for msg in caplog.messages)
+    finally:
+        cfg._auth_config = old
diff --git a/backend/tests/test_auth_errors.py b/backend/tests/test_auth_errors.py
new file mode 100644
index 000000000..b3b46c75f
--- /dev/null
+++ b/backend/tests/test_auth_errors.py
@@ -0,0 +1,75 @@
+"""Tests for auth error types and typed decode_token."""
+
+from datetime import UTC, datetime, timedelta
+
+import jwt as pyjwt
+
+from app.gateway.auth.config import AuthConfig, set_auth_config
+from app.gateway.auth.errors import AuthErrorCode, AuthErrorResponse, TokenError
+from app.gateway.auth.jwt import create_access_token, decode_token
+
+
+def test_auth_error_code_values():
+    assert AuthErrorCode.INVALID_CREDENTIALS == "invalid_credentials"
+    assert AuthErrorCode.TOKEN_EXPIRED == "token_expired"
+    assert AuthErrorCode.NOT_AUTHENTICATED == "not_authenticated"
+
+
+def test_token_error_values():
+    assert TokenError.EXPIRED == "expired"
+    assert TokenError.INVALID_SIGNATURE == "invalid_signature"
+    assert TokenError.MALFORMED == "malformed"
+
+
+def test_auth_error_response_serialization():
+    err = AuthErrorResponse(
+        code=AuthErrorCode.TOKEN_EXPIRED,
+        message="Token has expired",
+    )
+    d = err.model_dump()
+    assert d == {"code": "token_expired", "message": "Token has expired"}
+
+
+def test_auth_error_response_from_dict():
+    d = {"code": "invalid_credentials", "message": "Wrong password"}
+    err = AuthErrorResponse(**d)
+    assert err.code == AuthErrorCode.INVALID_CREDENTIALS
+
+
+# ── decode_token typed failure tests ──────────────────────────────
+
+_TEST_SECRET = "test-secret-for-jwt-decode-token-tests"
+
+
+def _setup_config():
+    set_auth_config(AuthConfig(jwt_secret=_TEST_SECRET))
+
+
+def test_decode_token_returns_token_error_on_expired():
+    _setup_config()
+    expired_payload = {"sub": "user-1", "exp": datetime.now(UTC) - timedelta(hours=1), "iat": datetime.now(UTC)}
+    token = pyjwt.encode(expired_payload, _TEST_SECRET, algorithm="HS256")
+    result = decode_token(token)
+    assert result == TokenError.EXPIRED
+
+
+def test_decode_token_returns_token_error_on_bad_signature():
+    _setup_config()
+    payload = {"sub": "user-1", "exp": datetime.now(UTC) + timedelta(hours=1), "iat": datetime.now(UTC)}
+    token = pyjwt.encode(payload, "wrong-secret", algorithm="HS256")
+    result = decode_token(token)
+    assert result == TokenError.INVALID_SIGNATURE
+
+
+def test_decode_token_returns_token_error_on_malformed():
+    _setup_config()
+    result = decode_token("not-a-jwt")
+    assert result == TokenError.MALFORMED
+
+
+def test_decode_token_returns_payload_on_valid():
+    _setup_config()
+    token = create_access_token("user-123")
+    result = decode_token(token)
+    assert not isinstance(result, TokenError)
+    assert result.sub == "user-123"
diff --git a/backend/tests/test_auth_middleware.py b/backend/tests/test_auth_middleware.py
new file mode 100644
index 000000000..398f9cec6
--- /dev/null
+++ b/backend/tests/test_auth_middleware.py
@@ -0,0 +1,222 @@
+"""Tests for the global AuthMiddleware (fail-closed safety net)."""
+
+import pytest
+from starlette.testclient import TestClient
+
+from app.gateway.auth_middleware import AuthMiddleware, _is_public
+
+# ── _is_public unit tests ─────────────────────────────────────────────────
+
+
+@pytest.mark.parametrize(
+    "path",
+    [
+        "/health",
+        "/health/",
+        "/docs",
+        "/docs/",
+        "/redoc",
+        "/openapi.json",
+        "/api/v1/auth/login/local",
+        "/api/v1/auth/register",
+        "/api/v1/auth/logout",
+        "/api/v1/auth/setup-status",
+    ],
+)
+def test_public_paths(path: str):
+    assert _is_public(path) is True
+
+
+@pytest.mark.parametrize(
+    "path",
+    [
+        "/api/models",
+        "/api/mcp/config",
+        "/api/memory",
+        "/api/skills",
+        "/api/threads/123",
+        "/api/threads/123/uploads",
+        "/api/agents",
+        "/api/channels",
+        "/api/runs/stream",
+        "/api/threads/123/runs",
+        "/api/v1/auth/me",
+        "/api/v1/auth/change-password",
+    ],
+)
+def test_protected_paths(path: str):
+    assert _is_public(path) is False
+
+
+# ── Trailing slash / normalization edge cases ─────────────────────────────
+
+
+@pytest.mark.parametrize(
+    "path",
+    [
+        "/api/v1/auth/login/local/",
+        "/api/v1/auth/register/",
+        "/api/v1/auth/logout/",
+        "/api/v1/auth/setup-status/",
+    ],
+)
+def test_public_auth_paths_with_trailing_slash(path: str):
+    assert _is_public(path) is True
+
+
+@pytest.mark.parametrize(
+    "path",
+    [
+        "/api/models/",
+        "/api/v1/auth/me/",
+        "/api/v1/auth/change-password/",
+    ],
+)
+def test_protected_paths_with_trailing_slash(path: str):
+    assert _is_public(path) is False
+
+
+def test_unknown_api_path_is_protected():
+    """Fail-closed: any new /api/* path is protected by default."""
+    assert _is_public("/api/new-feature") is False
+    assert _is_public("/api/v2/something") is False
+    assert _is_public("/api/v1/auth/new-endpoint") is False
+
+
+# ── Middleware integration tests ──────────────────────────────────────────
+
+
+def _make_app():
+    """Create a minimal FastAPI app with AuthMiddleware for testing."""
+    from fastapi import FastAPI
+
+    app = FastAPI()
+    app.add_middleware(AuthMiddleware)
+
+    @app.get("/health")
+    async def health():
+        return {"status": "ok"}
+
+    @app.get("/api/v1/auth/me")
+    async def auth_me():
+        return {"id": "1", "email": "test@test.com"}
+
+    @app.get("/api/v1/auth/setup-status")
+    async def setup_status():
+        return {"needs_setup": False}
+
+    @app.get("/api/models")
+    async def models_get():
+        return {"models": []}
+
+    @app.put("/api/mcp/config")
+    async def mcp_put():
+        return {"ok": True}
+
+    @app.delete("/api/threads/abc")
+    async def thread_delete():
+        return {"ok": True}
+
+    @app.patch("/api/threads/abc")
+    async def thread_patch():
+        return {"ok": True}
+
+    @app.post("/api/threads/abc/runs/stream")
+    async def stream():
+        return {"ok": True}
+
+    @app.get("/api/future-endpoint")
+    async def future():
+        return {"ok": True}
+
+    return app
+
+
+@pytest.fixture
+def client():
+    return TestClient(_make_app())
+
+
+def test_public_path_no_cookie(client):
+    res = client.get("/health")
+    assert res.status_code == 200
+
+
+def test_public_auth_path_no_cookie(client):
+    """Public auth endpoints (login/register) pass without cookie."""
+    res = client.get("/api/v1/auth/setup-status")
+    assert res.status_code == 200
+
+
+def test_protected_auth_path_no_cookie(client):
+    """/auth/me requires cookie even though it's under /api/v1/auth/."""
+    res = client.get("/api/v1/auth/me")
+    assert res.status_code == 401
+
+
+def test_protected_path_no_cookie_returns_401(client):
+    res = client.get("/api/models")
+    assert res.status_code == 401
+    body = res.json()
+    assert body["detail"]["code"] == "not_authenticated"
+
+
+def test_protected_path_with_junk_cookie_rejected(client):
+    """Junk cookie → 401. Middleware strictly validates the JWT now
+    (AUTH_TEST_PLAN test 7.5.8); it no longer silently passes bad
+    tokens through to the route handler."""
+    res = client.get("/api/models", cookies={"access_token": "some-token"})
+    assert res.status_code == 401
+
+
+def test_protected_post_no_cookie_returns_401(client):
+    res = client.post("/api/threads/abc/runs/stream")
+    assert res.status_code == 401
+
+
+# ── Method matrix: PUT/DELETE/PATCH also protected ────────────────────────
+
+
+def test_protected_put_no_cookie(client):
+    res = client.put("/api/mcp/config")
+    assert res.status_code == 401
+
+
+def test_protected_delete_no_cookie(client):
+    res = client.delete("/api/threads/abc")
+    assert res.status_code == 401
+
+
+def test_protected_patch_no_cookie(client):
+    res = client.patch("/api/threads/abc")
+    assert res.status_code == 401
+
+
+def test_put_with_junk_cookie_rejected(client):
+    """Junk cookie on PUT → 401 (strict JWT validation in middleware)."""
+    client.cookies.set("access_token", "tok")
+    res = client.put("/api/mcp/config")
+    assert res.status_code == 401
+
+
+def test_delete_with_junk_cookie_rejected(client):
+    """Junk cookie on DELETE → 401 (strict JWT validation in middleware)."""
+    client.cookies.set("access_token", "tok")
+    res = client.delete("/api/threads/abc")
+    assert res.status_code == 401
+
+
+# ── Fail-closed: unknown future endpoints ─────────────────────────────────
+
+
+def test_unknown_endpoint_no_cookie_returns_401(client):
+    """Any new /api/* endpoint is blocked by default without cookie."""
+    res = client.get("/api/future-endpoint")
+    assert res.status_code == 401
+
+
+def test_unknown_endpoint_with_junk_cookie_rejected(client):
+    """New endpoints are also protected by strict JWT validation."""
+    client.cookies.set("access_token", "tok")
+    res = client.get("/api/future-endpoint")
+    assert res.status_code == 401
diff --git a/backend/tests/test_auth_type_system.py b/backend/tests/test_auth_type_system.py
new file mode 100644
index 000000000..226d3812c
--- /dev/null
+++ b/backend/tests/test_auth_type_system.py
@@ -0,0 +1,701 @@
+"""Tests for auth type system hardening.
+
+Covers structured error responses, typed decode_token callers,
+CSRF middleware path matching, config-driven cookie security,
+and unhappy paths / edge cases for all auth boundaries.
+"""
+
+import os
+import secrets
+from datetime import UTC, datetime, timedelta
+from unittest.mock import patch
+
+import jwt as pyjwt
+import pytest
+from fastapi import FastAPI
+from fastapi.testclient import TestClient
+from pydantic import ValidationError
+
+from app.gateway.auth.config import AuthConfig, set_auth_config
+from app.gateway.auth.errors import AuthErrorCode, AuthErrorResponse, TokenError
+from app.gateway.auth.jwt import decode_token
+from app.gateway.csrf_middleware import (
+    CSRF_COOKIE_NAME,
+    CSRF_HEADER_NAME,
+    CSRFMiddleware,
+    is_auth_endpoint,
+    should_check_csrf,
+)
+
+# ── Setup ────────────────────────────────────────────────────────────
+
+_TEST_SECRET = "test-secret-for-auth-type-system-tests-min32"
+
+
+@pytest.fixture(autouse=True)
+def _persistence_engine(tmp_path):
+    """Initialise a per-test SQLite engine + reset cached provider singletons.
+
+    The auth tests call real HTTP handlers that go through
+    ``SQLiteUserRepository`` → ``get_session_factory``. Each test gets
+    a fresh DB plus a clean ``deps._cached_*`` so the cached provider
+    does not hold a dangling reference to the previous test's engine.
+    """
+    import asyncio
+
+    from app.gateway import deps
+    from deerflow.persistence.engine import close_engine, init_engine
+
+    url = f"sqlite+aiosqlite:///{tmp_path}/auth_types.db"
+    asyncio.run(init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)))
+    deps._cached_local_provider = None
+    deps._cached_repo = None
+    try:
+        yield
+    finally:
+        deps._cached_local_provider = None
+        deps._cached_repo = None
+        asyncio.run(close_engine())
+
+
+def _setup_config():
+    set_auth_config(AuthConfig(jwt_secret=_TEST_SECRET))
+
+
+# ── CSRF Middleware Path Matching ────────────────────────────────────
+
+
+class _FakeRequest:
+    """Minimal request mock for CSRF path matching tests."""
+
+    def __init__(self, path: str, method: str = "POST"):
+        self.method = method
+
+        class _URL:
+            def __init__(self, p):
+                self.path = p
+
+        self.url = _URL(path)
+        self.cookies = {}
+        self.headers = {}
+
+
+def test_csrf_exempts_login_local():
+    """login/local (actual route) should be exempt from CSRF."""
+    req = _FakeRequest("/api/v1/auth/login/local")
+    assert is_auth_endpoint(req) is True
+
+
+def test_csrf_exempts_login_local_trailing_slash():
+    """Trailing slash should also be exempt."""
+    req = _FakeRequest("/api/v1/auth/login/local/")
+    assert is_auth_endpoint(req) is True
+
+
+def test_csrf_exempts_logout():
+    req = _FakeRequest("/api/v1/auth/logout")
+    assert is_auth_endpoint(req) is True
+
+
+def test_csrf_exempts_register():
+    req = _FakeRequest("/api/v1/auth/register")
+    assert is_auth_endpoint(req) is True
+
+
+def test_csrf_does_not_exempt_old_login_path():
+    """Old /api/v1/auth/login (without /local) should NOT be exempt."""
+    req = _FakeRequest("/api/v1/auth/login")
+    assert is_auth_endpoint(req) is False
+
+
+def test_csrf_does_not_exempt_me():
+    req = _FakeRequest("/api/v1/auth/me")
+    assert is_auth_endpoint(req) is False
+
+
+def test_csrf_skips_get_requests():
+    req = _FakeRequest("/api/v1/auth/me", method="GET")
+    assert should_check_csrf(req) is False
+
+
+def test_csrf_checks_post_to_protected():
+    req = _FakeRequest("/api/v1/some/endpoint", method="POST")
+    assert should_check_csrf(req) is True
+
+
+# ── Structured Error Response Format ────────────────────────────────
+
+
+def test_auth_error_response_has_code_and_message():
+    """All auth errors should have structured {code, message} format."""
+    err = AuthErrorResponse(
+        code=AuthErrorCode.INVALID_CREDENTIALS,
+        message="Wrong password",
+    )
+    d = err.model_dump()
+    assert "code" in d
+    assert "message" in d
+    assert d["code"] == "invalid_credentials"
+
+
+def test_auth_error_response_all_codes_serializable():
+    """Every AuthErrorCode should be serializable in AuthErrorResponse."""
+    for code in AuthErrorCode:
+        err = AuthErrorResponse(code=code, message=f"Test {code.value}")
+        d = err.model_dump()
+        assert d["code"] == code.value
+
+
+# ── decode_token Caller Pattern ──────────────────────────────────────
+
+
+def test_decode_token_expired_maps_to_token_expired_code():
+    """TokenError.EXPIRED should map to AuthErrorCode.TOKEN_EXPIRED."""
+    _setup_config()
+    from datetime import UTC, datetime, timedelta
+
+    import jwt as pyjwt
+
+    expired = {"sub": "u1", "exp": datetime.now(UTC) - timedelta(hours=1), "iat": datetime.now(UTC)}
+    token = pyjwt.encode(expired, _TEST_SECRET, algorithm="HS256")
+    result = decode_token(token)
+    assert result == TokenError.EXPIRED
+
+    # Verify the mapping pattern used in route handlers
+    code = AuthErrorCode.TOKEN_EXPIRED if result == TokenError.EXPIRED else AuthErrorCode.TOKEN_INVALID
+    assert code == AuthErrorCode.TOKEN_EXPIRED
+
+
+def test_decode_token_invalid_sig_maps_to_token_invalid_code():
+    """TokenError.INVALID_SIGNATURE should map to AuthErrorCode.TOKEN_INVALID."""
+    _setup_config()
+    from datetime import UTC, datetime, timedelta
+
+    import jwt as pyjwt
+
+    payload = {"sub": "u1", "exp": datetime.now(UTC) + timedelta(hours=1), "iat": datetime.now(UTC)}
+    token = pyjwt.encode(payload, "wrong-key", algorithm="HS256")
+    result = decode_token(token)
+    assert result == TokenError.INVALID_SIGNATURE
+
+    code = AuthErrorCode.TOKEN_EXPIRED if result == TokenError.EXPIRED else AuthErrorCode.TOKEN_INVALID
+    assert code == AuthErrorCode.TOKEN_INVALID
+
+
+def test_decode_token_malformed_maps_to_token_invalid_code():
+    """TokenError.MALFORMED should map to AuthErrorCode.TOKEN_INVALID."""
+    _setup_config()
+    result = decode_token("garbage")
+    assert result == TokenError.MALFORMED
+
+    code = AuthErrorCode.TOKEN_EXPIRED if result == TokenError.EXPIRED else AuthErrorCode.TOKEN_INVALID
+    assert code == AuthErrorCode.TOKEN_INVALID
+
+
+# ── Login Response Format ────────────────────────────────────────────
+
+
+def test_login_response_model_has_no_access_token():
+    """LoginResponse should NOT contain access_token field (RFC-001)."""
+    from app.gateway.routers.auth import LoginResponse
+
+    resp = LoginResponse(expires_in=604800)
+    d = resp.model_dump()
+    assert "access_token" not in d
+    assert "expires_in" in d
+    assert d["expires_in"] == 604800
+
+
+def test_login_response_model_fields():
+    """LoginResponse has expires_in and needs_setup."""
+    from app.gateway.routers.auth import LoginResponse
+
+    fields = set(LoginResponse.model_fields.keys())
+    assert fields == {"expires_in", "needs_setup"}
+
+
+# ── AuthConfig in Route ──────────────────────────────────────────────
+
+
+def test_auth_config_token_expiry_used_in_login_response():
+    """LoginResponse.expires_in should come from config.token_expiry_days."""
+    from app.gateway.routers.auth import LoginResponse
+
+    expected_seconds = 14 * 24 * 3600
+    resp = LoginResponse(expires_in=expected_seconds)
+    assert resp.expires_in == expected_seconds
+
+
+# ── UserResponse Type Preservation ───────────────────────────────────
+
+
+def test_user_response_system_role_literal():
+    """UserResponse.system_role should only accept 'admin' or 'user'."""
+    from app.gateway.auth.models import UserResponse
+
+    # Valid roles
+    resp = UserResponse(id="1", email="a@b.com", system_role="admin")
+    assert resp.system_role == "admin"
+
+    resp = UserResponse(id="1", email="a@b.com", system_role="user")
+    assert resp.system_role == "user"
+
+
+def test_user_response_rejects_invalid_role():
+    """UserResponse should reject invalid system_role values."""
+    from app.gateway.auth.models import UserResponse
+
+    with pytest.raises(ValidationError):
+        UserResponse(id="1", email="a@b.com", system_role="superadmin")
+
+
+# ══════════════════════════════════════════════════════════════════════
+# UNHAPPY PATHS / EDGE CASES
+# ══════════════════════════════════════════════════════════════════════
+
+
+# ── get_current_user structured 401 responses ────────────────────────
+
+
+def test_get_current_user_no_cookie_returns_not_authenticated():
+    """No cookie → 401 with code=not_authenticated."""
+    import asyncio
+
+    from fastapi import HTTPException
+
+    from app.gateway.deps import get_current_user_from_request
+
+    mock_request = type("MockRequest", (), {"cookies": {}})()
+    with pytest.raises(HTTPException) as exc_info:
+        asyncio.run(get_current_user_from_request(mock_request))
+    assert exc_info.value.status_code == 401
+    detail = exc_info.value.detail
+    assert detail["code"] == "not_authenticated"
+
+
+def test_get_current_user_expired_token_returns_token_expired():
+    """Expired token → 401 with code=token_expired."""
+    import asyncio
+
+    from fastapi import HTTPException
+
+    from app.gateway.deps import get_current_user_from_request
+
+    _setup_config()
+    expired = {"sub": "u1", "exp": datetime.now(UTC) - timedelta(hours=1), "iat": datetime.now(UTC)}
+    token = pyjwt.encode(expired, _TEST_SECRET, algorithm="HS256")
+
+    mock_request = type("MockRequest", (), {"cookies": {"access_token": token}})()
+    with pytest.raises(HTTPException) as exc_info:
+        asyncio.run(get_current_user_from_request(mock_request))
+    assert exc_info.value.status_code == 401
+    detail = exc_info.value.detail
+    assert detail["code"] == "token_expired"
+
+
+def test_get_current_user_invalid_token_returns_token_invalid():
+    """Bad signature → 401 with code=token_invalid."""
+    import asyncio
+
+    from fastapi import HTTPException
+
+    from app.gateway.deps import get_current_user_from_request
+
+    _setup_config()
+    payload = {"sub": "u1", "exp": datetime.now(UTC) + timedelta(hours=1), "iat": datetime.now(UTC)}
+    token = pyjwt.encode(payload, "wrong-secret", algorithm="HS256")
+
+    mock_request = type("MockRequest", (), {"cookies": {"access_token": token}})()
+    with pytest.raises(HTTPException) as exc_info:
+        asyncio.run(get_current_user_from_request(mock_request))
+    assert exc_info.value.status_code == 401
+    detail = exc_info.value.detail
+    assert detail["code"] == "token_invalid"
+
+
+def test_get_current_user_malformed_token_returns_token_invalid():
+    """Garbage token → 401 with code=token_invalid."""
+    import asyncio
+
+    from fastapi import HTTPException
+
+    from app.gateway.deps import get_current_user_from_request
+
+    _setup_config()
+    mock_request = type("MockRequest", (), {"cookies": {"access_token": "not-a-jwt"}})()
+    with pytest.raises(HTTPException) as exc_info:
+        asyncio.run(get_current_user_from_request(mock_request))
+    assert exc_info.value.status_code == 401
+    detail = exc_info.value.detail
+    assert detail["code"] == "token_invalid"
+
+
+# ── decode_token edge cases ──────────────────────────────────────────
+
+
+def test_decode_token_empty_string_returns_malformed():
+    _setup_config()
+    result = decode_token("")
+    assert result == TokenError.MALFORMED
+
+
+def test_decode_token_whitespace_returns_malformed():
+    _setup_config()
+    result = decode_token("   ")
+    assert result == TokenError.MALFORMED
+
+
+# ── AuthConfig validation edge cases ─────────────────────────────────
+
+
+def test_auth_config_missing_jwt_secret_raises():
+    """AuthConfig requires jwt_secret — no default allowed."""
+    with pytest.raises(ValidationError):
+        AuthConfig()
+
+
+def test_auth_config_token_expiry_zero_raises():
+    """token_expiry_days must be >= 1."""
+    with pytest.raises(ValidationError):
+        AuthConfig(jwt_secret="secret", token_expiry_days=0)
+
+
+def test_auth_config_token_expiry_31_raises():
+    """token_expiry_days must be <= 30."""
+    with pytest.raises(ValidationError):
+        AuthConfig(jwt_secret="secret", token_expiry_days=31)
+
+
+def test_auth_config_token_expiry_boundary_1_ok():
+    config = AuthConfig(jwt_secret="secret", token_expiry_days=1)
+    assert config.token_expiry_days == 1
+
+
+def test_auth_config_token_expiry_boundary_30_ok():
+    config = AuthConfig(jwt_secret="secret", token_expiry_days=30)
+    assert config.token_expiry_days == 30
+
+
+def test_get_auth_config_missing_env_var_generates_ephemeral(caplog):
+    """get_auth_config() auto-generates ephemeral secret when AUTH_JWT_SECRET is unset."""
+    import logging
+
+    import app.gateway.auth.config as cfg
+
+    old = cfg._auth_config
+    cfg._auth_config = None
+    try:
+        with patch.dict(os.environ, {}, clear=True):
+            os.environ.pop("AUTH_JWT_SECRET", None)
+            with caplog.at_level(logging.WARNING):
+                config = cfg.get_auth_config()
+            assert config.jwt_secret
+            assert any("AUTH_JWT_SECRET" in msg for msg in caplog.messages)
+    finally:
+        cfg._auth_config = old
+
+
+# ── CSRF middleware integration (unhappy paths) ──────────────────────
+
+
+def _make_csrf_app():
+    """Create a minimal FastAPI app with CSRFMiddleware for testing."""
+    from fastapi import HTTPException as _HTTPException
+    from fastapi.responses import JSONResponse as _JSONResponse
+
+    app = FastAPI()
+
+    @app.exception_handler(_HTTPException)
+    async def _http_exc_handler(request, exc):
+        return _JSONResponse(status_code=exc.status_code, content={"detail": exc.detail})
+
+    app.add_middleware(CSRFMiddleware)
+
+    @app.post("/api/v1/test/protected")
+    async def protected():
+        return {"ok": True}
+
+    @app.post("/api/v1/auth/login/local")
+    async def login():
+        return {"ok": True}
+
+    @app.get("/api/v1/test/read")
+    async def read_endpoint():
+        return {"ok": True}
+
+    return app
+
+
+def test_csrf_middleware_blocks_post_without_token():
+    """POST to protected endpoint without CSRF token → 403 with structured detail."""
+    client = TestClient(_make_csrf_app())
+    resp = client.post("/api/v1/test/protected")
+    assert resp.status_code == 403
+    assert "CSRF" in resp.json()["detail"]
+    assert "missing" in resp.json()["detail"].lower()
+
+
+def test_csrf_middleware_blocks_post_with_mismatched_token():
+    """POST with mismatched CSRF cookie/header → 403 with mismatch detail."""
+    client = TestClient(_make_csrf_app())
+    client.cookies.set(CSRF_COOKIE_NAME, "token-a")
+    resp = client.post(
+        "/api/v1/test/protected",
+        headers={CSRF_HEADER_NAME: "token-b"},
+    )
+    assert resp.status_code == 403
+    assert "mismatch" in resp.json()["detail"].lower()
+
+
+def test_csrf_middleware_allows_post_with_matching_token():
+    """POST with matching CSRF cookie/header → 200."""
+    client = TestClient(_make_csrf_app())
+    token = secrets.token_urlsafe(64)
+    client.cookies.set(CSRF_COOKIE_NAME, token)
+    resp = client.post(
+        "/api/v1/test/protected",
+        headers={CSRF_HEADER_NAME: token},
+    )
+    assert resp.status_code == 200
+
+
+def test_csrf_middleware_allows_get_without_token():
+    """GET requests bypass CSRF check."""
+    client = TestClient(_make_csrf_app())
+    resp = client.get("/api/v1/test/read")
+    assert resp.status_code == 200
+
+
+def test_csrf_middleware_exempts_login_local():
+    """POST to login/local is exempt from CSRF (no token yet)."""
+    client = TestClient(_make_csrf_app())
+    resp = client.post("/api/v1/auth/login/local")
+    assert resp.status_code == 200
+
+
+def test_csrf_middleware_sets_cookie_on_auth_endpoint():
+    """Auth endpoints should receive a CSRF cookie in response."""
+    client = TestClient(_make_csrf_app())
+    resp = client.post("/api/v1/auth/login/local")
+    assert CSRF_COOKIE_NAME in resp.cookies
+
+
+# ── UserResponse edge cases ──────────────────────────────────────────
+
+
+def test_user_response_missing_required_fields():
+    """UserResponse with missing fields → ValidationError."""
+    from app.gateway.auth.models import UserResponse
+
+    with pytest.raises(ValidationError):
+        UserResponse(id="1")  # missing email, system_role
+
+    with pytest.raises(ValidationError):
+        UserResponse(id="1", email="a@b.com")  # missing system_role
+
+
+def test_user_response_empty_string_role_rejected():
+    """Empty string is not a valid role."""
+    from app.gateway.auth.models import UserResponse
+
+    with pytest.raises(ValidationError):
+        UserResponse(id="1", email="a@b.com", system_role="")
+
+
+# ══════════════════════════════════════════════════════════════════════
+# HTTP-LEVEL API CONTRACT TESTS
+# ══════════════════════════════════════════════════════════════════════
+
+
+def _make_auth_app():
+    """Create FastAPI app with auth routes for contract testing."""
+    from app.gateway.app import create_app
+
+    return create_app()
+
+
+def _get_auth_client():
+    """Get TestClient for auth API contract tests."""
+    return TestClient(_make_auth_app())
+
+
+def test_api_auth_me_no_cookie_returns_structured_401():
+    """/api/v1/auth/me without cookie → 401 with {code: 'not_authenticated'}."""
+    _setup_config()
+    client = _get_auth_client()
+    resp = client.get("/api/v1/auth/me")
+    assert resp.status_code == 401
+    body = resp.json()
+    assert body["detail"]["code"] == "not_authenticated"
+    assert "message" in body["detail"]
+
+
+def test_api_auth_me_expired_token_returns_structured_401():
+    """/api/v1/auth/me with expired token → 401 with {code: 'token_expired'}."""
+    _setup_config()
+    expired = {"sub": "u1", "exp": datetime.now(UTC) - timedelta(hours=1), "iat": datetime.now(UTC)}
+    token = pyjwt.encode(expired, _TEST_SECRET, algorithm="HS256")
+
+    client = _get_auth_client()
+    client.cookies.set("access_token", token)
+    resp = client.get("/api/v1/auth/me")
+    assert resp.status_code == 401
+    body = resp.json()
+    assert body["detail"]["code"] == "token_expired"
+
+
+def test_api_auth_me_invalid_sig_returns_structured_401():
+    """/api/v1/auth/me with bad signature → 401 with {code: 'token_invalid'}."""
+    _setup_config()
+    payload = {"sub": "u1", "exp": datetime.now(UTC) + timedelta(hours=1), "iat": datetime.now(UTC)}
+    token = pyjwt.encode(payload, "wrong-key", algorithm="HS256")
+
+    client = _get_auth_client()
+    client.cookies.set("access_token", token)
+    resp = client.get("/api/v1/auth/me")
+    assert resp.status_code == 401
+    body = resp.json()
+    assert body["detail"]["code"] == "token_invalid"
+
+
+def test_api_login_bad_credentials_returns_structured_401():
+    """Login with wrong password → 401 with {code: 'invalid_credentials'}."""
+    _setup_config()
+    client = _get_auth_client()
+    resp = client.post(
+        "/api/v1/auth/login/local",
+        data={"username": "nonexistent@test.com", "password": "wrongpassword"},
+    )
+    assert resp.status_code == 401
+    body = resp.json()
+    assert body["detail"]["code"] == "invalid_credentials"
+
+
+def test_api_login_success_no_token_in_body():
+    """Successful login → response body has expires_in but NOT access_token."""
+    _setup_config()
+    client = _get_auth_client()
+    # Register first
+    client.post(
+        "/api/v1/auth/register",
+        json={"email": "contract-test@test.com", "password": "securepassword123"},
+    )
+    # Login
+    resp = client.post(
+        "/api/v1/auth/login/local",
+        data={"username": "contract-test@test.com", "password": "securepassword123"},
+    )
+    assert resp.status_code == 200
+    body = resp.json()
+    assert "expires_in" in body
+    assert "access_token" not in body
+    # Token should be in cookie, not body
+    assert "access_token" in resp.cookies
+
+
+def test_api_register_duplicate_returns_structured_400():
+    """Register with duplicate email → 400 with {code: 'email_already_exists'}."""
+    _setup_config()
+    client = _get_auth_client()
+    email = "dup-contract-test@test.com"
+    # First register
+    client.post("/api/v1/auth/register", json={"email": email, "password": "Tr0ub4dor3a"})
+    # Duplicate
+    resp = client.post("/api/v1/auth/register", json={"email": email, "password": "AnotherStr0ngPwd!"})
+    assert resp.status_code == 400
+    body = resp.json()
+    assert body["detail"]["code"] == "email_already_exists"
+
+
+# ── Cookie security: HTTP vs HTTPS ────────────────────────────────────
+
+
+def _unique_email(prefix: str) -> str:
+    return f"{prefix}-{secrets.token_hex(4)}@test.com"
+
+
+def _get_set_cookie_headers(resp) -> list[str]:
+    """Extract all set-cookie header values from a TestClient response."""
+    return [v for k, v in resp.headers.multi_items() if k.lower() == "set-cookie"]
+
+
+def test_register_http_cookie_httponly_true_secure_false():
+    """HTTP register → access_token cookie is httponly=True, secure=False, no max_age."""
+    _setup_config()
+    client = _get_auth_client()
+    resp = client.post(
+        "/api/v1/auth/register",
+        json={"email": _unique_email("http-cookie"), "password": "Tr0ub4dor3a"},
+    )
+    assert resp.status_code == 201
+    cookie_header = resp.headers.get("set-cookie", "")
+    assert "access_token=" in cookie_header
+    assert "httponly" in cookie_header.lower()
+    assert "secure" not in cookie_header.lower().replace("samesite", "")
+
+
+def test_register_https_cookie_httponly_true_secure_true():
+    """HTTPS register (x-forwarded-proto) → access_token cookie is httponly=True, secure=True, has max_age."""
+    _setup_config()
+    client = _get_auth_client()
+    resp = client.post(
+        "/api/v1/auth/register",
+        json={"email": _unique_email("https-cookie"), "password": "Tr0ub4dor3a"},
+        headers={"x-forwarded-proto": "https"},
+    )
+    assert resp.status_code == 201
+    cookie_header = resp.headers.get("set-cookie", "")
+    assert "access_token=" in cookie_header
+    assert "httponly" in cookie_header.lower()
+    assert "secure" in cookie_header.lower()
+    assert "max-age" in cookie_header.lower()
+
+
+def test_login_https_sets_secure_cookie():
+    """HTTPS login → access_token cookie has secure flag."""
+    _setup_config()
+    client = _get_auth_client()
+    email = _unique_email("https-login")
+    client.post("/api/v1/auth/register", json={"email": email, "password": "Tr0ub4dor3a"})
+    resp = client.post(
+        "/api/v1/auth/login/local",
+        data={"username": email, "password": "Tr0ub4dor3a"},
+        headers={"x-forwarded-proto": "https"},
+    )
+    assert resp.status_code == 200
+    cookie_header = resp.headers.get("set-cookie", "")
+    assert "access_token=" in cookie_header
+    assert "httponly" in cookie_header.lower()
+    assert "secure" in cookie_header.lower()
+
+
+def test_csrf_cookie_secure_on_https():
+    """HTTPS register → csrf_token cookie has secure flag but NOT httponly."""
+    _setup_config()
+    client = _get_auth_client()
+    resp = client.post(
+        "/api/v1/auth/register",
+        json={"email": _unique_email("csrf-https"), "password": "Tr0ub4dor3a"},
+        headers={"x-forwarded-proto": "https"},
+    )
+    assert resp.status_code == 201
+    csrf_cookies = [h for h in _get_set_cookie_headers(resp) if "csrf_token=" in h]
+    assert csrf_cookies, "csrf_token cookie not set on HTTPS register"
+    csrf_header = csrf_cookies[0]
+    assert "secure" in csrf_header.lower()
+    assert "httponly" not in csrf_header.lower()
+
+
+def test_csrf_cookie_not_secure_on_http():
+    """HTTP register → csrf_token cookie does NOT have secure flag."""
+    _setup_config()
+    client = _get_auth_client()
+    resp = client.post(
+        "/api/v1/auth/register",
+        json={"email": _unique_email("csrf-http"), "password": "Tr0ub4dor3a"},
+    )
+    assert resp.status_code == 201
+    csrf_cookies = [h for h in _get_set_cookie_headers(resp) if "csrf_token=" in h]
+    assert csrf_cookies, "csrf_token cookie not set on HTTP register"
+    csrf_header = csrf_cookies[0]
+    assert "secure" not in csrf_header.lower().replace("samesite", "")
diff --git a/backend/tests/test_channel_file_attachments.py b/backend/tests/test_channel_file_attachments.py
index 2843a9cd0..7273b1c82 100644
--- a/backend/tests/test_channel_file_attachments.py
+++ b/backend/tests/test_channel_file_attachments.py
@@ -231,7 +231,7 @@ class TestResolveAttachments:
         mock_paths = MagicMock()
         mock_paths.sandbox_outputs_dir.return_value = outputs_dir
 
-        def resolve_side_effect(tid, vpath):
+        def resolve_side_effect(tid, vpath, *, user_id=None):
             if "data.csv" in vpath:
                 return good_file
             return tmp_path / "missing.txt"
diff --git a/backend/tests/test_checkpointer.py b/backend/tests/test_checkpointer.py
index 44db0e2d1..7733f43e0 100644
--- a/backend/tests/test_checkpointer.py
+++ b/backend/tests/test_checkpointer.py
@@ -5,25 +5,21 @@ from unittest.mock import AsyncMock, MagicMock, patch
 
 import pytest
 
-import deerflow.config.app_config as app_config_module
-from deerflow.agents.checkpointer import get_checkpointer, reset_checkpointer
-from deerflow.config.checkpointer_config import (
-    CheckpointerConfig,
-    get_checkpointer_config,
-    load_checkpointer_config_from_dict,
-    set_checkpointer_config,
-)
+from deerflow.config.app_config import AppConfig
+from deerflow.config.checkpointer_config import CheckpointerConfig
+from deerflow.config.sandbox_config import SandboxConfig
+from deerflow.runtime.checkpointer import get_checkpointer, reset_checkpointer
+
+
+def _make_config(checkpointer: CheckpointerConfig | None = None) -> AppConfig:
+    return AppConfig(sandbox=SandboxConfig(use="test"), checkpointer=checkpointer)
 
 
 @pytest.fixture(autouse=True)
 def reset_state():
     """Reset singleton state before each test."""
-    app_config_module._app_config = None
-    set_checkpointer_config(None)
     reset_checkpointer()
     yield
-    app_config_module._app_config = None
-    set_checkpointer_config(None)
     reset_checkpointer()
 
 
@@ -33,24 +29,18 @@ def reset_state():
 
 
 class TestCheckpointerConfig:
-    def test_load_memory_config(self):
-        load_checkpointer_config_from_dict({"type": "memory"})
-        config = get_checkpointer_config()
-        assert config is not None
+    def test_memory_config(self):
+        config = CheckpointerConfig(type="memory")
         assert config.type == "memory"
         assert config.connection_string is None
 
-    def test_load_sqlite_config(self):
-        load_checkpointer_config_from_dict({"type": "sqlite", "connection_string": "/tmp/test.db"})
-        config = get_checkpointer_config()
-        assert config is not None
+    def test_sqlite_config(self):
+        config = CheckpointerConfig(type="sqlite", connection_string="/tmp/test.db")
         assert config.type == "sqlite"
         assert config.connection_string == "/tmp/test.db"
 
-    def test_load_postgres_config(self):
-        load_checkpointer_config_from_dict({"type": "postgres", "connection_string": "postgresql://localhost/db"})
-        config = get_checkpointer_config()
-        assert config is not None
+    def test_postgres_config(self):
+        config = CheckpointerConfig(type="postgres", connection_string="postgresql://localhost/db")
         assert config.type == "postgres"
         assert config.connection_string == "postgresql://localhost/db"
 
@@ -58,14 +48,9 @@ class TestCheckpointerConfig:
         config = CheckpointerConfig(type="memory")
         assert config.connection_string is None
 
-    def test_set_config_to_none(self):
-        load_checkpointer_config_from_dict({"type": "memory"})
-        set_checkpointer_config(None)
-        assert get_checkpointer_config() is None
-
     def test_invalid_type_raises(self):
         with pytest.raises(Exception):
-            load_checkpointer_config_from_dict({"type": "unknown"})
+            CheckpointerConfig(type="unknown")
 
 
 # ---------------------------------------------------------------------------
@@ -78,58 +63,58 @@ class TestGetCheckpointer:
         """get_checkpointer should return InMemorySaver when not configured."""
         from langgraph.checkpoint.memory import InMemorySaver
 
-        with patch("deerflow.agents.checkpointer.provider.get_app_config", side_effect=FileNotFoundError):
-            cp = get_checkpointer()
+        cfg = _make_config()
+        cp = get_checkpointer(cfg)
         assert cp is not None
         assert isinstance(cp, InMemorySaver)
 
     def test_memory_returns_in_memory_saver(self):
-        load_checkpointer_config_from_dict({"type": "memory"})
         from langgraph.checkpoint.memory import InMemorySaver
 
-        cp = get_checkpointer()
+        cfg = _make_config(CheckpointerConfig(type="memory"))
+        cp = get_checkpointer(cfg)
         assert isinstance(cp, InMemorySaver)
 
     def test_memory_singleton(self):
-        load_checkpointer_config_from_dict({"type": "memory"})
-        cp1 = get_checkpointer()
-        cp2 = get_checkpointer()
+        cfg = _make_config(CheckpointerConfig(type="memory"))
+        cp1 = get_checkpointer(cfg)
+        cp2 = get_checkpointer(cfg)
         assert cp1 is cp2
 
     def test_reset_clears_singleton(self):
-        load_checkpointer_config_from_dict({"type": "memory"})
-        cp1 = get_checkpointer()
+        cfg = _make_config(CheckpointerConfig(type="memory"))
+        cp1 = get_checkpointer(cfg)
         reset_checkpointer()
-        cp2 = get_checkpointer()
+        cp2 = get_checkpointer(cfg)
         assert cp1 is not cp2
 
     def test_sqlite_raises_when_package_missing(self):
-        load_checkpointer_config_from_dict({"type": "sqlite", "connection_string": "/tmp/test.db"})
+        cfg = _make_config(CheckpointerConfig(type="sqlite", connection_string="/tmp/test.db"))
         with patch.dict(sys.modules, {"langgraph.checkpoint.sqlite": None}):
             reset_checkpointer()
             with pytest.raises(ImportError, match="langgraph-checkpoint-sqlite"):
-                get_checkpointer()
+                get_checkpointer(cfg)
 
     def test_postgres_raises_when_package_missing(self):
-        load_checkpointer_config_from_dict({"type": "postgres", "connection_string": "postgresql://localhost/db"})
+        cfg = _make_config(CheckpointerConfig(type="postgres", connection_string="postgresql://localhost/db"))
         with patch.dict(sys.modules, {"langgraph.checkpoint.postgres": None}):
             reset_checkpointer()
             with pytest.raises(ImportError, match="langgraph-checkpoint-postgres"):
-                get_checkpointer()
+                get_checkpointer(cfg)
 
     def test_postgres_raises_when_connection_string_missing(self):
-        load_checkpointer_config_from_dict({"type": "postgres"})
+        cfg = _make_config(CheckpointerConfig(type="postgres"))
         mock_saver = MagicMock()
         mock_module = MagicMock()
         mock_module.PostgresSaver = mock_saver
         with patch.dict(sys.modules, {"langgraph.checkpoint.postgres": mock_module}):
             reset_checkpointer()
             with pytest.raises(ValueError, match="connection_string is required"):
-                get_checkpointer()
+                get_checkpointer(cfg)
 
     def test_sqlite_creates_saver(self):
         """SQLite checkpointer is created when package is available."""
-        load_checkpointer_config_from_dict({"type": "sqlite", "connection_string": "/tmp/test.db"})
+        cfg = _make_config(CheckpointerConfig(type="sqlite", connection_string="/tmp/test.db"))
 
         mock_saver_instance = MagicMock()
         mock_cm = MagicMock()
@@ -144,7 +129,7 @@ class TestGetCheckpointer:
 
         with patch.dict(sys.modules, {"langgraph.checkpoint.sqlite": mock_module}):
             reset_checkpointer()
-            cp = get_checkpointer()
+            cp = get_checkpointer(cfg)
 
         assert cp is mock_saver_instance
         mock_saver_cls.from_conn_string.assert_called_once()
@@ -225,7 +210,7 @@ class TestGetCheckpointer:
 
     def test_postgres_creates_saver(self):
         """Postgres checkpointer is created when packages are available."""
-        load_checkpointer_config_from_dict({"type": "postgres", "connection_string": "postgresql://localhost/db"})
+        cfg = _make_config(CheckpointerConfig(type="postgres", connection_string="postgresql://localhost/db"))
 
         mock_saver_instance = MagicMock()
         mock_cm = MagicMock()
@@ -240,7 +225,7 @@ class TestGetCheckpointer:
 
         with patch.dict(sys.modules, {"langgraph.checkpoint.postgres": mock_pg_module}):
             reset_checkpointer()
-            cp = get_checkpointer()
+            cp = get_checkpointer(cfg)
 
         assert cp is mock_saver_instance
         mock_saver_cls.from_conn_string.assert_called_once_with("postgresql://localhost/db")
@@ -251,7 +236,7 @@ class TestAsyncCheckpointer:
     @pytest.mark.anyio
     async def test_sqlite_creates_parent_dir_via_to_thread(self):
         """Async SQLite setup should move mkdir off the event loop."""
-        from deerflow.agents.checkpointer.async_provider import make_checkpointer
+        from deerflow.runtime.checkpointer.async_provider import make_checkpointer
 
         mock_config = MagicMock()
         mock_config.checkpointer = CheckpointerConfig(type="sqlite", connection_string="relative/test.db")
@@ -268,15 +253,14 @@ class TestAsyncCheckpointer:
         mock_module.AsyncSqliteSaver = mock_saver_cls
 
         with (
-            patch("deerflow.agents.checkpointer.async_provider.get_app_config", return_value=mock_config),
             patch.dict(sys.modules, {"langgraph.checkpoint.sqlite.aio": mock_module}),
-            patch("deerflow.agents.checkpointer.async_provider.asyncio.to_thread", new_callable=AsyncMock) as mock_to_thread,
+            patch("deerflow.runtime.checkpointer.async_provider.asyncio.to_thread", new_callable=AsyncMock) as mock_to_thread,
             patch(
-                "deerflow.agents.checkpointer.async_provider.resolve_sqlite_conn_str",
+                "deerflow.runtime.checkpointer.async_provider.resolve_sqlite_conn_str",
                 return_value="/tmp/resolved/test.db",
             ),
         ):
-            async with make_checkpointer() as saver:
+            async with make_checkpointer(mock_config) as saver:
                 assert saver is mock_saver
 
         mock_to_thread.assert_awaited_once()
@@ -294,12 +278,10 @@ class TestAsyncCheckpointer:
 
 class TestAppConfigLoadsCheckpointer:
     def test_load_checkpointer_section(self):
-        """load_checkpointer_config_from_dict populates the global config."""
-        set_checkpointer_config(None)
-        load_checkpointer_config_from_dict({"type": "memory"})
-        cfg = get_checkpointer_config()
-        assert cfg is not None
-        assert cfg.type == "memory"
+        """AppConfig with checkpointer section has the correct config."""
+        cfg = _make_config(CheckpointerConfig(type="memory"))
+        assert cfg.checkpointer is not None
+        assert cfg.checkpointer.type == "memory"
 
 
 # ---------------------------------------------------------------------------
@@ -309,69 +291,7 @@ class TestAppConfigLoadsCheckpointer:
 
 class TestClientCheckpointerFallback:
     def test_client_uses_config_checkpointer_when_none_provided(self):
-        """DeerFlowClient._ensure_agent falls back to get_checkpointer() when checkpointer=None."""
-        from langgraph.checkpoint.memory import InMemorySaver
-
-        from deerflow.client import DeerFlowClient
-
-        load_checkpointer_config_from_dict({"type": "memory"})
-
-        captured_kwargs = {}
-
-        def fake_create_agent(**kwargs):
-            captured_kwargs.update(kwargs)
-            return MagicMock()
-
-        model_mock = MagicMock()
-        config_mock = MagicMock()
-        config_mock.models = [model_mock]
-        config_mock.get_model_config.return_value = MagicMock(supports_vision=False)
-        config_mock.checkpointer = None
-
-        with (
-            patch("deerflow.client.get_app_config", return_value=config_mock),
-            patch("deerflow.client.create_agent", side_effect=fake_create_agent),
-            patch("deerflow.client.create_chat_model", return_value=MagicMock()),
-            patch("deerflow.client._build_middlewares", return_value=[]),
-            patch("deerflow.client.apply_prompt_template", return_value=""),
-            patch("deerflow.client.DeerFlowClient._get_tools", return_value=[]),
-        ):
-            client = DeerFlowClient(checkpointer=None)
-            config = client._get_runnable_config("test-thread")
-            client._ensure_agent(config)
-
-        assert "checkpointer" in captured_kwargs
-        assert isinstance(captured_kwargs["checkpointer"], InMemorySaver)
-
-    def test_client_explicit_checkpointer_takes_precedence(self):
-        """An explicitly provided checkpointer is used even when config checkpointer is set."""
-        from deerflow.client import DeerFlowClient
-
-        load_checkpointer_config_from_dict({"type": "memory"})
-
-        explicit_cp = MagicMock()
-        captured_kwargs = {}
-
-        def fake_create_agent(**kwargs):
-            captured_kwargs.update(kwargs)
-            return MagicMock()
-
-        model_mock = MagicMock()
-        config_mock = MagicMock()
-        config_mock.models = [model_mock]
-        config_mock.get_model_config.return_value = MagicMock(supports_vision=False)
-        config_mock.checkpointer = None
-
-        with (
-            patch("deerflow.client.get_app_config", return_value=config_mock),
-            patch("deerflow.client.create_agent", side_effect=fake_create_agent),
-            patch("deerflow.client.create_chat_model", return_value=MagicMock()),
-            patch("deerflow.client._build_middlewares", return_value=[]),
-            patch("deerflow.client.apply_prompt_template", return_value=""),
-            patch("deerflow.client.DeerFlowClient._get_tools", return_value=[]),
-        ):
-            client = DeerFlowClient(checkpointer=explicit_cp)
-            config = client._get_runnable_config("test-thread")
-            client._ensure_agent(config)
-
-        assert captured_kwargs["checkpointer"] is explicit_cp
+        """DeerFlowClient._ensure_agent falls back to get_checkpointer(app_config) when checkpointer=None."""
+        # This is a structural test — verifying the fallback path exists.
+        cfg = _make_config(CheckpointerConfig(type="memory"))
+        assert cfg.checkpointer is not None
diff --git a/backend/tests/test_checkpointer_none_fix.py b/backend/tests/test_checkpointer_none_fix.py
index 4e128adbc..3b8c81b08 100644
--- a/backend/tests/test_checkpointer_none_fix.py
+++ b/backend/tests/test_checkpointer_none_fix.py
@@ -1,6 +1,6 @@
 """Test for issue #1016: checkpointer should not return None."""
 
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock
 
 import pytest
 from langgraph.checkpoint.memory import InMemorySaver
@@ -12,43 +12,40 @@ class TestCheckpointerNoneFix:
     @pytest.mark.anyio
     async def test_async_make_checkpointer_returns_in_memory_saver_when_not_configured(self):
         """make_checkpointer should return InMemorySaver when config.checkpointer is None."""
-        from deerflow.agents.checkpointer.async_provider import make_checkpointer
+        from deerflow.runtime.checkpointer.async_provider import make_checkpointer
 
-        # Mock get_app_config to return a config with checkpointer=None
         mock_config = MagicMock()
         mock_config.checkpointer = None
+        mock_config.database = None
 
-        with patch("deerflow.agents.checkpointer.async_provider.get_app_config", return_value=mock_config):
-            async with make_checkpointer() as checkpointer:
-                # Should return InMemorySaver, not None
-                assert checkpointer is not None
-                assert isinstance(checkpointer, InMemorySaver)
+        async with make_checkpointer(mock_config) as checkpointer:
+            # Should return InMemorySaver, not None
+            assert checkpointer is not None
+            assert isinstance(checkpointer, InMemorySaver)
 
-                # Should be able to call alist() without AttributeError
-                # This is what LangGraph does and what was failing in issue #1016
-                result = []
-                async for item in checkpointer.alist(config={"configurable": {"thread_id": "test"}}):
-                    result.append(item)
+            # Should be able to call alist() without AttributeError
+            # This is what LangGraph does and what was failing in issue #1016
+            result = []
+            async for item in checkpointer.alist(config={"configurable": {"thread_id": "test"}}):
+                result.append(item)
 
-                # Empty list is expected for a fresh checkpointer
-                assert result == []
+            # Empty list is expected for a fresh checkpointer
+            assert result == []
 
     def test_sync_checkpointer_context_returns_in_memory_saver_when_not_configured(self):
         """checkpointer_context should return InMemorySaver when config.checkpointer is None."""
-        from deerflow.agents.checkpointer.provider import checkpointer_context
+        from deerflow.runtime.checkpointer.provider import checkpointer_context
 
-        # Mock get_app_config to return a config with checkpointer=None
         mock_config = MagicMock()
         mock_config.checkpointer = None
 
-        with patch("deerflow.agents.checkpointer.provider.get_app_config", return_value=mock_config):
-            with checkpointer_context() as checkpointer:
-                # Should return InMemorySaver, not None
-                assert checkpointer is not None
-                assert isinstance(checkpointer, InMemorySaver)
+        with checkpointer_context(mock_config) as checkpointer:
+            # Should return InMemorySaver, not None
+            assert checkpointer is not None
+            assert isinstance(checkpointer, InMemorySaver)
 
-                # Should be able to call list() without AttributeError
-                result = list(checkpointer.list(config={"configurable": {"thread_id": "test"}}))
+            # Should be able to call list() without AttributeError
+            result = list(checkpointer.list(config={"configurable": {"thread_id": "test"}}))
 
-                # Empty list is expected for a fresh checkpointer
-                assert result == []
+            # Empty list is expected for a fresh checkpointer
+            assert result == []
diff --git a/backend/tests/test_client.py b/backend/tests/test_client.py
index 14b52d077..40e73e827 100644
--- a/backend/tests/test_client.py
+++ b/backend/tests/test_client.py
@@ -18,6 +18,7 @@ from app.gateway.routers.models import ModelResponse, ModelsListResponse
 from app.gateway.routers.skills import SkillInstallResponse, SkillResponse, SkillsListResponse
 from app.gateway.routers.uploads import UploadResponse
 from deerflow.client import DeerFlowClient
+from deerflow.config.app_config import AppConfig
 from deerflow.config.paths import Paths
 from deerflow.uploads.manager import PathTraversalError
 
@@ -44,9 +45,12 @@ def mock_app_config():
 
 @pytest.fixture
 def client(mock_app_config):
-    """Create a DeerFlowClient with mocked config loading."""
-    with patch("deerflow.client.get_app_config", return_value=mock_app_config):
-        return DeerFlowClient()
+    """Create a DeerFlowClient holding the mocked config directly.
+
+    Passing ``config=`` is the documented post-refactor way to inject a
+    test AppConfig; nothing relies on process-global state.
+    """
+    return DeerFlowClient(config=mock_app_config)
 
 
 # ---------------------------------------------------------------------------
@@ -67,8 +71,7 @@ class TestClientInit:
 
     def test_custom_params(self, mock_app_config):
         mock_middleware = MagicMock()
-        with patch("deerflow.client.get_app_config", return_value=mock_app_config):
-            c = DeerFlowClient(model_name="gpt-4", thinking_enabled=False, subagent_enabled=True, plan_mode=True, agent_name="test-agent", available_skills={"skill1", "skill2"}, middlewares=[mock_middleware])
+        c = DeerFlowClient(model_name="gpt-4", thinking_enabled=False, subagent_enabled=True, plan_mode=True, agent_name="test-agent", available_skills={"skill1", "skill2"}, middlewares=[mock_middleware])
         assert c._model_name == "gpt-4"
         assert c._thinking_enabled is False
         assert c._subagent_enabled is True
@@ -78,24 +81,21 @@ class TestClientInit:
         assert c._middlewares == [mock_middleware]
 
     def test_invalid_agent_name(self, mock_app_config):
-        with patch("deerflow.client.get_app_config", return_value=mock_app_config):
-            with pytest.raises(ValueError, match="Invalid agent name"):
-                DeerFlowClient(agent_name="invalid name with spaces!")
-            with pytest.raises(ValueError, match="Invalid agent name"):
-                DeerFlowClient(agent_name="../path/traversal")
+        with pytest.raises(ValueError, match="Invalid agent name"):
+            DeerFlowClient(agent_name="invalid name with spaces!")
+        with pytest.raises(ValueError, match="Invalid agent name"):
+            DeerFlowClient(agent_name="../path/traversal")
 
     def test_custom_config_path(self, mock_app_config):
-        with (
-            patch("deerflow.client.reload_app_config") as mock_reload,
-            patch("deerflow.client.get_app_config", return_value=mock_app_config),
-        ):
-            DeerFlowClient(config_path="/tmp/custom.yaml")
-            mock_reload.assert_called_once_with("/tmp/custom.yaml")
+        # rather than touching AppConfig.init() / process-global state.
+        with patch.object(AppConfig, "from_file", return_value=mock_app_config) as mock_from_file:
+            client = DeerFlowClient(config_path="/tmp/custom.yaml")
+            mock_from_file.assert_called_once_with("/tmp/custom.yaml")
+            assert client._app_config is mock_app_config
 
     def test_checkpointer_stored(self, mock_app_config):
         cp = MagicMock()
-        with patch("deerflow.client.get_app_config", return_value=mock_app_config):
-            c = DeerFlowClient(checkpointer=cp)
+        c = DeerFlowClient(checkpointer=cp)
         assert c._checkpointer is cp
 
 
@@ -126,7 +126,7 @@ class TestConfigQueries:
 
         with patch("deerflow.skills.loader.load_skills", return_value=[skill]) as mock_load:
             result = client.list_skills()
-            mock_load.assert_called_once_with(enabled_only=False)
+            mock_load.assert_called_once_with(client._app_config, enabled_only=False)
 
         assert "skills" in result
         assert len(result["skills"]) == 1
@@ -141,7 +141,7 @@ class TestConfigQueries:
     def test_list_skills_enabled_only(self, client):
         with patch("deerflow.skills.loader.load_skills", return_value=[]) as mock_load:
             client.list_skills(enabled_only=True)
-            mock_load.assert_called_once_with(enabled_only=True)
+            mock_load.assert_called_once_with(client._app_config, enabled_only=True)
 
     def test_get_memory(self, client):
         memory = {"version": "1.0", "facts": []}
@@ -251,8 +251,8 @@ class TestStream:
         # Verify context passed to agent.stream
         agent.stream.assert_called_once()
         call_kwargs = agent.stream.call_args.kwargs
-        assert call_kwargs["context"]["thread_id"] == "t1"
-        assert call_kwargs["context"]["agent_name"] == "test-agent-1"
+        ctx = call_kwargs["context"]
+        assert ctx.app_config is client._app_config
 
     def test_custom_mode_is_normalized_to_string(self, client):
         """stream() forwards custom events even when the mode is not a plain string."""
@@ -819,7 +819,7 @@ class TestEnsureAgent:
             patch("deerflow.client._build_middlewares", return_value=[]) as mock_build_middlewares,
             patch("deerflow.client.apply_prompt_template", return_value="prompt") as mock_apply_prompt,
             patch.object(client, "_get_tools", return_value=[]),
-            patch("deerflow.agents.checkpointer.get_checkpointer", return_value=MagicMock()),
+            patch("deerflow.runtime.checkpointer.get_checkpointer", return_value=MagicMock()),
         ):
             client._agent_name = "custom-agent"
             client._available_skills = {"test_skill"}
@@ -844,7 +844,7 @@ class TestEnsureAgent:
             patch("deerflow.client._build_middlewares", return_value=[]),
             patch("deerflow.client.apply_prompt_template", return_value="prompt"),
             patch.object(client, "_get_tools", return_value=[]),
-            patch("deerflow.agents.checkpointer.get_checkpointer", return_value=mock_checkpointer),
+            patch("deerflow.runtime.checkpointer.get_checkpointer", return_value=mock_checkpointer),
         ):
             client._ensure_agent(config)
 
@@ -869,7 +869,7 @@ class TestEnsureAgent:
             patch("deerflow.client._build_middlewares", side_effect=fake_build_middlewares),
             patch("deerflow.client.apply_prompt_template", return_value="prompt"),
             patch.object(client, "_get_tools", return_value=[]),
-            patch("deerflow.agents.checkpointer.get_checkpointer", return_value=MagicMock()),
+            patch("deerflow.runtime.checkpointer.get_checkpointer", return_value=MagicMock()),
         ):
             client._ensure_agent(config)
 
@@ -888,7 +888,7 @@ class TestEnsureAgent:
             patch("deerflow.client._build_middlewares", return_value=[]),
             patch("deerflow.client.apply_prompt_template", return_value="prompt"),
             patch.object(client, "_get_tools", return_value=[]),
-            patch("deerflow.agents.checkpointer.get_checkpointer", return_value=None),
+            patch("deerflow.runtime.checkpointer.get_checkpointer", return_value=None),
         ):
             client._ensure_agent(config)
 
@@ -1017,7 +1017,7 @@ class TestThreadQueries:
         mock_checkpointer = MagicMock()
         mock_checkpointer.list.return_value = []
 
-        with patch("deerflow.agents.checkpointer.provider.get_checkpointer", return_value=mock_checkpointer):
+        with patch("deerflow.runtime.checkpointer.provider.get_checkpointer", return_value=mock_checkpointer):
             # No internal checkpointer, should fetch from provider
             result = client.list_threads()
 
@@ -1071,7 +1071,7 @@ class TestThreadQueries:
         mock_checkpointer = MagicMock()
         mock_checkpointer.list.return_value = []
 
-        with patch("deerflow.agents.checkpointer.provider.get_checkpointer", return_value=mock_checkpointer):
+        with patch("deerflow.runtime.checkpointer.provider.get_checkpointer", return_value=mock_checkpointer):
             result = client.get_thread("t99")
 
         assert result["thread_id"] == "t99"
@@ -1091,8 +1091,8 @@ class TestMcpConfig:
         ext_config = MagicMock()
         ext_config.mcp_servers = {"github": server}
 
-        with patch("deerflow.client.get_extensions_config", return_value=ext_config):
-            result = client.get_mcp_config()
+        client._app_config = MagicMock(extensions=ext_config)
+        result = client.get_mcp_config()
 
         assert "mcp_servers" in result
         assert "github" in result["mcp_servers"]
@@ -1116,10 +1116,11 @@ class TestMcpConfig:
             # Pre-set agent to verify it gets invalidated
             client._agent = MagicMock()
 
+            client._app_config = MagicMock(extensions=current_config)
+
             with (
                 patch("deerflow.client.ExtensionsConfig.resolve_config_path", return_value=tmp_path),
-                patch("deerflow.client.get_extensions_config", return_value=current_config),
-                patch("deerflow.client.reload_extensions_config", return_value=reloaded_config),
+                patch("deerflow.config.app_config.AppConfig.from_file", return_value=MagicMock(extensions=reloaded_config)),
             ):
                 result = client.update_mcp_config({"new-server": {"enabled": True, "type": "sse"}})
 
@@ -1177,12 +1178,12 @@ class TestSkillsManagement:
         try:
             # Pre-set agent to verify it gets invalidated
             client._agent = MagicMock()
+            client._app_config = MagicMock(extensions=ext_config)
 
             with (
                 patch("deerflow.skills.loader.load_skills", side_effect=[[skill], [updated_skill]]),
                 patch("deerflow.client.ExtensionsConfig.resolve_config_path", return_value=tmp_path),
-                patch("deerflow.client.get_extensions_config", return_value=ext_config),
-                patch("deerflow.client.reload_extensions_config"),
+                patch("deerflow.config.app_config.AppConfig.from_file", return_value=MagicMock()),
             ):
                 result = client.update_skill("test-skill", enabled=False)
             assert result["enabled"] is False
@@ -1243,7 +1244,10 @@ class TestMemoryManagement:
         with patch("deerflow.agents.memory.updater.import_memory_data", return_value=imported) as mock_import:
             result = client.import_memory(imported)
 
-        mock_import.assert_called_once_with(imported)
+        assert mock_import.call_count == 1
+        call_args = mock_import.call_args
+        assert call_args.args == (client._app_config.memory, imported)
+        assert "user_id" in call_args.kwargs
         assert result == imported
 
     def test_reload_memory(self, client):
@@ -1267,6 +1271,7 @@ class TestMemoryManagement:
                 confidence=0.88,
             )
             create_fact.assert_called_once_with(
+                client._app_config.memory,
                 content="User prefers concise code reviews.",
                 category="preference",
                 confidence=0.88,
@@ -1277,7 +1282,7 @@ class TestMemoryManagement:
         data = {"version": "1.0", "facts": []}
         with patch("deerflow.agents.memory.updater.delete_memory_fact", return_value=data) as delete_fact:
             result = client.delete_memory_fact("fact_123")
-            delete_fact.assert_called_once_with("fact_123")
+            delete_fact.assert_called_once_with(client._app_config.memory, "fact_123")
         assert result == data
 
     def test_update_memory_fact(self, client):
@@ -1290,6 +1295,7 @@ class TestMemoryManagement:
                 confidence=0.91,
             )
             update_fact.assert_called_once_with(
+                client._app_config.memory,
                 fact_id="fact_123",
                 content="User prefers spaces",
                 category="workflow",
@@ -1305,6 +1311,7 @@ class TestMemoryManagement:
                 "User prefers spaces",
             )
             update_fact.assert_called_once_with(
+                client._app_config.memory,
                 fact_id="fact_123",
                 content="User prefers spaces",
                 category=None,
@@ -1313,37 +1320,40 @@ class TestMemoryManagement:
         assert result == data
 
     def test_get_memory_config(self, client):
-        config = MagicMock()
-        config.enabled = True
-        config.storage_path = ".deer-flow/memory.json"
-        config.debounce_seconds = 30
-        config.max_facts = 100
-        config.fact_confidence_threshold = 0.7
-        config.injection_enabled = True
-        config.max_injection_tokens = 2000
+        mem_config = MagicMock()
+        mem_config.enabled = True
+        mem_config.storage_path = ".deer-flow/memory.json"
+        mem_config.debounce_seconds = 30
+        mem_config.max_facts = 100
+        mem_config.fact_confidence_threshold = 0.7
+        mem_config.injection_enabled = True
+        mem_config.max_injection_tokens = 2000
 
-        with patch("deerflow.config.memory_config.get_memory_config", return_value=config):
-            result = client.get_memory_config()
+        app_cfg = MagicMock()
+        app_cfg.memory = mem_config
+
+        client._app_config = app_cfg
+        result = client.get_memory_config()
 
         assert result["enabled"] is True
         assert result["max_facts"] == 100
 
     def test_get_memory_status(self, client):
-        config = MagicMock()
-        config.enabled = True
-        config.storage_path = ".deer-flow/memory.json"
-        config.debounce_seconds = 30
-        config.max_facts = 100
-        config.fact_confidence_threshold = 0.7
-        config.injection_enabled = True
-        config.max_injection_tokens = 2000
+        mem_config = MagicMock()
+        mem_config.enabled = True
+        mem_config.storage_path = ".deer-flow/memory.json"
+        mem_config.debounce_seconds = 30
+        mem_config.max_facts = 100
+        mem_config.fact_confidence_threshold = 0.7
+        mem_config.injection_enabled = True
+        mem_config.max_injection_tokens = 2000
 
+        app_cfg = MagicMock()
+        app_cfg.memory = mem_config
         data = {"version": "1.0", "facts": []}
 
-        with (
-            patch("deerflow.config.memory_config.get_memory_config", return_value=config),
-            patch("deerflow.agents.memory.updater.get_memory_data", return_value=data),
-        ):
+        client._app_config = app_cfg
+        with patch("deerflow.agents.memory.updater.get_memory_data", return_value=data):
             result = client.get_memory_status()
 
         assert "config" in result
@@ -1489,9 +1499,12 @@ class TestUploads:
 
 class TestArtifacts:
     def test_get_artifact(self, client):
+        from deerflow.runtime.user_context import get_effective_user_id
+
         with tempfile.TemporaryDirectory() as tmp:
             paths = Paths(base_dir=tmp)
-            outputs = paths.sandbox_outputs_dir("t1")
+            user_id = get_effective_user_id()
+            outputs = paths.sandbox_outputs_dir("t1", user_id=user_id)
             outputs.mkdir(parents=True)
             (outputs / "result.txt").write_text("artifact content")
 
@@ -1502,9 +1515,12 @@ class TestArtifacts:
             assert "text" in mime
 
     def test_get_artifact_not_found(self, client):
+        from deerflow.runtime.user_context import get_effective_user_id
+
         with tempfile.TemporaryDirectory() as tmp:
             paths = Paths(base_dir=tmp)
-            paths.sandbox_user_data_dir("t1").mkdir(parents=True)
+            user_id = get_effective_user_id()
+            paths.sandbox_outputs_dir("t1", user_id=user_id).mkdir(parents=True)
 
             with patch("deerflow.client.get_paths", return_value=paths):
                 with pytest.raises(FileNotFoundError):
@@ -1515,9 +1531,12 @@ class TestArtifacts:
             client.get_artifact("t1", "bad/path/file.txt")
 
     def test_get_artifact_path_traversal(self, client):
+        from deerflow.runtime.user_context import get_effective_user_id
+
         with tempfile.TemporaryDirectory() as tmp:
             paths = Paths(base_dir=tmp)
-            paths.sandbox_user_data_dir("t1").mkdir(parents=True)
+            user_id = get_effective_user_id()
+            paths.sandbox_outputs_dir("t1", user_id=user_id).mkdir(parents=True)
 
             with patch("deerflow.client.get_paths", return_value=paths):
                 with pytest.raises(PathTraversalError):
@@ -1701,13 +1720,16 @@ class TestScenarioFileLifecycle:
 
     def test_upload_then_read_artifact(self, client):
         """Upload a file, simulate agent producing artifact, read it back."""
+        from deerflow.runtime.user_context import get_effective_user_id
+
         with tempfile.TemporaryDirectory() as tmp:
             tmp_path = Path(tmp)
             uploads_dir = tmp_path / "uploads"
             uploads_dir.mkdir()
 
             paths = Paths(base_dir=tmp_path)
-            outputs_dir = paths.sandbox_outputs_dir("t-artifact")
+            user_id = get_effective_user_id()
+            outputs_dir = paths.sandbox_outputs_dir("t-artifact", user_id=user_id)
             outputs_dir.mkdir(parents=True)
 
             # Upload phase
@@ -1785,10 +1807,10 @@ class TestScenarioConfigManagement:
             reloaded_config.mcp_servers = {"my-mcp": reloaded_server}
 
             client._agent = MagicMock()  # Simulate existing agent
+            client._app_config = MagicMock(extensions=current_config)
             with (
                 patch("deerflow.client.ExtensionsConfig.resolve_config_path", return_value=config_file),
-                patch("deerflow.client.get_extensions_config", return_value=current_config),
-                patch("deerflow.client.reload_extensions_config", return_value=reloaded_config),
+                patch("deerflow.config.app_config.AppConfig.from_file", return_value=MagicMock(extensions=reloaded_config)),
             ):
                 mcp_result = client.update_mcp_config({"my-mcp": {"enabled": True}})
             assert "my-mcp" in mcp_result["mcp_servers"]
@@ -1817,8 +1839,7 @@ class TestScenarioConfigManagement:
             with (
                 patch("deerflow.skills.loader.load_skills", side_effect=[[skill], [toggled]]),
                 patch("deerflow.client.ExtensionsConfig.resolve_config_path", return_value=config_file),
-                patch("deerflow.client.get_extensions_config", return_value=ext_config),
-                patch("deerflow.client.reload_extensions_config"),
+                patch("deerflow.config.app_config.AppConfig.from_file", return_value=MagicMock()),
             ):
                 skill_result = client.update_skill("code-gen", enabled=False)
             assert skill_result["enabled"] is False
@@ -1846,7 +1867,7 @@ class TestScenarioAgentRecreation:
             patch("deerflow.client._build_middlewares", return_value=[]),
             patch("deerflow.client.apply_prompt_template", return_value="prompt"),
             patch.object(client, "_get_tools", return_value=[]),
-            patch("deerflow.agents.checkpointer.get_checkpointer", return_value=MagicMock()),
+            patch("deerflow.runtime.checkpointer.get_checkpointer", return_value=MagicMock()),
         ):
             client._ensure_agent(config_a)
             first_agent = client._agent
@@ -1874,7 +1895,7 @@ class TestScenarioAgentRecreation:
             patch("deerflow.client._build_middlewares", return_value=[]),
             patch("deerflow.client.apply_prompt_template", return_value="prompt"),
             patch.object(client, "_get_tools", return_value=[]),
-            patch("deerflow.agents.checkpointer.get_checkpointer", return_value=MagicMock()),
+            patch("deerflow.runtime.checkpointer.get_checkpointer", return_value=MagicMock()),
         ):
             client._ensure_agent(config)
             client._ensure_agent(config)
@@ -1899,7 +1920,7 @@ class TestScenarioAgentRecreation:
             patch("deerflow.client._build_middlewares", return_value=[]),
             patch("deerflow.client.apply_prompt_template", return_value="prompt"),
             patch.object(client, "_get_tools", return_value=[]),
-            patch("deerflow.agents.checkpointer.get_checkpointer", return_value=MagicMock()),
+            patch("deerflow.runtime.checkpointer.get_checkpointer", return_value=MagicMock()),
         ):
             client._ensure_agent(config)
             client.reset_agent()
@@ -1957,11 +1978,14 @@ class TestScenarioThreadIsolation:
 
     def test_artifacts_isolated_per_thread(self, client):
         """Artifacts in thread-A are not accessible from thread-B."""
+        from deerflow.runtime.user_context import get_effective_user_id
+
         with tempfile.TemporaryDirectory() as tmp:
             paths = Paths(base_dir=tmp)
-            outputs_a = paths.sandbox_outputs_dir("thread-a")
+            user_id = get_effective_user_id()
+            outputs_a = paths.sandbox_outputs_dir("thread-a", user_id=user_id)
             outputs_a.mkdir(parents=True)
-            paths.sandbox_user_data_dir("thread-b").mkdir(parents=True)
+            paths.sandbox_outputs_dir("thread-b", user_id=user_id).mkdir(parents=True)
             (outputs_a / "result.txt").write_text("thread-a artifact")
 
             with patch("deerflow.client.get_paths", return_value=paths):
@@ -2003,10 +2027,10 @@ class TestScenarioMemoryWorkflow:
             refreshed = client.reload_memory()
         assert len(refreshed["facts"]) == 2
 
-        with (
-            patch("deerflow.config.memory_config.get_memory_config", return_value=config),
-            patch("deerflow.agents.memory.updater.get_memory_data", return_value=updated_data),
-        ):
+        app_cfg = MagicMock()
+        app_cfg.memory = config
+        client._app_config = app_cfg
+        with patch("deerflow.agents.memory.updater.get_memory_data", return_value=updated_data):
             status = client.get_memory_status()
         assert status["config"]["enabled"] is True
         assert len(status["data"]["facts"]) == 2
@@ -2067,8 +2091,7 @@ class TestScenarioSkillInstallAndUse:
             with (
                 patch("deerflow.skills.loader.load_skills", side_effect=[[installed_skill], [disabled_skill]]),
                 patch("deerflow.client.ExtensionsConfig.resolve_config_path", return_value=config_file),
-                patch("deerflow.client.get_extensions_config", return_value=ext_config),
-                patch("deerflow.client.reload_extensions_config"),
+                patch("deerflow.config.app_config.AppConfig.from_file", return_value=MagicMock()),
             ):
                 toggled = client.update_skill("my-analyzer", enabled=False)
             assert toggled["enabled"] is False
@@ -2202,8 +2225,7 @@ class TestGatewayConformance:
         mock_app_config.models = [model]
         mock_app_config.token_usage.enabled = True
 
-        with patch("deerflow.client.get_app_config", return_value=mock_app_config):
-            client = DeerFlowClient()
+        client = DeerFlowClient(config=mock_app_config)
 
         result = client.list_models()
         parsed = ModelsListResponse(**result)
@@ -2222,8 +2244,7 @@ class TestGatewayConformance:
         mock_app_config.models = [model]
         mock_app_config.get_model_config.return_value = model
 
-        with patch("deerflow.client.get_app_config", return_value=mock_app_config):
-            client = DeerFlowClient()
+        client = DeerFlowClient(config=mock_app_config)
 
         result = client.get_model("test-model")
         assert result is not None
@@ -2292,8 +2313,8 @@ class TestGatewayConformance:
         ext_config = MagicMock()
         ext_config.mcp_servers = {"test": server}
 
-        with patch("deerflow.client.get_extensions_config", return_value=ext_config):
-            result = client.get_mcp_config()
+        client._app_config = MagicMock(extensions=ext_config)
+        result = client.get_mcp_config()
 
         parsed = McpConfigResponse(**result)
         assert "test" in parsed.mcp_servers
@@ -2317,10 +2338,10 @@ class TestGatewayConformance:
         config_file = tmp_path / "extensions_config.json"
         config_file.write_text("{}")
 
+        client._app_config = MagicMock(extensions=ext_config)
         with (
-            patch("deerflow.client.get_extensions_config", return_value=ext_config),
             patch("deerflow.client.ExtensionsConfig.resolve_config_path", return_value=config_file),
-            patch("deerflow.client.reload_extensions_config", return_value=ext_config),
+            patch("deerflow.config.app_config.AppConfig.from_file", return_value=MagicMock(extensions=ext_config)),
         ):
             result = client.update_mcp_config({"srv": server.model_dump.return_value})
 
@@ -2351,8 +2372,11 @@ class TestGatewayConformance:
         mem_cfg.injection_enabled = True
         mem_cfg.max_injection_tokens = 2000
 
-        with patch("deerflow.config.memory_config.get_memory_config", return_value=mem_cfg):
-            result = client.get_memory_config()
+        app_cfg = MagicMock()
+        app_cfg.memory = mem_cfg
+
+        client._app_config = app_cfg
+        result = client.get_memory_config()
 
         parsed = MemoryConfigResponse(**result)
         assert parsed.enabled is True
@@ -2368,6 +2392,8 @@ class TestGatewayConformance:
         mem_cfg.injection_enabled = True
         mem_cfg.max_injection_tokens = 2000
 
+        app_cfg = MagicMock()
+        app_cfg.memory = mem_cfg
         memory_data = {
             "version": "1.0",
             "lastUpdated": "",
@@ -2384,10 +2410,8 @@ class TestGatewayConformance:
             "facts": [],
         }
 
-        with (
-            patch("deerflow.config.memory_config.get_memory_config", return_value=mem_cfg),
-            patch("deerflow.agents.memory.updater.get_memory_data", return_value=memory_data),
-        ):
+        client._app_config = app_cfg
+        with patch("deerflow.agents.memory.updater.get_memory_data", return_value=memory_data):
             result = client.get_memory_status()
 
         parsed = MemoryStatusResponse(**result)
@@ -2676,8 +2700,7 @@ class TestConfigUpdateErrors:
             with (
                 patch("deerflow.skills.loader.load_skills", side_effect=[[skill], []]),
                 patch("deerflow.client.ExtensionsConfig.resolve_config_path", return_value=config_file),
-                patch("deerflow.client.get_extensions_config", return_value=ext_config),
-                patch("deerflow.client.reload_extensions_config"),
+                patch("deerflow.config.app_config.AppConfig.from_file", return_value=MagicMock()),
             ):
                 with pytest.raises(RuntimeError, match="disappeared"):
                     client.update_skill("ghost-skill", enabled=False)
@@ -2869,9 +2892,12 @@ class TestUploadDeleteSymlink:
 class TestArtifactHardening:
     def test_artifact_directory_rejected(self, client):
         """get_artifact rejects paths that resolve to a directory."""
+        from deerflow.runtime.user_context import get_effective_user_id
+
         with tempfile.TemporaryDirectory() as tmp:
             paths = Paths(base_dir=tmp)
-            subdir = paths.sandbox_outputs_dir("t1") / "subdir"
+            user_id = get_effective_user_id()
+            subdir = paths.sandbox_outputs_dir("t1", user_id=user_id) / "subdir"
             subdir.mkdir(parents=True)
 
             with patch("deerflow.client.get_paths", return_value=paths):
@@ -2880,9 +2906,12 @@ class TestArtifactHardening:
 
     def test_artifact_leading_slash_stripped(self, client):
         """Paths with leading slash are handled correctly."""
+        from deerflow.runtime.user_context import get_effective_user_id
+
         with tempfile.TemporaryDirectory() as tmp:
             paths = Paths(base_dir=tmp)
-            outputs = paths.sandbox_outputs_dir("t1")
+            user_id = get_effective_user_id()
+            outputs = paths.sandbox_outputs_dir("t1", user_id=user_id)
             outputs.mkdir(parents=True)
             (outputs / "file.txt").write_text("content")
 
@@ -2996,9 +3025,12 @@ class TestBugArtifactPrefixMatchTooLoose:
 
     def test_exact_prefix_without_subpath_accepted(self, client):
         """Bare 'mnt/user-data' is accepted (will later fail as directory, not at prefix)."""
+        from deerflow.runtime.user_context import get_effective_user_id
+
         with tempfile.TemporaryDirectory() as tmp:
             paths = Paths(base_dir=tmp)
-            paths.sandbox_user_data_dir("t1").mkdir(parents=True)
+            user_id = get_effective_user_id()
+            paths.sandbox_outputs_dir("t1", user_id=user_id).mkdir(parents=True)
 
             with patch("deerflow.client.get_paths", return_value=paths):
                 # Accepted at prefix check, but fails because it's a directory.
@@ -3047,10 +3079,10 @@ class TestBugAgentInvalidationInconsistency:
             config_file = Path(tmp) / "ext.json"
             config_file.write_text("{}")
 
+            client._app_config = MagicMock(extensions=current_config)
             with (
                 patch("deerflow.client.ExtensionsConfig.resolve_config_path", return_value=config_file),
-                patch("deerflow.client.get_extensions_config", return_value=current_config),
-                patch("deerflow.client.reload_extensions_config", return_value=reloaded),
+                patch("deerflow.config.app_config.AppConfig.from_file", return_value=MagicMock(extensions=reloaded)),
             ):
                 client.update_mcp_config({})
 
@@ -3082,8 +3114,7 @@ class TestBugAgentInvalidationInconsistency:
             with (
                 patch("deerflow.skills.loader.load_skills", side_effect=[[skill], [updated]]),
                 patch("deerflow.client.ExtensionsConfig.resolve_config_path", return_value=config_file),
-                patch("deerflow.client.get_extensions_config", return_value=ext_config),
-                patch("deerflow.client.reload_extensions_config"),
+                patch("deerflow.config.app_config.AppConfig.from_file", return_value=MagicMock()),
             ):
                 client.update_skill("s1", enabled=False)
 
diff --git a/backend/tests/test_client_e2e.py b/backend/tests/test_client_e2e.py
index b26e5bff1..3b66c571d 100644
--- a/backend/tests/test_client_e2e.py
+++ b/backend/tests/test_client_e2e.py
@@ -56,6 +56,10 @@ def _make_e2e_config() -> AppConfig:
     - ``E2E_BASE_URL``    (default: ``https://ark-cn-beijing.bytedance.net/api/v3``)
     - ``OPENAI_API_KEY``  (required for LLM tests)
     """
+    from deerflow.config.memory_config import MemoryConfig
+    from deerflow.config.summarization_config import SummarizationConfig
+    from deerflow.config.title_config import TitleConfig
+
     return AppConfig(
         models=[
             ModelConfig(
@@ -73,6 +77,9 @@ def _make_e2e_config() -> AppConfig:
             )
         ],
         sandbox=SandboxConfig(use="deerflow.sandbox.local:LocalSandboxProvider", allow_host_bash=True),
+        title=TitleConfig(enabled=False),
+        memory=MemoryConfig(enabled=False),
+        summarization=SummarizationConfig(enabled=False),
     )
 
 
@@ -87,7 +94,7 @@ def e2e_env(tmp_path, monkeypatch):
 
     - DEER_FLOW_HOME → tmp_path (all thread data lands in a temp dir)
     - Singletons reset so they pick up the new env
-    - Title/memory/summarization disabled to avoid extra LLM calls
+    - Title/memory/summarization disabled via AppConfig fields
     - AppConfig built programmatically (avoids config.yaml param-name issues)
     """
     # 1. Filesystem isolation
@@ -95,30 +102,12 @@ def e2e_env(tmp_path, monkeypatch):
     monkeypatch.setattr("deerflow.config.paths._paths", None)
     monkeypatch.setattr("deerflow.sandbox.sandbox_provider._default_sandbox_provider", None)
 
-    # 2. Inject a clean AppConfig via the global singleton.
-    config = _make_e2e_config()
-    monkeypatch.setattr("deerflow.config.app_config._app_config", config)
-    monkeypatch.setattr("deerflow.config.app_config._app_config_is_custom", True)
+    # 1b. Override the autouse ``AppConfig.from_file`` stub from conftest
+    #     (minimal test config) with the e2e-specific config that carries a
+    #     real model entry and disables title/memory/summarization.
+    monkeypatch.setattr(AppConfig, "from_file", staticmethod(lambda config_path=None: _make_e2e_config()))
 
-    # 3. Disable title generation (extra LLM call, non-deterministic)
-    from deerflow.config.title_config import TitleConfig
-
-    monkeypatch.setattr("deerflow.config.title_config._title_config", TitleConfig(enabled=False))
-
-    # 4. Disable memory queueing (avoids background threads & file writes)
-    from deerflow.config.memory_config import MemoryConfig
-
-    monkeypatch.setattr(
-        "deerflow.agents.middlewares.memory_middleware.get_memory_config",
-        lambda: MemoryConfig(enabled=False),
-    )
-
-    # 5. Ensure summarization is off (default, but be explicit)
-    from deerflow.config.summarization_config import SummarizationConfig
-
-    monkeypatch.setattr("deerflow.config.summarization_config._summarization_config", SummarizationConfig(enabled=False))
-
-    # 6. Exclude TitleMiddleware from the chain.
+    # 2. Exclude TitleMiddleware from the chain.
     #    It triggers an extra LLM call to generate a thread title, which adds
     #    non-determinism and cost to E2E tests (title generation is already
     #    disabled via TitleConfig above, but the middleware still participates
@@ -262,8 +251,9 @@ class TestFileUploadIntegration:
 
         # Physically exists
         from deerflow.config.paths import get_paths
+        from deerflow.runtime.user_context import get_effective_user_id
 
-        assert (get_paths().sandbox_uploads_dir(tid) / "readme.txt").exists()
+        assert (get_paths().sandbox_uploads_dir(tid, user_id=get_effective_user_id()) / "readme.txt").exists()
 
     def test_upload_duplicate_rename(self, e2e_env, tmp_path):
         """Uploading two files with the same name auto-renames the second."""
@@ -472,12 +462,13 @@ class TestArtifactAccess:
     def test_get_artifact_happy_path(self, e2e_env):
         """Write a file to outputs, then read it back via get_artifact()."""
         from deerflow.config.paths import get_paths
+        from deerflow.runtime.user_context import get_effective_user_id
 
         c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
         tid = str(uuid.uuid4())
 
         # Create an output file in the thread's outputs directory
-        outputs_dir = get_paths().sandbox_outputs_dir(tid)
+        outputs_dir = get_paths().sandbox_outputs_dir(tid, user_id=get_effective_user_id())
         outputs_dir.mkdir(parents=True, exist_ok=True)
         (outputs_dir / "result.txt").write_text("hello artifact")
 
@@ -488,11 +479,12 @@ class TestArtifactAccess:
     def test_get_artifact_nested_path(self, e2e_env):
         """Artifacts in subdirectories are accessible."""
         from deerflow.config.paths import get_paths
+        from deerflow.runtime.user_context import get_effective_user_id
 
         c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
         tid = str(uuid.uuid4())
 
-        outputs_dir = get_paths().sandbox_outputs_dir(tid)
+        outputs_dir = get_paths().sandbox_outputs_dir(tid, user_id=get_effective_user_id())
         sub = outputs_dir / "charts"
         sub.mkdir(parents=True, exist_ok=True)
         (sub / "data.json").write_text('{"x": 1}')
@@ -663,10 +655,9 @@ class TestConfigManagement:
         config_file.write_text(json.dumps({"mcpServers": {}, "skills": {}}))
         monkeypatch.setenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH", str(config_file))
 
-        # Force reload so the singleton picks up our test file
-        from deerflow.config.extensions_config import reload_extensions_config
-
-        reload_extensions_config()
+        # Mock from_file so update_mcp_config's internal reload works without config.yaml
+        e2e_config = _make_e2e_config()
+        monkeypatch.setattr(AppConfig, "from_file", classmethod(lambda cls, path=None: e2e_config))
 
         c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
         # Simulate a cached agent
@@ -690,9 +681,9 @@ class TestConfigManagement:
         config_file.write_text(json.dumps({"mcpServers": {}, "skills": {}}))
         monkeypatch.setenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH", str(config_file))
 
-        from deerflow.config.extensions_config import reload_extensions_config
-
-        reload_extensions_config()
+        # Mock from_file so update_skill's internal reload works without config.yaml
+        e2e_config = _make_e2e_config()
+        monkeypatch.setattr(AppConfig, "from_file", classmethod(lambda cls, path=None: e2e_config))
 
         c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
         c._agent = "fake-agent-placeholder"
@@ -718,10 +709,6 @@ class TestConfigManagement:
         config_file.write_text(json.dumps({"mcpServers": {}, "skills": {}}))
         monkeypatch.setenv("DEER_FLOW_EXTENSIONS_CONFIG_PATH", str(config_file))
 
-        from deerflow.config.extensions_config import reload_extensions_config
-
-        reload_extensions_config()
-
         c = DeerFlowClient(checkpointer=None, thinking_enabled=False)
         with pytest.raises(ValueError, match="not found"):
             c.update_skill("nonexistent-skill-xyz", enabled=True)
diff --git a/backend/tests/test_client_live.py b/backend/tests/test_client_live.py
index 0271ebf21..342673d72 100644
--- a/backend/tests/test_client_live.py
+++ b/backend/tests/test_client_live.py
@@ -101,7 +101,7 @@ class TestLiveStreaming:
 class TestLiveToolUse:
     def test_agent_uses_bash_tool(self, client):
         """Agent uses bash tool when asked to run a command."""
-        if not is_host_bash_allowed():
+        if not is_host_bash_allowed(client._app_config):
             pytest.skip("Host bash is disabled for LocalSandboxProvider in the active config")
 
         events = list(client.stream("Use the bash tool to run: echo 'LIVE_TEST_OK'. Then tell me the output."))
diff --git a/backend/tests/test_client_multi_isolation.py b/backend/tests/test_client_multi_isolation.py
new file mode 100644
index 000000000..0271483af
--- /dev/null
+++ b/backend/tests/test_client_multi_isolation.py
@@ -0,0 +1,82 @@
+"""Multi-client isolation regression test.
+
+Phase 2 Task P2-3: ``DeerFlowClient`` now captures its ``AppConfig`` in the
+constructor instead of going through a process-global config.
+This test pins the resulting invariant: two clients with different configs
+can coexist without contending over shared state.
+
+Before P2-3, the shared ``AppConfig._global`` caused the second client's
+``init()`` to clobber the first client's config.
+"""
+
+from __future__ import annotations
+
+from unittest.mock import MagicMock
+
+import pytest
+
+from deerflow.client import DeerFlowClient
+from deerflow.config.app_config import AppConfig
+from deerflow.config.memory_config import MemoryConfig
+from deerflow.config.sandbox_config import SandboxConfig
+
+
+@pytest.fixture
+def disable_agent_creation(monkeypatch):
+    """Prevent lazy agent creation — we only care about config access."""
+    monkeypatch.setattr(DeerFlowClient, "_get_or_create_agent", MagicMock(), raising=False)
+
+
+def test_two_clients_do_not_clobber_each_other(disable_agent_creation):
+    """Two clients with distinct configs keep their own AppConfig."""
+    cfg_a = AppConfig(
+        sandbox=SandboxConfig(use="test"),
+        memory=MemoryConfig(enabled=True),
+    )
+    cfg_b = AppConfig(
+        sandbox=SandboxConfig(use="test"),
+        memory=MemoryConfig(enabled=False),
+    )
+
+    client_a = DeerFlowClient(config=cfg_a)
+    client_b = DeerFlowClient(config=cfg_b)
+
+    # Identity: each client retains its own instance, not a shared ref
+    assert client_a._app_config is cfg_a
+    assert client_b._app_config is cfg_b
+
+    # Semantic: memory flag differs
+    assert client_a._app_config.memory.enabled is True
+    assert client_b._app_config.memory.enabled is False
+
+
+def test_client_config_precedes_path(disable_agent_creation, tmp_path):
+    """When both config= and config_path= are given, config= wins."""
+    cfg = AppConfig(sandbox=SandboxConfig(use="test"), log_level="debug")
+
+    # config_path points at a file that doesn't exist — proves it's unused
+    bogus_path = str(tmp_path / "nope.yaml")
+    client = DeerFlowClient(config_path=bogus_path, config=cfg)
+
+    assert client._app_config is cfg
+    assert client._app_config.log_level == "debug"
+
+
+def test_multi_client_gateway_dict_returns_distinct(disable_agent_creation):
+    """get_mcp_config() reads from self._app_config, not process-global."""
+    from deerflow.config.extensions_config import ExtensionsConfig, McpServerConfig
+
+    ext_a = ExtensionsConfig(mcp_servers={"server-a": McpServerConfig(enabled=True)})
+    ext_b = ExtensionsConfig(mcp_servers={"server-b": McpServerConfig(enabled=True)})
+
+    cfg_a = AppConfig(sandbox=SandboxConfig(use="test"), extensions=ext_a)
+    cfg_b = AppConfig(sandbox=SandboxConfig(use="test"), extensions=ext_b)
+
+    client_a = DeerFlowClient(config=cfg_a)
+    client_b = DeerFlowClient(config=cfg_b)
+
+    servers_a = client_a.get_mcp_config()["mcp_servers"]
+    servers_b = client_b.get_mcp_config()["mcp_servers"]
+
+    assert set(servers_a.keys()) == {"server-a"}
+    assert set(servers_b.keys()) == {"server-b"}
diff --git a/backend/tests/test_config_frozen.py b/backend/tests/test_config_frozen.py
new file mode 100644
index 000000000..5c89bdcfd
--- /dev/null
+++ b/backend/tests/test_config_frozen.py
@@ -0,0 +1,95 @@
+"""Verify that all sub-config Pydantic models are frozen (immutable).
+
+Frozen models reject attribute assignment after construction, raising
+pydantic.ValidationError. This test collects every BaseModel subclass
+defined in the deerflow.config package and asserts that mutation is
+blocked.
+"""
+
+import inspect
+import pkgutil
+
+import pytest
+from pydantic import BaseModel, ValidationError
+
+import deerflow.config as config_pkg
+
+
+def _collect_config_models() -> list[type[BaseModel]]:
+    """Walk deerflow.config.* and return all concrete BaseModel subclasses."""
+    import importlib
+
+    models: list[type[BaseModel]] = []
+    package_path = config_pkg.__path__
+    package_prefix = config_pkg.__name__ + "."
+
+    for _importer, modname, _ispkg in pkgutil.walk_packages(package_path, prefix=package_prefix):
+        try:
+            mod = importlib.import_module(modname)
+        except Exception:
+            continue
+        for _name, obj in inspect.getmembers(mod, inspect.isclass):
+            if (
+                issubclass(obj, BaseModel)
+                and obj is not BaseModel
+                and obj.__module__ == mod.__name__
+            ):
+                models.append(obj)
+
+    return models
+
+
+_EXCLUDED: set[str] = set()
+
+_ALL_MODELS = [m for m in _collect_config_models() if m.__name__ not in _EXCLUDED]
+
+# Sanity: make sure we actually collected a meaningful set.
+assert len(_ALL_MODELS) >= 15, f"Expected at least 15 config models, found {len(_ALL_MODELS)}: {[m.__name__ for m in _ALL_MODELS]}"
+
+
+@pytest.mark.parametrize("model_cls", _ALL_MODELS, ids=lambda cls: cls.__name__)
+def test_config_model_is_frozen(model_cls: type[BaseModel]):
+    """Every sub-config model must have frozen=True in its model_config."""
+    cfg = model_cls.model_config
+    assert cfg.get("frozen") is True, (
+        f"{model_cls.__name__} is not frozen. "
+        f"Add `model_config = ConfigDict(frozen=True)` or add `frozen=True` to the existing ConfigDict."
+    )
+
+
+@pytest.mark.parametrize("model_cls", _ALL_MODELS, ids=lambda cls: cls.__name__)
+def test_config_model_rejects_mutation(model_cls: type[BaseModel]):
+    """Constructing then mutating any field must raise ValidationError."""
+    # Build a minimal instance -- use model_construct to skip validation for
+    # required fields, then pick the first field to try mutating.
+    fields = list(model_cls.model_fields.keys())
+    if not fields:
+        pytest.skip(f"{model_cls.__name__} has no fields")
+
+    instance = model_cls.model_construct()
+    first_field = fields[0]
+
+    with pytest.raises(ValidationError):
+        setattr(instance, first_field, "MUTATED")
+
+
+def test_extensions_nested_dict_mutation_is_not_blocked_by_pydantic():
+    """Regression guard: Pydantic `frozen=True` does NOT deep-freeze container fields.
+
+    This test documents the trap — callers MUST compose a new dict and persist
+    it + reload AppConfig instead of reaching into `extensions.skills[x]`.
+    If you need the dict to be truly immutable, wrap with Mapping/frozendict.
+    """
+    from deerflow.config.extensions_config import ExtensionsConfig, SkillStateConfig
+
+    ext = ExtensionsConfig(mcp_servers={}, skills={"a": SkillStateConfig(enabled=True)})
+
+    # This is the pre-refactor anti-pattern: Pydantic lets it through because
+    # the outer model is frozen but the inner dict is a plain builtin. No error.
+    ext.skills["a"] = SkillStateConfig(enabled=False)
+    ext.skills["b"] = SkillStateConfig(enabled=True)
+
+    # The test asserts the leak exists so a future "add deep-freeze" change
+    # flips this expectation and forces call-site review.
+    assert ext.skills["a"].enabled is False
+    assert "b" in ext.skills
diff --git a/backend/tests/test_converters.py b/backend/tests/test_converters.py
new file mode 100644
index 000000000..2c2167e01
--- /dev/null
+++ b/backend/tests/test_converters.py
@@ -0,0 +1,188 @@
+"""Tests for LangChain-to-OpenAI message format converters."""
+
+from __future__ import annotations
+
+import json
+from unittest.mock import MagicMock
+
+from deerflow.runtime.converters import (
+    langchain_messages_to_openai,
+    langchain_to_openai_completion,
+    langchain_to_openai_message,
+)
+
+
+def _make_ai_message(content="", tool_calls=None, id="msg-123", usage_metadata=None, response_metadata=None):
+    msg = MagicMock()
+    msg.type = "ai"
+    msg.content = content
+    msg.tool_calls = tool_calls or []
+    msg.id = id
+    msg.usage_metadata = usage_metadata
+    msg.response_metadata = response_metadata or {}
+    return msg
+
+
+def _make_human_message(content="Hello"):
+    msg = MagicMock()
+    msg.type = "human"
+    msg.content = content
+    return msg
+
+
+def _make_system_message(content="You are an assistant."):
+    msg = MagicMock()
+    msg.type = "system"
+    msg.content = content
+    return msg
+
+
+def _make_tool_message(content="result", tool_call_id="call-abc"):
+    msg = MagicMock()
+    msg.type = "tool"
+    msg.content = content
+    msg.tool_call_id = tool_call_id
+    return msg
+
+
+class TestLangchainToOpenaiMessage:
+    def test_ai_message_text_only(self):
+        msg = _make_ai_message(content="Hello world")
+        result = langchain_to_openai_message(msg)
+        assert result["role"] == "assistant"
+        assert result["content"] == "Hello world"
+        assert "tool_calls" not in result
+
+    def test_ai_message_with_tool_calls(self):
+        tool_calls = [
+            {"id": "call-1", "name": "bash", "args": {"command": "ls"}},
+        ]
+        msg = _make_ai_message(content="", tool_calls=tool_calls)
+        result = langchain_to_openai_message(msg)
+        assert result["role"] == "assistant"
+        assert result["content"] is None
+        assert len(result["tool_calls"]) == 1
+        tc = result["tool_calls"][0]
+        assert tc["id"] == "call-1"
+        assert tc["type"] == "function"
+        assert tc["function"]["name"] == "bash"
+        # arguments must be a JSON string
+        args = json.loads(tc["function"]["arguments"])
+        assert args == {"command": "ls"}
+
+    def test_ai_message_text_and_tool_calls(self):
+        tool_calls = [
+            {"id": "call-2", "name": "read_file", "args": {"path": "/tmp/x"}},
+        ]
+        msg = _make_ai_message(content="Reading the file", tool_calls=tool_calls)
+        result = langchain_to_openai_message(msg)
+        assert result["role"] == "assistant"
+        assert result["content"] == "Reading the file"
+        assert len(result["tool_calls"]) == 1
+
+    def test_ai_message_empty_content_no_tools(self):
+        msg = _make_ai_message(content="")
+        result = langchain_to_openai_message(msg)
+        assert result["role"] == "assistant"
+        assert result["content"] == ""
+        assert "tool_calls" not in result
+
+    def test_ai_message_list_content(self):
+        # Multimodal content is preserved as-is
+        list_content = [
+            {"type": "text", "text": "Here is an image"},
+            {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
+        ]
+        msg = _make_ai_message(content=list_content)
+        result = langchain_to_openai_message(msg)
+        assert result["role"] == "assistant"
+        assert result["content"] == list_content
+
+    def test_human_message(self):
+        msg = _make_human_message("Tell me a joke")
+        result = langchain_to_openai_message(msg)
+        assert result["role"] == "user"
+        assert result["content"] == "Tell me a joke"
+
+    def test_tool_message(self):
+        msg = _make_tool_message(content="file contents here", tool_call_id="call-xyz")
+        result = langchain_to_openai_message(msg)
+        assert result["role"] == "tool"
+        assert result["tool_call_id"] == "call-xyz"
+        assert result["content"] == "file contents here"
+
+    def test_system_message(self):
+        msg = _make_system_message("You are a helpful assistant.")
+        result = langchain_to_openai_message(msg)
+        assert result["role"] == "system"
+        assert result["content"] == "You are a helpful assistant."
+
+
+class TestLangchainToOpenaiCompletion:
+    def test_basic_completion(self):
+        usage_metadata = {"input_tokens": 10, "output_tokens": 20}
+        msg = _make_ai_message(
+            content="Hello",
+            id="msg-abc",
+            usage_metadata=usage_metadata,
+            response_metadata={"model_name": "gpt-4o", "finish_reason": "stop"},
+        )
+        result = langchain_to_openai_completion(msg)
+        assert result["id"] == "msg-abc"
+        assert result["model"] == "gpt-4o"
+        assert len(result["choices"]) == 1
+        choice = result["choices"][0]
+        assert choice["index"] == 0
+        assert choice["finish_reason"] == "stop"
+        assert choice["message"]["role"] == "assistant"
+        assert choice["message"]["content"] == "Hello"
+        assert result["usage"] is not None
+        assert result["usage"]["prompt_tokens"] == 10
+        assert result["usage"]["completion_tokens"] == 20
+        assert result["usage"]["total_tokens"] == 30
+
+    def test_completion_with_tool_calls(self):
+        tool_calls = [{"id": "call-1", "name": "bash", "args": {}}]
+        msg = _make_ai_message(
+            content="",
+            tool_calls=tool_calls,
+            id="msg-tc",
+            response_metadata={"model_name": "gpt-4o"},
+        )
+        result = langchain_to_openai_completion(msg)
+        assert result["choices"][0]["finish_reason"] == "tool_calls"
+
+    def test_completion_no_usage(self):
+        msg = _make_ai_message(content="Hi", id="msg-nousage", usage_metadata=None)
+        result = langchain_to_openai_completion(msg)
+        assert result["usage"] is None
+
+    def test_finish_reason_from_response_metadata(self):
+        msg = _make_ai_message(
+            content="Done",
+            id="msg-fr",
+            response_metadata={"model_name": "claude-3", "finish_reason": "end_turn"},
+        )
+        result = langchain_to_openai_completion(msg)
+        assert result["choices"][0]["finish_reason"] == "end_turn"
+
+    def test_finish_reason_default_stop(self):
+        msg = _make_ai_message(content="Done", id="msg-defstop", response_metadata={})
+        result = langchain_to_openai_completion(msg)
+        assert result["choices"][0]["finish_reason"] == "stop"
+
+
+class TestMessagesToOpenai:
+    def test_convert_message_list(self):
+        human = _make_human_message("Hi")
+        ai = _make_ai_message(content="Hello!")
+        tool_msg = _make_tool_message("result", "call-1")
+        messages = [human, ai, tool_msg]
+        result = langchain_messages_to_openai(messages)
+        assert len(result) == 3
+        assert result[0]["role"] == "user"
+        assert result[1]["role"] == "assistant"
+        assert result[2]["role"] == "tool"
+
+    def test_empty_list(self):
+        assert langchain_messages_to_openai([]) == []
diff --git a/backend/tests/test_custom_agent.py b/backend/tests/test_custom_agent.py
index 2117e05d2..22ba0c1d0 100644
--- a/backend/tests/test_custom_agent.py
+++ b/backend/tests/test_custom_agent.py
@@ -9,7 +9,9 @@ import pytest
 import yaml
 from fastapi.testclient import TestClient
 
-from deerflow.config.agents_api_config import AgentsApiConfig, get_agents_api_config, set_agents_api_config
+from deerflow.config.memory_config import MemoryConfig
+
+_TEST_MEMORY_CONFIG = MemoryConfig()
 
 # ---------------------------------------------------------------------------
 # Helpers
@@ -329,38 +331,26 @@ class TestMemoryFilePath:
     def test_global_memory_path(self, tmp_path):
         """None agent_name should return global memory file."""
         from deerflow.agents.memory.storage import FileMemoryStorage
-        from deerflow.config.memory_config import MemoryConfig
 
-        with (
-            patch("deerflow.agents.memory.storage.get_paths", return_value=_make_paths(tmp_path)),
-            patch("deerflow.agents.memory.storage.get_memory_config", return_value=MemoryConfig(storage_path="")),
-        ):
-            storage = FileMemoryStorage()
+        with patch("deerflow.agents.memory.storage.get_paths", return_value=_make_paths(tmp_path)):
+            storage = FileMemoryStorage(_TEST_MEMORY_CONFIG)
             path = storage._get_memory_file_path(None)
         assert path == tmp_path / "memory.json"
 
     def test_agent_memory_path(self, tmp_path):
         """Providing agent_name should return per-agent memory file."""
         from deerflow.agents.memory.storage import FileMemoryStorage
-        from deerflow.config.memory_config import MemoryConfig
 
-        with (
-            patch("deerflow.agents.memory.storage.get_paths", return_value=_make_paths(tmp_path)),
-            patch("deerflow.agents.memory.storage.get_memory_config", return_value=MemoryConfig(storage_path="")),
-        ):
-            storage = FileMemoryStorage()
+        with patch("deerflow.agents.memory.storage.get_paths", return_value=_make_paths(tmp_path)):
+            storage = FileMemoryStorage(_TEST_MEMORY_CONFIG)
             path = storage._get_memory_file_path("code-reviewer")
         assert path == tmp_path / "agents" / "code-reviewer" / "memory.json"
 
     def test_different_paths_for_different_agents(self, tmp_path):
         from deerflow.agents.memory.storage import FileMemoryStorage
-        from deerflow.config.memory_config import MemoryConfig
 
-        with (
-            patch("deerflow.agents.memory.storage.get_paths", return_value=_make_paths(tmp_path)),
-            patch("deerflow.agents.memory.storage.get_memory_config", return_value=MemoryConfig(storage_path="")),
-        ):
-            storage = FileMemoryStorage()
+        with patch("deerflow.agents.memory.storage.get_paths", return_value=_make_paths(tmp_path)):
+            storage = FileMemoryStorage(_TEST_MEMORY_CONFIG)
             path_global = storage._get_memory_file_path(None)
             path_a = storage._get_memory_file_path("agent-a")
             path_b = storage._get_memory_file_path("agent-b")
@@ -389,38 +379,13 @@ def _make_test_app(tmp_path: Path):
 @pytest.fixture()
 def agent_client(tmp_path):
     """TestClient with agents router, using tmp_path as base_dir."""
-    import app.gateway.routers.agents as agents_router
-
     paths_instance = _make_paths(tmp_path)
-    previous_config = AgentsApiConfig(**get_agents_api_config().model_dump())
 
-    with patch("deerflow.config.agents_config.get_paths", return_value=paths_instance), patch.object(agents_router, "get_paths", return_value=paths_instance):
-        set_agents_api_config(AgentsApiConfig(enabled=True))
-        try:
-            app = _make_test_app(tmp_path)
-            with TestClient(app) as client:
-                client._tmp_path = tmp_path  # type: ignore[attr-defined]
-                yield client
-        finally:
-            set_agents_api_config(previous_config)
-
-
-@pytest.fixture()
-def disabled_agent_client(tmp_path):
-    """TestClient with agents router while the management API is disabled."""
-    import app.gateway.routers.agents as agents_router
-
-    paths_instance = _make_paths(tmp_path)
-    previous_config = AgentsApiConfig(**get_agents_api_config().model_dump())
-
-    with patch("deerflow.config.agents_config.get_paths", return_value=paths_instance), patch.object(agents_router, "get_paths", return_value=paths_instance):
-        set_agents_api_config(AgentsApiConfig(enabled=False))
-        try:
-            app = _make_test_app(tmp_path)
-            with TestClient(app) as client:
-                yield client
-        finally:
-            set_agents_api_config(previous_config)
+    with patch("deerflow.config.agents_config.get_paths", return_value=paths_instance), patch("app.gateway.routers.agents.get_paths", return_value=paths_instance):
+        app = _make_test_app(tmp_path)
+        with TestClient(app) as client:
+            client._tmp_path = tmp_path  # type: ignore[attr-defined]
+            yield client
 
 
 class TestAgentsAPI:
@@ -586,37 +551,3 @@ class TestUserProfileAPI:
         response = agent_client.put("/api/user-profile", json={"content": ""})
         assert response.status_code == 200
         assert response.json()["content"] is None
-
-
-class TestAgentsApiDisabled:
-    def test_agents_list_returns_403(self, disabled_agent_client):
-        response = disabled_agent_client.get("/api/agents")
-        assert response.status_code == 403
-        assert "agents_api.enabled=true" in response.json()["detail"]
-
-    def test_agent_get_returns_403(self, disabled_agent_client):
-        response = disabled_agent_client.get("/api/agents/example-agent")
-        assert response.status_code == 403
-
-    def test_agent_name_check_returns_403(self, disabled_agent_client):
-        response = disabled_agent_client.get("/api/agents/check", params={"name": "example-agent"})
-        assert response.status_code == 403
-
-    def test_agent_create_returns_403(self, disabled_agent_client):
-        response = disabled_agent_client.post("/api/agents", json={"name": "example-agent", "soul": "blocked"})
-        assert response.status_code == 403
-
-    def test_agent_update_returns_403(self, disabled_agent_client):
-        response = disabled_agent_client.put("/api/agents/example-agent", json={"description": "blocked"})
-        assert response.status_code == 403
-
-    def test_agent_delete_returns_403(self, disabled_agent_client):
-        response = disabled_agent_client.delete("/api/agents/example-agent")
-        assert response.status_code == 403
-
-    def test_user_profile_routes_return_403(self, disabled_agent_client):
-        get_response = disabled_agent_client.get("/api/user-profile")
-        put_response = disabled_agent_client.put("/api/user-profile", json={"content": "blocked"})
-
-        assert get_response.status_code == 403
-        assert put_response.status_code == 403
diff --git a/backend/tests/test_deer_flow_context.py b/backend/tests/test_deer_flow_context.py
new file mode 100644
index 000000000..bf1005bd0
--- /dev/null
+++ b/backend/tests/test_deer_flow_context.py
@@ -0,0 +1,62 @@
+"""Tests for DeerFlowContext and resolve_context()."""
+
+from dataclasses import FrozenInstanceError
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from deerflow.config.app_config import AppConfig
+from deerflow.config.deer_flow_context import DeerFlowContext, resolve_context
+from deerflow.config.sandbox_config import SandboxConfig
+
+
+def _make_config(**overrides) -> AppConfig:
+    defaults = {"sandbox": SandboxConfig(use="test")}
+    defaults.update(overrides)
+    return AppConfig(**defaults)
+
+
+class TestDeerFlowContext:
+    def test_frozen(self):
+        ctx = DeerFlowContext(app_config=_make_config(), thread_id="t1")
+        with pytest.raises(FrozenInstanceError):
+            ctx.app_config = _make_config()
+
+    def test_fields(self):
+        config = _make_config()
+        ctx = DeerFlowContext(app_config=config, thread_id="t1", agent_name="test-agent")
+        assert ctx.thread_id == "t1"
+        assert ctx.agent_name == "test-agent"
+        assert ctx.app_config is config
+
+    def test_agent_name_default(self):
+        ctx = DeerFlowContext(app_config=_make_config(), thread_id="t1")
+        assert ctx.agent_name is None
+
+    def test_thread_id_required(self):
+        with pytest.raises(TypeError):
+            DeerFlowContext(app_config=_make_config())  # type: ignore[call-arg]
+
+
+class TestResolveContext:
+    def test_returns_typed_context_directly(self):
+        """Gateway/Client path: runtime.context is DeerFlowContext → return as-is."""
+        config = _make_config()
+        ctx = DeerFlowContext(app_config=config, thread_id="t1")
+        runtime = MagicMock()
+        runtime.context = ctx
+        assert resolve_context(runtime) is ctx
+
+    def test_raises_on_none_context(self):
+        """Without a typed DeerFlowContext, resolve_context refuses to guess."""
+        runtime = MagicMock()
+        runtime.context = None
+        with pytest.raises(RuntimeError, match="resolve_context: runtime.context is not a DeerFlowContext"):
+            resolve_context(runtime)
+
+    def test_raises_on_dict_context(self):
+        """Legacy dict shape is no longer supported — we raise instead of lazily loading AppConfig."""
+        runtime = MagicMock()
+        runtime.context = {"thread_id": "old-dict", "agent_name": "from-dict"}
+        with pytest.raises(RuntimeError, match="resolve_context: runtime.context is not a DeerFlowContext"):
+            resolve_context(runtime)
diff --git a/backend/tests/test_ensure_admin.py b/backend/tests/test_ensure_admin.py
new file mode 100644
index 000000000..9930b047f
--- /dev/null
+++ b/backend/tests/test_ensure_admin.py
@@ -0,0 +1,296 @@
+"""Tests for _ensure_admin_user() in app.py.
+
+Covers: first-boot no-op (admin creation removed), orphan migration
+when admin exists, no-op on no admin found, and edge cases.
+"""
+
+import asyncio
+import os
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+os.environ.setdefault("AUTH_JWT_SECRET", "test-secret-key-ensure-admin-testing-min-32")
+
+from app.gateway.auth.config import AuthConfig, set_auth_config
+
+_JWT_SECRET = "test-secret-key-ensure-admin-testing-min-32"
+
+
+@pytest.fixture(autouse=True)
+def _setup_auth_config():
+    set_auth_config(AuthConfig(jwt_secret=_JWT_SECRET))
+    yield
+    set_auth_config(AuthConfig(jwt_secret=_JWT_SECRET))
+
+
+def _make_app_stub(store=None):
+    """Minimal app-like object with state.store."""
+    app = SimpleNamespace()
+    app.state = SimpleNamespace()
+    app.state.store = store
+    return app
+
+
+def _make_provider(admin_count=0):
+    p = AsyncMock()
+    p.count_users = AsyncMock(return_value=admin_count)
+    p.count_admin_users = AsyncMock(return_value=admin_count)
+    p.create_user = AsyncMock()
+    p.update_user = AsyncMock(side_effect=lambda u: u)
+    return p
+
+
+def _make_session_factory(admin_row=None):
+    """Build a mock async session factory that returns a row from execute()."""
+    row_result = MagicMock()
+    row_result.scalar_one_or_none.return_value = admin_row
+
+    execute_result = MagicMock()
+    execute_result.scalar_one_or_none.return_value = admin_row
+
+    session = AsyncMock()
+    session.execute = AsyncMock(return_value=execute_result)
+
+    # Async context manager
+    session_cm = AsyncMock()
+    session_cm.__aenter__ = AsyncMock(return_value=session)
+    session_cm.__aexit__ = AsyncMock(return_value=False)
+
+    sf = MagicMock()
+    sf.return_value = session_cm
+    return sf
+
+
+# ── First boot: no admin → return early ──────────────────────────────────
+
+
+def test_first_boot_does_not_create_admin():
+    """admin_count==0 → do NOT create admin automatically."""
+    provider = _make_provider(admin_count=0)
+    app = _make_app_stub()
+
+    with patch("app.gateway.deps.get_local_provider", return_value=provider):
+        from app.gateway.app import _ensure_admin_user
+
+        asyncio.run(_ensure_admin_user(app))
+
+    provider.create_user.assert_not_called()
+
+
+def test_first_boot_skips_migration():
+    """No admin → return early before any migration attempt."""
+    provider = _make_provider(admin_count=0)
+    store = AsyncMock()
+    store.asearch = AsyncMock(return_value=[])
+    app = _make_app_stub(store=store)
+
+    with patch("app.gateway.deps.get_local_provider", return_value=provider):
+        from app.gateway.app import _ensure_admin_user
+
+        asyncio.run(_ensure_admin_user(app))
+
+    store.asearch.assert_not_called()
+
+
+# ── Admin exists: migration runs when admin row found ────────────────────
+
+
+def test_admin_exists_triggers_migration():
+    """Admin exists and admin row found → _migrate_orphaned_threads called."""
+    from uuid import uuid4
+
+    admin_row = MagicMock()
+    admin_row.id = uuid4()
+
+    provider = _make_provider(admin_count=1)
+    sf = _make_session_factory(admin_row=admin_row)
+    store = AsyncMock()
+    store.asearch = AsyncMock(return_value=[])
+    app = _make_app_stub(store=store)
+
+    with patch("app.gateway.deps.get_local_provider", return_value=provider):
+        with patch("deerflow.persistence.engine.get_session_factory", return_value=sf):
+            from app.gateway.app import _ensure_admin_user
+
+            asyncio.run(_ensure_admin_user(app))
+
+    store.asearch.assert_called_once()
+
+
+def test_admin_exists_no_admin_row_skips_migration():
+    """Admin count > 0 but DB row missing (edge case) → skip migration gracefully."""
+    provider = _make_provider(admin_count=2)
+    sf = _make_session_factory(admin_row=None)
+    store = AsyncMock()
+    app = _make_app_stub(store=store)
+
+    with patch("app.gateway.deps.get_local_provider", return_value=provider):
+        with patch("deerflow.persistence.engine.get_session_factory", return_value=sf):
+            from app.gateway.app import _ensure_admin_user
+
+            asyncio.run(_ensure_admin_user(app))
+
+    store.asearch.assert_not_called()
+
+
+def test_admin_exists_no_store_skips_migration():
+    """Admin exists, row found, but no store → no crash, no migration."""
+    from uuid import uuid4
+
+    admin_row = MagicMock()
+    admin_row.id = uuid4()
+
+    provider = _make_provider(admin_count=1)
+    sf = _make_session_factory(admin_row=admin_row)
+    app = _make_app_stub(store=None)
+
+    with patch("app.gateway.deps.get_local_provider", return_value=provider):
+        with patch("deerflow.persistence.engine.get_session_factory", return_value=sf):
+            from app.gateway.app import _ensure_admin_user
+
+            asyncio.run(_ensure_admin_user(app))
+
+    # No assertion needed — just verify no crash
+
+
+def test_admin_exists_session_factory_none_skips_migration():
+    """get_session_factory() returns None → return early, no crash."""
+    provider = _make_provider(admin_count=1)
+    store = AsyncMock()
+    app = _make_app_stub(store=store)
+
+    with patch("app.gateway.deps.get_local_provider", return_value=provider):
+        with patch("deerflow.persistence.engine.get_session_factory", return_value=None):
+            from app.gateway.app import _ensure_admin_user
+
+            asyncio.run(_ensure_admin_user(app))
+
+    store.asearch.assert_not_called()
+
+
+def test_migration_failure_is_non_fatal():
+    """_migrate_orphaned_threads exception is caught and logged."""
+    from uuid import uuid4
+
+    admin_row = MagicMock()
+    admin_row.id = uuid4()
+
+    provider = _make_provider(admin_count=1)
+    sf = _make_session_factory(admin_row=admin_row)
+    store = AsyncMock()
+    store.asearch = AsyncMock(side_effect=RuntimeError("store crashed"))
+    app = _make_app_stub(store=store)
+
+    with patch("app.gateway.deps.get_local_provider", return_value=provider):
+        with patch("deerflow.persistence.engine.get_session_factory", return_value=sf):
+            from app.gateway.app import _ensure_admin_user
+
+            # Should not raise
+            asyncio.run(_ensure_admin_user(app))
+
+
+# ── Section 5.1-5.6 upgrade path: orphan thread migration ────────────────
+
+
+def test_migrate_orphaned_threads_stamps_user_id_on_unowned_rows():
+    """First boot finds Store-only legacy threads → stamps admin's id.
+
+    Validates the **TC-UPG-02 upgrade story**: an operator running main
+    (no auth) accumulates threads in the LangGraph Store namespace
+    ``("threads",)`` with no ``metadata.user_id``. After upgrading to
+    feat/auth-on-2.0-rc, the first ``_ensure_admin_user`` boot should
+    rewrite each unowned item with the freshly created admin's id.
+    """
+    from app.gateway.app import _migrate_orphaned_threads
+
+    # Three orphan items + one already-owned item that should be left alone.
+    items = [
+        SimpleNamespace(key="t1", value={"metadata": {"title": "old-thread-1"}}),
+        SimpleNamespace(key="t2", value={"metadata": {"title": "old-thread-2"}}),
+        SimpleNamespace(key="t3", value={"metadata": {}}),
+        SimpleNamespace(key="t4", value={"metadata": {"user_id": "someone-else", "title": "preserved"}}),
+    ]
+    store = AsyncMock()
+    # asearch returns the entire batch on first call, then an empty page
+    # to terminate _iter_store_items.
+    store.asearch = AsyncMock(side_effect=[items, []])
+    aput_calls: list[tuple[tuple, str, dict]] = []
+
+    async def _record_aput(namespace, key, value):
+        aput_calls.append((namespace, key, value))
+
+    store.aput = AsyncMock(side_effect=_record_aput)
+
+    migrated = asyncio.run(_migrate_orphaned_threads(store, "admin-id-42"))
+
+    # Three orphan rows migrated, one preserved.
+    assert migrated == 3
+    assert len(aput_calls) == 3
+    rewritten_keys = {call[1] for call in aput_calls}
+    assert rewritten_keys == {"t1", "t2", "t3"}
+    # Each rewrite carries the new user_id; titles preserved where present.
+    by_key = {call[1]: call[2] for call in aput_calls}
+    assert by_key["t1"]["metadata"]["user_id"] == "admin-id-42"
+    assert by_key["t1"]["metadata"]["title"] == "old-thread-1"
+    assert by_key["t3"]["metadata"]["user_id"] == "admin-id-42"
+    # The pre-owned item must NOT have been rewritten.
+    assert "t4" not in rewritten_keys
+
+
+def test_migrate_orphaned_threads_empty_store_is_noop():
+    """A store with no threads → migrated == 0, no aput calls."""
+    from app.gateway.app import _migrate_orphaned_threads
+
+    store = AsyncMock()
+    store.asearch = AsyncMock(return_value=[])
+    store.aput = AsyncMock()
+
+    migrated = asyncio.run(_migrate_orphaned_threads(store, "admin-id-42"))
+
+    assert migrated == 0
+    store.aput.assert_not_called()
+
+
+def test_iter_store_items_walks_multiple_pages():
+    """Cursor-style iterator pulls every page until a short page terminates.
+
+    Closes the regression where the old hardcoded ``limit=1000`` could
+    silently drop orphans on a large pre-upgrade dataset. The migration
+    code path uses the default ``page_size=500``; this test pins the
+    iterator with ``page_size=2`` so it stays fast.
+    """
+    from app.gateway.app import _iter_store_items
+
+    page_a = [SimpleNamespace(key=f"t{i}", value={"metadata": {}}) for i in range(2)]
+    page_b = [SimpleNamespace(key=f"t{i + 2}", value={"metadata": {}}) for i in range(2)]
+    page_c: list = []  # short page → loop terminates
+
+    store = AsyncMock()
+    store.asearch = AsyncMock(side_effect=[page_a, page_b, page_c])
+
+    async def _collect():
+        return [item.key async for item in _iter_store_items(store, ("threads",), page_size=2)]
+
+    keys = asyncio.run(_collect())
+    assert keys == ["t0", "t1", "t2", "t3"]
+    # Three asearch calls: full batch, full batch, empty terminator
+    assert store.asearch.await_count == 3
+
+
+def test_iter_store_items_terminates_on_short_page():
+    """A short page (len < page_size) ends the loop without an extra call."""
+    from app.gateway.app import _iter_store_items
+
+    page = [SimpleNamespace(key=f"t{i}", value={}) for i in range(3)]
+    store = AsyncMock()
+    store.asearch = AsyncMock(return_value=page)
+
+    async def _collect():
+        return [item.key async for item in _iter_store_items(store, ("threads",), page_size=10)]
+
+    keys = asyncio.run(_collect())
+    assert keys == ["t0", "t1", "t2"]
+    # Only one call — no terminator probe needed because len(batch) < page_size
+    assert store.asearch.await_count == 1
diff --git a/backend/tests/test_exa_tools.py b/backend/tests/test_exa_tools.py
index b7196918e..3953e21fc 100644
--- a/backend/tests/test_exa_tools.py
+++ b/backend/tests/test_exa_tools.py
@@ -5,20 +5,36 @@ from unittest.mock import MagicMock, patch
 
 import pytest
 
+# --- Phase 2 test helper: injected runtime for community tools ---
+from types import SimpleNamespace as _P2NS
+from deerflow.config.app_config import AppConfig as _P2AppConfig
+from deerflow.config.sandbox_config import SandboxConfig as _P2SandboxConfig
+from deerflow.config.deer_flow_context import DeerFlowContext as _P2Ctx
+_P2_APP_CONFIG = _P2AppConfig(sandbox=_P2SandboxConfig(use="test"))
+_P2_RUNTIME = _P2NS(context=_P2Ctx(app_config=_P2_APP_CONFIG, thread_id="test-thread"))
+
+
+def _runtime_with_config(config):
+    """Build a runtime carrying a custom (possibly mocked) app_config.
+
+    ``DeerFlowContext`` is a frozen dataclass typed as ``AppConfig`` but
+    dataclasses don't enforce the type at runtime — handing a Mock through
+    lets tests exercise the tool's ``get_tool_config`` lookup without going
+    through a process-global config.
+    """
+    ctx = _P2Ctx.__new__(_P2Ctx)
+    object.__setattr__(ctx, "app_config", config)
+    object.__setattr__(ctx, "thread_id", "test-thread")
+    object.__setattr__(ctx, "agent_name", None)
+    return _P2NS(context=ctx)
+# -------------------------------------------------------------------
+
+
 
 @pytest.fixture
 def mock_app_config():
-    """Mock the app config to return tool configurations."""
-    with patch("deerflow.community.exa.tools.get_app_config") as mock_config:
-        tool_config = MagicMock()
-        tool_config.model_extra = {
-            "max_results": 5,
-            "search_type": "auto",
-            "contents_max_characters": 1000,
-            "api_key": "test-api-key",
-        }
-        mock_config.return_value.get_tool_config.return_value = tool_config
-        yield mock_config
+    """Fixture retained as a pass-through: tests inject config via runtime directly."""
+    yield
 
 
 @pytest.fixture
@@ -49,7 +65,7 @@ class TestWebSearchTool:
 
         from deerflow.community.exa.tools import web_search_tool
 
-        result = web_search_tool.invoke({"query": "test query"})
+        result = web_search_tool.func(query="test query", runtime=_P2_RUNTIME)
         parsed = json.loads(result)
 
         assert len(parsed) == 2
@@ -67,30 +83,30 @@ class TestWebSearchTool:
 
     def test_search_with_custom_config(self, mock_exa_client):
         """Test search respects custom configuration values."""
-        with patch("deerflow.community.exa.tools.get_app_config") as mock_config:
-            tool_config = MagicMock()
-            tool_config.model_extra = {
-                "max_results": 10,
-                "search_type": "neural",
-                "contents_max_characters": 2000,
-                "api_key": "test-key",
-            }
-            mock_config.return_value.get_tool_config.return_value = tool_config
+        tool_config = MagicMock()
+        tool_config.model_extra = {
+            "max_results": 10,
+            "search_type": "neural",
+            "contents_max_characters": 2000,
+            "api_key": "test-key",
+        }
+        fake_config = MagicMock()
+        fake_config.get_tool_config.return_value = tool_config
 
-            mock_response = MagicMock()
-            mock_response.results = []
-            mock_exa_client.search.return_value = mock_response
+        mock_response = MagicMock()
+        mock_response.results = []
+        mock_exa_client.search.return_value = mock_response
 
-            from deerflow.community.exa.tools import web_search_tool
+        from deerflow.community.exa.tools import web_search_tool
 
-            web_search_tool.invoke({"query": "neural search"})
+        web_search_tool.func(query="neural search", runtime=_runtime_with_config(fake_config))
 
-            mock_exa_client.search.assert_called_once_with(
-                "neural search",
-                type="neural",
-                num_results=10,
-                contents={"highlights": {"max_characters": 2000}},
-            )
+        mock_exa_client.search.assert_called_once_with(
+            "neural search",
+            type="neural",
+            num_results=10,
+            contents={"highlights": {"max_characters": 2000}},
+        )
 
     def test_search_with_no_highlights(self, mock_app_config, mock_exa_client):
         """Test search handles results with no highlights."""
@@ -105,7 +121,7 @@ class TestWebSearchTool:
 
         from deerflow.community.exa.tools import web_search_tool
 
-        result = web_search_tool.invoke({"query": "test"})
+        result = web_search_tool.func(query="test", runtime=_P2_RUNTIME)
         parsed = json.loads(result)
 
         assert parsed[0]["snippet"] == ""
@@ -118,7 +134,7 @@ class TestWebSearchTool:
 
         from deerflow.community.exa.tools import web_search_tool
 
-        result = web_search_tool.invoke({"query": "nothing"})
+        result = web_search_tool.func(query="nothing", runtime=_P2_RUNTIME)
         parsed = json.loads(result)
 
         assert parsed == []
@@ -129,7 +145,7 @@ class TestWebSearchTool:
 
         from deerflow.community.exa.tools import web_search_tool
 
-        result = web_search_tool.invoke({"query": "error"})
+        result = web_search_tool.func(query="error", runtime=_P2_RUNTIME)
 
         assert result == "Error: API rate limit exceeded"
 
@@ -147,7 +163,7 @@ class TestWebFetchTool:
 
         from deerflow.community.exa.tools import web_fetch_tool
 
-        result = web_fetch_tool.invoke({"url": "https://example.com"})
+        result = web_fetch_tool.func(url="https://example.com", runtime=_P2_RUNTIME)
 
         assert result == "# Fetched Page\n\nThis is the page content."
         mock_exa_client.get_contents.assert_called_once_with(
@@ -167,7 +183,7 @@ class TestWebFetchTool:
 
         from deerflow.community.exa.tools import web_fetch_tool
 
-        result = web_fetch_tool.invoke({"url": "https://example.com"})
+        result = web_fetch_tool.func(url="https://example.com", runtime=_P2_RUNTIME)
 
         assert result.startswith("# Untitled\n\n")
 
@@ -179,7 +195,7 @@ class TestWebFetchTool:
 
         from deerflow.community.exa.tools import web_fetch_tool
 
-        result = web_fetch_tool.invoke({"url": "https://example.com/404"})
+        result = web_fetch_tool.func(url="https://example.com/404", runtime=_P2_RUNTIME)
 
         assert result == "Error: No results found"
 
@@ -189,16 +205,44 @@ class TestWebFetchTool:
 
         from deerflow.community.exa.tools import web_fetch_tool
 
-        result = web_fetch_tool.invoke({"url": "https://example.com"})
+        result = web_fetch_tool.func(url="https://example.com", runtime=_P2_RUNTIME)
 
         assert result == "Error: Connection timeout"
 
     def test_fetch_reads_web_fetch_config(self, mock_exa_client):
         """Test that web_fetch_tool reads 'web_fetch' config, not 'web_search'."""
-        with patch("deerflow.community.exa.tools.get_app_config") as mock_config:
-            tool_config = MagicMock()
-            tool_config.model_extra = {"api_key": "exa-fetch-key"}
-            mock_config.return_value.get_tool_config.return_value = tool_config
+        tool_config = MagicMock()
+        tool_config.model_extra = {"api_key": "exa-fetch-key"}
+        fake_config = MagicMock()
+        fake_config.get_tool_config.return_value = tool_config
+
+        mock_result = MagicMock()
+        mock_result.title = "Page"
+        mock_result.text = "Content."
+        mock_response = MagicMock()
+        mock_response.results = [mock_result]
+        mock_exa_client.get_contents.return_value = mock_response
+
+        from deerflow.community.exa.tools import web_fetch_tool
+
+        web_fetch_tool.func(url="https://example.com", runtime=_runtime_with_config(fake_config))
+
+        fake_config.get_tool_config.assert_any_call("web_fetch")
+
+    def test_fetch_uses_independent_api_key(self, mock_exa_client):
+        """Test mixed-provider config: web_fetch uses its own api_key, not web_search's."""
+        with patch("deerflow.community.exa.tools.Exa") as mock_exa_cls:
+            mock_exa_cls.return_value = mock_exa_client
+            fetch_config = MagicMock()
+            fetch_config.model_extra = {"api_key": "exa-fetch-key"}
+
+            def get_tool_config(name):
+                if name == "web_fetch":
+                    return fetch_config
+                return None
+
+            fake_config = MagicMock()
+            fake_config.get_tool_config.side_effect = get_tool_config
 
             mock_result = MagicMock()
             mock_result.title = "Page"
@@ -209,37 +253,9 @@ class TestWebFetchTool:
 
             from deerflow.community.exa.tools import web_fetch_tool
 
-            web_fetch_tool.invoke({"url": "https://example.com"})
+            web_fetch_tool.func(url="https://example.com", runtime=_runtime_with_config(fake_config))
 
-            mock_config.return_value.get_tool_config.assert_any_call("web_fetch")
-
-    def test_fetch_uses_independent_api_key(self, mock_exa_client):
-        """Test mixed-provider config: web_fetch uses its own api_key, not web_search's."""
-        with patch("deerflow.community.exa.tools.get_app_config") as mock_config:
-            with patch("deerflow.community.exa.tools.Exa") as mock_exa_cls:
-                mock_exa_cls.return_value = mock_exa_client
-                fetch_config = MagicMock()
-                fetch_config.model_extra = {"api_key": "exa-fetch-key"}
-
-                def get_tool_config(name):
-                    if name == "web_fetch":
-                        return fetch_config
-                    return None
-
-                mock_config.return_value.get_tool_config.side_effect = get_tool_config
-
-                mock_result = MagicMock()
-                mock_result.title = "Page"
-                mock_result.text = "Content."
-                mock_response = MagicMock()
-                mock_response.results = [mock_result]
-                mock_exa_client.get_contents.return_value = mock_response
-
-                from deerflow.community.exa.tools import web_fetch_tool
-
-                web_fetch_tool.invoke({"url": "https://example.com"})
-
-                mock_exa_cls.assert_called_once_with(api_key="exa-fetch-key")
+            mock_exa_cls.assert_called_once_with(api_key="exa-fetch-key")
 
     def test_fetch_truncates_long_content(self, mock_app_config, mock_exa_client):
         """Test fetch truncates content to 4096 characters."""
@@ -253,7 +269,7 @@ class TestWebFetchTool:
 
         from deerflow.community.exa.tools import web_fetch_tool
 
-        result = web_fetch_tool.invoke({"url": "https://example.com"})
+        result = web_fetch_tool.func(url="https://example.com", runtime=_P2_RUNTIME)
 
         # "# Long Page\n\n" is 14 chars, content truncated to 4096
         content_after_header = result.split("\n\n", 1)[1]
diff --git a/backend/tests/test_feedback.py b/backend/tests/test_feedback.py
new file mode 100644
index 000000000..a592bdd22
--- /dev/null
+++ b/backend/tests/test_feedback.py
@@ -0,0 +1,289 @@
+"""Tests for FeedbackRepository and follow-up association.
+
+Uses temp SQLite DB for ORM tests.
+"""
+
+import pytest
+
+from deerflow.persistence.feedback import FeedbackRepository
+
+
+async def _make_feedback_repo(tmp_path):
+    from deerflow.persistence.engine import get_session_factory, init_engine
+
+    url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
+    await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
+    return FeedbackRepository(get_session_factory())
+
+
+async def _cleanup():
+    from deerflow.persistence.engine import close_engine
+
+    await close_engine()
+
+
+# -- FeedbackRepository --
+
+
+class TestFeedbackRepository:
+    @pytest.mark.anyio
+    async def test_create_positive(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        record = await repo.create(run_id="r1", thread_id="t1", rating=1)
+        assert record["feedback_id"]
+        assert record["rating"] == 1
+        assert record["run_id"] == "r1"
+        assert record["thread_id"] == "t1"
+        assert "created_at" in record
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_create_negative_with_comment(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        record = await repo.create(
+            run_id="r1",
+            thread_id="t1",
+            rating=-1,
+            comment="Response was inaccurate",
+        )
+        assert record["rating"] == -1
+        assert record["comment"] == "Response was inaccurate"
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_create_with_message_id(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        record = await repo.create(run_id="r1", thread_id="t1", rating=1, message_id="msg-42")
+        assert record["message_id"] == "msg-42"
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_create_with_owner(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        record = await repo.create(run_id="r1", thread_id="t1", rating=1, user_id="user-1")
+        assert record["user_id"] == "user-1"
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_create_invalid_rating_zero(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        with pytest.raises(ValueError):
+            await repo.create(run_id="r1", thread_id="t1", rating=0)
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_create_invalid_rating_five(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        with pytest.raises(ValueError):
+            await repo.create(run_id="r1", thread_id="t1", rating=5)
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_get(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        created = await repo.create(run_id="r1", thread_id="t1", rating=1)
+        fetched = await repo.get(created["feedback_id"])
+        assert fetched is not None
+        assert fetched["feedback_id"] == created["feedback_id"]
+        assert fetched["rating"] == 1
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_get_nonexistent(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        assert await repo.get("nonexistent") is None
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_list_by_run(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        await repo.create(run_id="r1", thread_id="t1", rating=1, user_id="user-1")
+        await repo.create(run_id="r1", thread_id="t1", rating=-1, user_id="user-2")
+        await repo.create(run_id="r2", thread_id="t1", rating=1, user_id="user-1")
+        results = await repo.list_by_run("t1", "r1", user_id=None)
+        assert len(results) == 2
+        assert all(r["run_id"] == "r1" for r in results)
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_list_by_thread(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        await repo.create(run_id="r1", thread_id="t1", rating=1)
+        await repo.create(run_id="r2", thread_id="t1", rating=-1)
+        await repo.create(run_id="r3", thread_id="t2", rating=1)
+        results = await repo.list_by_thread("t1")
+        assert len(results) == 2
+        assert all(r["thread_id"] == "t1" for r in results)
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_delete(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        created = await repo.create(run_id="r1", thread_id="t1", rating=1)
+        deleted = await repo.delete(created["feedback_id"])
+        assert deleted is True
+        assert await repo.get(created["feedback_id"]) is None
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_delete_nonexistent(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        deleted = await repo.delete("nonexistent")
+        assert deleted is False
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_aggregate_by_run(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        await repo.create(run_id="r1", thread_id="t1", rating=1, user_id="user-1")
+        await repo.create(run_id="r1", thread_id="t1", rating=1, user_id="user-2")
+        await repo.create(run_id="r1", thread_id="t1", rating=-1, user_id="user-3")
+        stats = await repo.aggregate_by_run("t1", "r1")
+        assert stats["total"] == 3
+        assert stats["positive"] == 2
+        assert stats["negative"] == 1
+        assert stats["run_id"] == "r1"
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_aggregate_empty(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        stats = await repo.aggregate_by_run("t1", "r1")
+        assert stats["total"] == 0
+        assert stats["positive"] == 0
+        assert stats["negative"] == 0
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_upsert_creates_new(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        record = await repo.upsert(run_id="r1", thread_id="t1", rating=1, user_id="u1")
+        assert record["rating"] == 1
+        assert record["feedback_id"]
+        assert record["user_id"] == "u1"
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_upsert_updates_existing(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        first = await repo.upsert(run_id="r1", thread_id="t1", rating=1, user_id="u1")
+        second = await repo.upsert(run_id="r1", thread_id="t1", rating=-1, user_id="u1", comment="changed my mind")
+        assert second["feedback_id"] == first["feedback_id"]
+        assert second["rating"] == -1
+        assert second["comment"] == "changed my mind"
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_upsert_different_users_separate(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        r1 = await repo.upsert(run_id="r1", thread_id="t1", rating=1, user_id="u1")
+        r2 = await repo.upsert(run_id="r1", thread_id="t1", rating=-1, user_id="u2")
+        assert r1["feedback_id"] != r2["feedback_id"]
+        assert r1["rating"] == 1
+        assert r2["rating"] == -1
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_upsert_invalid_rating(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        with pytest.raises(ValueError):
+            await repo.upsert(run_id="r1", thread_id="t1", rating=0, user_id="u1")
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_delete_by_run(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        await repo.upsert(run_id="r1", thread_id="t1", rating=1, user_id="u1")
+        deleted = await repo.delete_by_run(thread_id="t1", run_id="r1", user_id="u1")
+        assert deleted is True
+        results = await repo.list_by_run("t1", "r1", user_id="u1")
+        assert len(results) == 0
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_delete_by_run_nonexistent(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        deleted = await repo.delete_by_run(thread_id="t1", run_id="r1", user_id="u1")
+        assert deleted is False
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_list_by_thread_grouped(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        await repo.upsert(run_id="r1", thread_id="t1", rating=1, user_id="u1")
+        await repo.upsert(run_id="r2", thread_id="t1", rating=-1, user_id="u1")
+        await repo.upsert(run_id="r3", thread_id="t2", rating=1, user_id="u1")
+        grouped = await repo.list_by_thread_grouped("t1", user_id="u1")
+        assert "r1" in grouped
+        assert "r2" in grouped
+        assert "r3" not in grouped
+        assert grouped["r1"]["rating"] == 1
+        assert grouped["r2"]["rating"] == -1
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_list_by_thread_grouped_empty(self, tmp_path):
+        repo = await _make_feedback_repo(tmp_path)
+        grouped = await repo.list_by_thread_grouped("t1", user_id="u1")
+        assert grouped == {}
+        await _cleanup()
+
+
+# -- Follow-up association --
+
+
+class TestFollowUpAssociation:
+    @pytest.mark.anyio
+    async def test_run_records_follow_up_via_memory_store(self):
+        """MemoryRunStore stores follow_up_to_run_id in kwargs."""
+        from deerflow.runtime.runs.store.memory import MemoryRunStore
+
+        store = MemoryRunStore()
+        await store.put("r1", thread_id="t1", status="success")
+        # MemoryRunStore doesn't have follow_up_to_run_id as a top-level param,
+        # but it can be passed via metadata
+        await store.put("r2", thread_id="t1", metadata={"follow_up_to_run_id": "r1"})
+        run = await store.get("r2")
+        assert run["metadata"]["follow_up_to_run_id"] == "r1"
+
+    @pytest.mark.anyio
+    async def test_human_message_has_follow_up_metadata(self):
+        """human_message event metadata includes follow_up_to_run_id."""
+        from deerflow.runtime.events.store.memory import MemoryRunEventStore
+
+        event_store = MemoryRunEventStore()
+        await event_store.put(
+            thread_id="t1",
+            run_id="r2",
+            event_type="human_message",
+            category="message",
+            content="Tell me more about that",
+            metadata={"follow_up_to_run_id": "r1"},
+        )
+        messages = await event_store.list_messages("t1")
+        assert messages[0]["metadata"]["follow_up_to_run_id"] == "r1"
+
+    @pytest.mark.anyio
+    async def test_follow_up_auto_detection_logic(self):
+        """Simulate the auto-detection: latest successful run becomes follow_up_to."""
+        from deerflow.runtime.runs.store.memory import MemoryRunStore
+
+        store = MemoryRunStore()
+        await store.put("r1", thread_id="t1", status="success")
+        await store.put("r2", thread_id="t1", status="error")
+
+        # Auto-detect: list_by_thread returns newest first
+        recent = await store.list_by_thread("t1", limit=1)
+        follow_up = None
+        if recent and recent[0].get("status") == "success":
+            follow_up = recent[0]["run_id"]
+        # r2 (error) is newest, so no follow_up detected
+        assert follow_up is None
+
+        # Now add a successful run
+        await store.put("r3", thread_id="t1", status="success")
+        recent = await store.list_by_thread("t1", limit=1)
+        follow_up = None
+        if recent and recent[0].get("status") == "success":
+            follow_up = recent[0]["run_id"]
+        assert follow_up == "r3"
diff --git a/backend/tests/test_firecrawl_tools.py b/backend/tests/test_firecrawl_tools.py
index fd61f817e..67b8f20ca 100644
--- a/backend/tests/test_firecrawl_tools.py
+++ b/backend/tests/test_firecrawl_tools.py
@@ -3,14 +3,31 @@
 import json
 from unittest.mock import MagicMock, patch
 
+from types import SimpleNamespace as _P2NS
+
+from deerflow.config.app_config import AppConfig as _P2AppConfig
+from deerflow.config.deer_flow_context import DeerFlowContext as _P2Ctx
+from deerflow.config.sandbox_config import SandboxConfig as _P2SandboxConfig
+
+_P2_APP_CONFIG = _P2AppConfig(sandbox=_P2SandboxConfig(use="test"))
+_P2_RUNTIME = _P2NS(context=_P2Ctx(app_config=_P2_APP_CONFIG, thread_id="test-thread"))
+
+
+def _runtime_with_config(config):
+    ctx = _P2Ctx.__new__(_P2Ctx)
+    object.__setattr__(ctx, "app_config", config)
+    object.__setattr__(ctx, "thread_id", "test-thread")
+    object.__setattr__(ctx, "agent_name", None)
+    return _P2NS(context=ctx)
+
 
 class TestWebSearchTool:
     @patch("deerflow.community.firecrawl.tools.FirecrawlApp")
-    @patch("deerflow.community.firecrawl.tools.get_app_config")
-    def test_search_uses_web_search_config(self, mock_get_app_config, mock_firecrawl_cls):
+    def test_search_uses_web_search_config(self, mock_firecrawl_cls):
         search_config = MagicMock()
         search_config.model_extra = {"api_key": "firecrawl-search-key", "max_results": 7}
-        mock_get_app_config.return_value.get_tool_config.return_value = search_config
+        fake_config = MagicMock()
+        fake_config.get_tool_config.return_value = search_config
 
         mock_result = MagicMock()
         mock_result.web = [
@@ -20,7 +37,7 @@ class TestWebSearchTool:
 
         from deerflow.community.firecrawl.tools import web_search_tool
 
-        result = web_search_tool.invoke({"query": "test query"})
+        result = web_search_tool.func(query="test query", runtime=_runtime_with_config(fake_config))
 
         assert json.loads(result) == [
             {
@@ -29,15 +46,14 @@ class TestWebSearchTool:
                 "snippet": "Snippet",
             }
         ]
-        mock_get_app_config.return_value.get_tool_config.assert_called_with("web_search")
+        fake_config.get_tool_config.assert_called_with("web_search")
         mock_firecrawl_cls.assert_called_once_with(api_key="firecrawl-search-key")
         mock_firecrawl_cls.return_value.search.assert_called_once_with("test query", limit=7)
 
 
 class TestWebFetchTool:
     @patch("deerflow.community.firecrawl.tools.FirecrawlApp")
-    @patch("deerflow.community.firecrawl.tools.get_app_config")
-    def test_fetch_uses_web_fetch_config(self, mock_get_app_config, mock_firecrawl_cls):
+    def test_fetch_uses_web_fetch_config(self, mock_firecrawl_cls):
         fetch_config = MagicMock()
         fetch_config.model_extra = {"api_key": "firecrawl-fetch-key"}
 
@@ -46,7 +62,8 @@ class TestWebFetchTool:
                 return fetch_config
             return None
 
-        mock_get_app_config.return_value.get_tool_config.side_effect = get_tool_config
+        fake_config = MagicMock()
+        fake_config.get_tool_config.side_effect = get_tool_config
 
         mock_scrape_result = MagicMock()
         mock_scrape_result.markdown = "Fetched markdown"
@@ -55,10 +72,10 @@ class TestWebFetchTool:
 
         from deerflow.community.firecrawl.tools import web_fetch_tool
 
-        result = web_fetch_tool.invoke({"url": "https://example.com"})
+        result = web_fetch_tool.func(url="https://example.com", runtime=_runtime_with_config(fake_config))
 
         assert result == "# Fetched Page\n\nFetched markdown"
-        mock_get_app_config.return_value.get_tool_config.assert_any_call("web_fetch")
+        fake_config.get_tool_config.assert_any_call("web_fetch")
         mock_firecrawl_cls.assert_called_once_with(api_key="firecrawl-fetch-key")
         mock_firecrawl_cls.return_value.scrape.assert_called_once_with(
             "https://example.com",
diff --git a/backend/tests/test_gateway_deps_config.py b/backend/tests/test_gateway_deps_config.py
new file mode 100644
index 000000000..ad309ece9
--- /dev/null
+++ b/backend/tests/test_gateway_deps_config.py
@@ -0,0 +1,55 @@
+"""Tests for the FastAPI get_config dependency.
+
+Phase 2 step 1: introduces the new explicit-config primitive that
+resolves ``AppConfig`` from ``request.app.state.config``. After migration,
+it is the sole mechanism.
+"""
+
+from __future__ import annotations
+
+from fastapi import Depends, FastAPI
+from fastapi.testclient import TestClient
+
+from app.gateway.deps import get_config
+from deerflow.config.app_config import AppConfig
+from deerflow.config.sandbox_config import SandboxConfig
+
+
+def test_get_config_returns_app_state_config():
+    """get_config returns the AppConfig stored on app.state.config."""
+    app = FastAPI()
+    cfg = AppConfig(sandbox=SandboxConfig(use="test"))
+    app.state.config = cfg
+
+    @app.get("/probe")
+    def probe(c: AppConfig = Depends(get_config)):
+        # Identity check: FastAPI must hand us the exact object from app.state
+        return {"same_identity": c is cfg, "log_level": c.log_level}
+
+    client = TestClient(app)
+    response = client.get("/probe")
+
+    assert response.status_code == 200
+    body = response.json()
+    assert body["same_identity"] is True
+    assert body["log_level"] == "info"
+
+
+def test_get_config_reads_updated_app_state():
+    """When app.state.config is swapped (config reload), get_config sees the new value."""
+    app = FastAPI()
+    original = AppConfig(sandbox=SandboxConfig(use="test"), log_level="info")
+    replacement = original.model_copy(update={"log_level": "debug"})
+
+    app.state.config = original
+
+    @app.get("/log-level")
+    def log_level(c: AppConfig = Depends(get_config)):
+        return {"level": c.log_level}
+
+    client = TestClient(app)
+    assert client.get("/log-level").json() == {"level": "info"}
+
+    # Simulate config reload (PUT /mcp/config, etc.)
+    app.state.config = replacement
+    assert client.get("/log-level").json() == {"level": "debug"}
diff --git a/backend/tests/test_guardrail_middleware.py b/backend/tests/test_guardrail_middleware.py
index 5c021ba44..640f32d2e 100644
--- a/backend/tests/test_guardrail_middleware.py
+++ b/backend/tests/test_guardrail_middleware.py
@@ -333,12 +333,14 @@ class TestGuardrailsConfig:
         assert config.provider.use == "deerflow.guardrails.builtin:AllowlistProvider"
         assert config.provider.config == {"denied_tools": ["bash"]}
 
-    def test_singleton_load_and_get(self):
-        from deerflow.config.guardrails_config import get_guardrails_config, load_guardrails_config_from_dict, reset_guardrails_config
+    def test_guardrails_config_via_app_config(self):
+        from deerflow.config.app_config import AppConfig
+        from deerflow.config.guardrails_config import GuardrailProviderConfig, GuardrailsConfig
+        from deerflow.config.sandbox_config import SandboxConfig
 
-        try:
-            load_guardrails_config_from_dict({"enabled": True, "provider": {"use": "test:Foo"}})
-            config = get_guardrails_config()
-            assert config.enabled is True
-        finally:
-            reset_guardrails_config()
+        cfg = AppConfig(
+            sandbox=SandboxConfig(use="test"),
+            guardrails=GuardrailsConfig(enabled=True, provider=GuardrailProviderConfig(use="test:Foo")),
+        )
+        config = cfg.guardrails
+        assert config.enabled is True
diff --git a/backend/tests/test_infoquest_client.py b/backend/tests/test_infoquest_client.py
index 2a4876158..daf70742d 100644
--- a/backend/tests/test_infoquest_client.py
+++ b/backend/tests/test_infoquest_client.py
@@ -6,6 +6,16 @@ from unittest.mock import MagicMock, patch
 from deerflow.community.infoquest import tools
 from deerflow.community.infoquest.infoquest_client import InfoQuestClient
 
+# --- Phase 2 test helper: injected runtime for community tools ---
+from types import SimpleNamespace as _P2NS
+from deerflow.config.app_config import AppConfig as _P2AppConfig
+from deerflow.config.sandbox_config import SandboxConfig as _P2SandboxConfig
+from deerflow.config.deer_flow_context import DeerFlowContext as _P2Ctx
+_P2_APP_CONFIG = _P2AppConfig(sandbox=_P2SandboxConfig(use="test"))
+_P2_RUNTIME = _P2NS(context=_P2Ctx(app_config=_P2_APP_CONFIG, thread_id="test-thread"))
+# -------------------------------------------------------------------
+
+
 
 class TestInfoQuestClient:
     def test_infoquest_client_initialization(self):
@@ -130,7 +140,7 @@ class TestInfoQuestClient:
         mock_client.web_search.return_value = json.dumps([])
         mock_get_client.return_value = mock_client
 
-        result = tools.web_search_tool.run("test query")
+        result = tools.web_search_tool.func(query="test query", runtime=_P2_RUNTIME)
 
         assert result == json.dumps([])
         mock_get_client.assert_called_once()
@@ -143,14 +153,13 @@ class TestInfoQuestClient:
         mock_client.fetch.return_value = "<html><body>Test content</body></html>"
         mock_get_client.return_value = mock_client
 
-        result = tools.web_fetch_tool.run("https://example.com")
+        result = tools.web_fetch_tool.func(url="https://example.com", runtime=_P2_RUNTIME)
 
         assert result == "# Untitled\n\nTest content"
         mock_get_client.assert_called_once()
         mock_client.fetch.assert_called_once_with("https://example.com")
 
-    @patch("deerflow.community.infoquest.tools.get_app_config")
-    def test_get_infoquest_client(self, mock_get_app_config):
+    def test_get_infoquest_client(self):
         """Test _get_infoquest_client function with config."""
         mock_config = MagicMock()
         # Add image_search config to the side_effect
@@ -159,9 +168,8 @@ class TestInfoQuestClient:
             MagicMock(model_extra={"fetch_time": 10, "timeout": 30, "navigation_timeout": 60}),  # web_fetch config
             MagicMock(model_extra={"image_search_time_range": 7, "image_size": "l"}),  # image_search config
         ]
-        mock_get_app_config.return_value = mock_config
 
-        client = tools._get_infoquest_client()
+        client = tools._get_infoquest_client(mock_config)
 
         assert client.search_time_range == 24
         assert client.fetch_time == 10
@@ -321,7 +329,7 @@ class TestImageSearch:
         mock_client.image_search.return_value = json.dumps([{"image_url": "https://example.com/image1.jpg"}])
         mock_get_client.return_value = mock_client
 
-        result = tools.image_search_tool.run({"query": "test query"})
+        result = tools.image_search_tool.func(query="test query", runtime=_P2_RUNTIME)
 
         # Check if result is a valid JSON string
         result_data = json.loads(result)
@@ -340,7 +348,7 @@ class TestImageSearch:
         mock_get_client.return_value = mock_client
 
         # Pass all parameters as a dictionary (extra parameters will be ignored)
-        tools.image_search_tool.run({"query": "sunset", "time_range": 30, "site": "unsplash.com", "image_size": "l"})
+        tools.image_search_tool.func(query="sunset", runtime=_P2_RUNTIME)
 
         mock_get_client.assert_called_once()
         # image_search_tool only passes query to client.image_search
diff --git a/backend/tests/test_initialize_admin.py b/backend/tests/test_initialize_admin.py
new file mode 100644
index 000000000..17bfaf0b6
--- /dev/null
+++ b/backend/tests/test_initialize_admin.py
@@ -0,0 +1,165 @@
+"""Tests for the POST /api/v1/auth/initialize endpoint.
+
+Covers: first-boot admin creation, rejection when system already
+initialized, password strength validation,
+and public accessibility (no auth cookie required).
+"""
+
+import asyncio
+import os
+
+import pytest
+from fastapi.testclient import TestClient
+
+os.environ.setdefault("AUTH_JWT_SECRET", "test-secret-key-initialize-admin-min-32")
+
+from app.gateway.auth.config import AuthConfig, set_auth_config
+
+_TEST_SECRET = "test-secret-key-initialize-admin-min-32"
+
+
+@pytest.fixture(autouse=True)
+def _setup_auth(tmp_path):
+    """Fresh SQLite engine + auth config per test."""
+    from app.gateway import deps
+    from deerflow.persistence.engine import close_engine, init_engine
+
+    set_auth_config(AuthConfig(jwt_secret=_TEST_SECRET))
+    url = f"sqlite+aiosqlite:///{tmp_path}/init_admin.db"
+    asyncio.run(init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)))
+    deps._cached_local_provider = None
+    deps._cached_repo = None
+    try:
+        yield
+    finally:
+        deps._cached_local_provider = None
+        deps._cached_repo = None
+        asyncio.run(close_engine())
+
+
+@pytest.fixture()
+def client(_setup_auth):
+    from app.gateway.app import create_app
+    from app.gateway.auth.config import AuthConfig, set_auth_config
+
+    set_auth_config(AuthConfig(jwt_secret=_TEST_SECRET))
+    app = create_app()
+    # Do NOT use TestClient as a context manager — that would trigger the
+    # full lifespan which requires config.yaml. The auth endpoints work
+    # without the lifespan (persistence engine is set up by _setup_auth).
+    yield TestClient(app)
+
+
+def _init_payload(**extra):
+    """Build a valid /initialize payload."""
+    return {
+        "email": "admin@example.com",
+        "password": "Str0ng!Pass99",
+        **extra,
+    }
+
+
+# ── Happy path ────────────────────────────────────────────────────────────
+
+
+def test_initialize_creates_admin_and_sets_cookie(client):
+    """POST /initialize when no admin exists → 201, session cookie set."""
+    resp = client.post("/api/v1/auth/initialize", json=_init_payload())
+    assert resp.status_code == 201
+    data = resp.json()
+    assert data["email"] == "admin@example.com"
+    assert data["system_role"] == "admin"
+    assert "access_token" in resp.cookies
+
+
+def test_initialize_needs_setup_false(client):
+    """Newly created admin via /initialize has needs_setup=False."""
+    client.post("/api/v1/auth/initialize", json=_init_payload())
+    me = client.get("/api/v1/auth/me")
+    assert me.status_code == 200
+    assert me.json()["needs_setup"] is False
+
+
+# ── Rejection when already initialized ───────────────────────────────────
+
+
+def test_initialize_rejected_when_admin_exists(client):
+    """Second call to /initialize after admin exists → 409 system_already_initialized."""
+    client.post("/api/v1/auth/initialize", json=_init_payload())
+    resp2 = client.post(
+        "/api/v1/auth/initialize",
+        json={**_init_payload(), "email": "other@example.com"},
+    )
+    assert resp2.status_code == 409
+    body = resp2.json()
+    assert body["detail"]["code"] == "system_already_initialized"
+
+
+def test_initialize_register_does_not_block_initialization(client):
+    """/register creating a user before /initialize doesn't block admin creation."""
+    # Register a regular user first
+    client.post("/api/v1/auth/register", json={"email": "regular@example.com", "password": "Tr0ub4dor3a"})
+    # /initialize should still succeed (checks admin_count, not total user_count)
+    resp = client.post("/api/v1/auth/initialize", json=_init_payload())
+    assert resp.status_code == 201
+    assert resp.json()["system_role"] == "admin"
+
+
+# ── Endpoint is public (no cookie required) ───────────────────────────────
+
+
+def test_initialize_accessible_without_cookie(client):
+    """No access_token cookie needed for /initialize."""
+    resp = client.post(
+        "/api/v1/auth/initialize",
+        json=_init_payload(),
+        cookies={},
+    )
+    assert resp.status_code == 201
+
+
+# ── Password validation ───────────────────────────────────────────────────
+
+
+def test_initialize_rejects_short_password(client):
+    """Password shorter than 8 chars → 422."""
+    resp = client.post(
+        "/api/v1/auth/initialize",
+        json={**_init_payload(), "password": "short"},
+    )
+    assert resp.status_code == 422
+
+
+def test_initialize_rejects_common_password(client):
+    """Common password → 422."""
+    resp = client.post(
+        "/api/v1/auth/initialize",
+        json={**_init_payload(), "password": "password123"},
+    )
+    assert resp.status_code == 422
+
+
+# ── setup-status reflects initialization ─────────────────────────────────
+
+
+def test_setup_status_before_initialization(client):
+    """setup-status returns needs_setup=True before /initialize is called."""
+    resp = client.get("/api/v1/auth/setup-status")
+    assert resp.status_code == 200
+    assert resp.json()["needs_setup"] is True
+
+
+def test_setup_status_after_initialization(client):
+    """setup-status returns needs_setup=False after /initialize succeeds."""
+    client.post("/api/v1/auth/initialize", json=_init_payload())
+    resp = client.get("/api/v1/auth/setup-status")
+    assert resp.status_code == 200
+    assert resp.json()["needs_setup"] is False
+
+
+def test_setup_status_false_when_only_regular_user_exists(client):
+    """setup-status returns needs_setup=True even when regular users exist (no admin)."""
+    client.post("/api/v1/auth/register", json={"email": "regular@example.com", "password": "Tr0ub4dor3a"})
+    resp = client.get("/api/v1/auth/setup-status")
+    assert resp.status_code == 200
+    assert resp.json()["needs_setup"] is True
diff --git a/backend/tests/test_invoke_acp_agent_tool.py b/backend/tests/test_invoke_acp_agent_tool.py
index 8063875cf..352109963 100644
--- a/backend/tests/test_invoke_acp_agent_tool.py
+++ b/backend/tests/test_invoke_acp_agent_tool.py
@@ -6,7 +6,7 @@ from types import SimpleNamespace
 import pytest
 
 from deerflow.config.acp_config import ACPAgentConfig
-from deerflow.config.extensions_config import ExtensionsConfig, McpServerConfig, set_extensions_config
+from deerflow.config.extensions_config import ExtensionsConfig, McpServerConfig
 from deerflow.tools.builtins.invoke_acp_agent_tool import (
     _build_acp_mcp_servers,
     _build_mcp_servers,
@@ -18,7 +18,6 @@ from deerflow.tools.tools import get_available_tools
 
 
 def test_build_mcp_servers_filters_disabled_and_maps_transports():
-    set_extensions_config(ExtensionsConfig(mcp_servers={"stale": McpServerConfig(enabled=True, type="stdio", command="echo")}, skills={}))
     fresh_config = ExtensionsConfig(
         mcp_servers={
             "stdio": McpServerConfig(enabled=True, type="stdio", command="npx", args=["srv"]),
@@ -40,11 +39,9 @@ def test_build_mcp_servers_filters_disabled_and_maps_transports():
         }
     finally:
         monkeypatch.undo()
-        set_extensions_config(ExtensionsConfig(mcp_servers={}, skills={}))
 
 
 def test_build_acp_mcp_servers_formats_list_payload():
-    set_extensions_config(ExtensionsConfig(mcp_servers={"stale": McpServerConfig(enabled=True, type="stdio", command="echo")}, skills={}))
     fresh_config = ExtensionsConfig(
         mcp_servers={
             "stdio": McpServerConfig(enabled=True, type="stdio", command="npx", args=["srv"], env={"FOO": "bar"}),
@@ -77,7 +74,6 @@ def test_build_acp_mcp_servers_formats_list_payload():
         ]
     finally:
         monkeypatch.undo()
-        set_extensions_config(ExtensionsConfig(mcp_servers={}, skills={}))
 
 
 def test_build_permission_response_prefers_allow_once():
@@ -152,8 +148,10 @@ def test_get_work_dir_uses_base_dir_when_no_thread_id(monkeypatch, tmp_path):
 def test_get_work_dir_uses_per_thread_path_when_thread_id_given(monkeypatch, tmp_path):
     """P1.1: _get_work_dir(thread_id) uses {base_dir}/threads/{thread_id}/acp-workspace/."""
     from deerflow.config import paths as paths_module
+    from deerflow.runtime import user_context as uc_module
 
     monkeypatch.setattr(paths_module, "get_paths", lambda: paths_module.Paths(base_dir=tmp_path))
+    monkeypatch.setattr(uc_module, "get_effective_user_id", lambda: None)
     result = _get_work_dir("thread-abc-123")
     expected = tmp_path / "threads" / "thread-abc-123" / "acp-workspace"
     assert result == str(expected)
@@ -310,8 +308,10 @@ async def test_invoke_acp_agent_uses_fixed_acp_workspace(monkeypatch, tmp_path):
 async def test_invoke_acp_agent_uses_per_thread_workspace_when_thread_id_in_config(monkeypatch, tmp_path):
     """P1.1: When thread_id is in the RunnableConfig, ACP agent uses per-thread workspace."""
     from deerflow.config import paths as paths_module
+    from deerflow.runtime import user_context as uc_module
 
     monkeypatch.setattr(paths_module, "get_paths", lambda: paths_module.Paths(base_dir=tmp_path))
+    monkeypatch.setattr(uc_module, "get_effective_user_id", lambda: None)
 
     monkeypatch.setattr(
         "deerflow.config.extensions_config.ExtensionsConfig.from_file",
@@ -665,31 +665,23 @@ async def test_invoke_acp_agent_passes_none_env_when_not_configured(monkeypatch,
 
 
 def test_get_available_tools_includes_invoke_acp_agent_when_agents_configured(monkeypatch):
-    from deerflow.config.acp_config import load_acp_config_from_dict
-
-    load_acp_config_from_dict(
-        {
-            "codex": {
-                "command": "codex-acp",
-                "args": [],
-                "description": "Codex CLI",
-            }
-        }
-    )
-
     fake_config = SimpleNamespace(
         tools=[],
         models=[],
         tool_search=SimpleNamespace(enabled=False),
+        acp_agents={
+            "codex": ACPAgentConfig(
+                command="codex-acp",
+                args=[],
+                description="Codex CLI",
+            )
+        },
         get_model_config=lambda name: None,
     )
-    monkeypatch.setattr("deerflow.tools.tools.get_app_config", lambda: fake_config)
     monkeypatch.setattr(
         "deerflow.config.extensions_config.ExtensionsConfig.from_file",
         classmethod(lambda cls: ExtensionsConfig(mcp_servers={}, skills={})),
     )
 
-    tools = get_available_tools(include_mcp=True, subagent_enabled=False)
+    tools = get_available_tools(include_mcp=True, subagent_enabled=False, app_config=fake_config)
     assert "invoke_acp_agent" in [tool.name for tool in tools]
-
-    load_acp_config_from_dict({})
diff --git a/backend/tests/test_jina_client.py b/backend/tests/test_jina_client.py
index b1856e4ae..3091974d6 100644
--- a/backend/tests/test_jina_client.py
+++ b/backend/tests/test_jina_client.py
@@ -10,6 +10,16 @@ import deerflow.community.jina_ai.jina_client as jina_client_module
 from deerflow.community.jina_ai.jina_client import JinaClient
 from deerflow.community.jina_ai.tools import web_fetch_tool
 
+# --- Phase 2 test helper: injected runtime for community tools ---
+from types import SimpleNamespace as _P2NS
+from deerflow.config.app_config import AppConfig as _P2AppConfig
+from deerflow.config.sandbox_config import SandboxConfig as _P2SandboxConfig
+from deerflow.config.deer_flow_context import DeerFlowContext as _P2Ctx
+_P2_APP_CONFIG = _P2AppConfig(sandbox=_P2SandboxConfig(use="test"))
+_P2_RUNTIME = _P2NS(context=_P2Ctx(app_config=_P2_APP_CONFIG, thread_id="test-thread"))
+# -------------------------------------------------------------------
+
+
 
 @pytest.fixture
 def jina_client():
@@ -176,9 +186,8 @@ async def test_web_fetch_tool_returns_error_on_crawl_failure(monkeypatch):
 
     mock_config = MagicMock()
     mock_config.get_tool_config.return_value = None
-    monkeypatch.setattr("deerflow.community.jina_ai.tools.get_app_config", lambda: mock_config)
     monkeypatch.setattr(JinaClient, "crawl", mock_crawl)
-    result = await web_fetch_tool.ainvoke("https://example.com")
+    result = await web_fetch_tool.coroutine(url="https://example.com", runtime=_P2_RUNTIME)
     assert result.startswith("Error:")
     assert "429" in result
 
@@ -192,9 +201,8 @@ async def test_web_fetch_tool_returns_markdown_on_success(monkeypatch):
 
     mock_config = MagicMock()
     mock_config.get_tool_config.return_value = None
-    monkeypatch.setattr("deerflow.community.jina_ai.tools.get_app_config", lambda: mock_config)
     monkeypatch.setattr(JinaClient, "crawl", mock_crawl)
-    result = await web_fetch_tool.ainvoke("https://example.com")
+    result = await web_fetch_tool.coroutine(url="https://example.com", runtime=_P2_RUNTIME)
     assert "Hello world" in result
     assert not result.startswith("Error:")
 
diff --git a/backend/tests/test_langgraph_auth.py b/backend/tests/test_langgraph_auth.py
new file mode 100644
index 000000000..52d215751
--- /dev/null
+++ b/backend/tests/test_langgraph_auth.py
@@ -0,0 +1,312 @@
+"""Tests for LangGraph Server auth handler (langgraph_auth.py).
+
+Validates that the LangGraph auth layer enforces the same rules as Gateway:
+  cookie → JWT decode → DB lookup → token_version check → owner filter
+"""
+
+import asyncio
+import os
+from datetime import timedelta
+from pathlib import Path
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, patch
+from uuid import uuid4
+
+import pytest
+
+os.environ.setdefault("AUTH_JWT_SECRET", "test-secret-key-for-langgraph-auth-testing-min-32")
+
+from langgraph_sdk import Auth
+
+from app.gateway.auth.config import AuthConfig, set_auth_config
+from app.gateway.auth.jwt import create_access_token, decode_token
+from app.gateway.auth.models import User
+from app.gateway.langgraph_auth import add_owner_filter, authenticate
+
+# ── Helpers ───────────────────────────────────────────────────────────────
+
+_JWT_SECRET = "test-secret-key-for-langgraph-auth-testing-min-32"
+
+
+@pytest.fixture(autouse=True)
+def _setup_auth_config():
+    set_auth_config(AuthConfig(jwt_secret=_JWT_SECRET))
+    yield
+    set_auth_config(AuthConfig(jwt_secret=_JWT_SECRET))
+
+
+def _req(cookies=None, method="GET", headers=None):
+    return SimpleNamespace(cookies=cookies or {}, method=method, headers=headers or {})
+
+
+def _user(user_id=None, token_version=0):
+    return User(email="test@example.com", password_hash="fakehash", system_role="user", id=user_id or uuid4(), token_version=token_version)
+
+
+def _mock_provider(user=None):
+    p = AsyncMock()
+    p.get_user = AsyncMock(return_value=user)
+    return p
+
+
+# ── @auth.authenticate ───────────────────────────────────────────────────
+
+
+def test_no_cookie_raises_401():
+    with pytest.raises(Auth.exceptions.HTTPException) as exc:
+        asyncio.run(authenticate(_req()))
+    assert exc.value.status_code == 401
+    assert "Not authenticated" in str(exc.value.detail)
+
+
+def test_invalid_jwt_raises_401():
+    with pytest.raises(Auth.exceptions.HTTPException) as exc:
+        asyncio.run(authenticate(_req({"access_token": "garbage"})))
+    assert exc.value.status_code == 401
+    assert "Token error" in str(exc.value.detail)
+
+
+def test_expired_jwt_raises_401():
+    token = create_access_token("user-1", expires_delta=timedelta(seconds=-1))
+    with pytest.raises(Auth.exceptions.HTTPException) as exc:
+        asyncio.run(authenticate(_req({"access_token": token})))
+    assert exc.value.status_code == 401
+
+
+def test_user_not_found_raises_401():
+    token = create_access_token("ghost")
+    with patch("app.gateway.langgraph_auth.get_local_provider", return_value=_mock_provider(None)):
+        with pytest.raises(Auth.exceptions.HTTPException) as exc:
+            asyncio.run(authenticate(_req({"access_token": token})))
+        assert exc.value.status_code == 401
+        assert "User not found" in str(exc.value.detail)
+
+
+def test_token_version_mismatch_raises_401():
+    user = _user(token_version=2)
+    token = create_access_token(str(user.id), token_version=1)
+    with patch("app.gateway.langgraph_auth.get_local_provider", return_value=_mock_provider(user)):
+        with pytest.raises(Auth.exceptions.HTTPException) as exc:
+            asyncio.run(authenticate(_req({"access_token": token})))
+        assert exc.value.status_code == 401
+        assert "revoked" in str(exc.value.detail).lower()
+
+
+def test_valid_token_returns_user_id():
+    user = _user(token_version=0)
+    token = create_access_token(str(user.id), token_version=0)
+    with patch("app.gateway.langgraph_auth.get_local_provider", return_value=_mock_provider(user)):
+        result = asyncio.run(authenticate(_req({"access_token": token})))
+    assert result == str(user.id)
+
+
+def test_valid_token_matching_version():
+    user = _user(token_version=5)
+    token = create_access_token(str(user.id), token_version=5)
+    with patch("app.gateway.langgraph_auth.get_local_provider", return_value=_mock_provider(user)):
+        result = asyncio.run(authenticate(_req({"access_token": token})))
+    assert result == str(user.id)
+
+
+# ── @auth.authenticate edge cases ────────────────────────────────────────
+
+
+def test_provider_exception_propagates():
+    """Provider raises → should not be swallowed silently."""
+    token = create_access_token("user-1")
+    p = AsyncMock()
+    p.get_user = AsyncMock(side_effect=RuntimeError("DB down"))
+    with patch("app.gateway.langgraph_auth.get_local_provider", return_value=p):
+        with pytest.raises(RuntimeError, match="DB down"):
+            asyncio.run(authenticate(_req({"access_token": token})))
+
+
+def test_jwt_missing_ver_defaults_to_zero():
+    """JWT without 'ver' claim → decoded as ver=0, matches user with token_version=0."""
+    import jwt as pyjwt
+
+    uid = str(uuid4())
+    raw = pyjwt.encode({"sub": uid, "exp": 9999999999, "iat": 1000000000}, _JWT_SECRET, algorithm="HS256")
+    user = _user(user_id=uid, token_version=0)
+    with patch("app.gateway.langgraph_auth.get_local_provider", return_value=_mock_provider(user)):
+        result = asyncio.run(authenticate(_req({"access_token": raw})))
+    assert result == uid
+
+
+def test_jwt_missing_ver_rejected_when_user_version_nonzero():
+    """JWT without 'ver' (defaults 0) vs user with token_version=1 → 401."""
+    import jwt as pyjwt
+
+    uid = str(uuid4())
+    raw = pyjwt.encode({"sub": uid, "exp": 9999999999, "iat": 1000000000}, _JWT_SECRET, algorithm="HS256")
+    user = _user(user_id=uid, token_version=1)
+    with patch("app.gateway.langgraph_auth.get_local_provider", return_value=_mock_provider(user)):
+        with pytest.raises(Auth.exceptions.HTTPException) as exc:
+            asyncio.run(authenticate(_req({"access_token": raw})))
+        assert exc.value.status_code == 401
+
+
+def test_wrong_secret_raises_401():
+    """Token signed with different secret → 401."""
+    import jwt as pyjwt
+
+    raw = pyjwt.encode({"sub": "user-1", "exp": 9999999999, "ver": 0}, "wrong-secret-that-is-long-enough-32chars!", algorithm="HS256")
+    with pytest.raises(Auth.exceptions.HTTPException) as exc:
+        asyncio.run(authenticate(_req({"access_token": raw})))
+    assert exc.value.status_code == 401
+
+
+# ── @auth.on (owner filter) ──────────────────────────────────────────────
+
+
+class _FakeUser:
+    """Minimal BaseUser-compatible object without langgraph_api.config dependency."""
+
+    def __init__(self, identity: str):
+        self.identity = identity
+        self.is_authenticated = True
+        self.display_name = identity
+
+
+def _make_ctx(user_id):
+    return Auth.types.AuthContext(resource="threads", action="create", user=_FakeUser(user_id), permissions=[])
+
+
+def test_filter_injects_user_id():
+    value = {}
+    asyncio.run(add_owner_filter(_make_ctx("user-a"), value))
+    assert value["metadata"]["user_id"] == "user-a"
+
+
+def test_filter_preserves_existing_metadata():
+    value = {"metadata": {"title": "hello"}}
+    asyncio.run(add_owner_filter(_make_ctx("user-a"), value))
+    assert value["metadata"]["user_id"] == "user-a"
+    assert value["metadata"]["title"] == "hello"
+
+
+def test_filter_returns_user_id_dict():
+    result = asyncio.run(add_owner_filter(_make_ctx("user-x"), {}))
+    assert result == {"user_id": "user-x"}
+
+
+def test_filter_read_write_consistency():
+    value = {}
+    filter_dict = asyncio.run(add_owner_filter(_make_ctx("user-1"), value))
+    assert value["metadata"]["user_id"] == filter_dict["user_id"]
+
+
+def test_different_users_different_filters():
+    f_a = asyncio.run(add_owner_filter(_make_ctx("a"), {}))
+    f_b = asyncio.run(add_owner_filter(_make_ctx("b"), {}))
+    assert f_a["user_id"] != f_b["user_id"]
+
+
+def test_filter_overrides_conflicting_user_id():
+    """If value already has a different user_id in metadata, it gets overwritten."""
+    value = {"metadata": {"user_id": "attacker"}}
+    asyncio.run(add_owner_filter(_make_ctx("real-owner"), value))
+    assert value["metadata"]["user_id"] == "real-owner"
+
+
+def test_filter_with_empty_metadata():
+    """Explicit empty metadata dict is fine."""
+    value = {"metadata": {}}
+    result = asyncio.run(add_owner_filter(_make_ctx("user-z"), value))
+    assert value["metadata"]["user_id"] == "user-z"
+    assert result == {"user_id": "user-z"}
+
+
+# ── Gateway parity ───────────────────────────────────────────────────────
+
+
+def test_shared_jwt_secret():
+    token = create_access_token("user-1", token_version=3)
+    payload = decode_token(token)
+    from app.gateway.auth.errors import TokenError
+
+    assert not isinstance(payload, TokenError)
+    assert payload.sub == "user-1"
+    assert payload.ver == 3
+
+
+def test_langgraph_json_has_auth_path():
+    import json
+
+    config = json.loads((Path(__file__).parent.parent / "langgraph.json").read_text())
+    assert "auth" in config
+    assert "langgraph_auth" in config["auth"]["path"]
+
+
+def test_auth_handler_has_both_layers():
+    from app.gateway.langgraph_auth import auth
+
+    assert auth._authenticate_handler is not None
+    assert len(auth._global_handlers) == 1
+
+
+# ── CSRF in LangGraph auth ──────────────────────────────────────────────
+
+
+def test_csrf_get_no_check():
+    """GET requests skip CSRF — should proceed to JWT validation."""
+    with pytest.raises(Auth.exceptions.HTTPException) as exc:
+        asyncio.run(authenticate(_req(method="GET")))
+    # Rejected by missing cookie, NOT by CSRF
+    assert exc.value.status_code == 401
+    assert "Not authenticated" in str(exc.value.detail)
+
+
+def test_csrf_post_missing_token():
+    """POST without CSRF token → 403."""
+    with pytest.raises(Auth.exceptions.HTTPException) as exc:
+        asyncio.run(authenticate(_req(method="POST", cookies={"access_token": "some-jwt"})))
+    assert exc.value.status_code == 403
+    assert "CSRF token missing" in str(exc.value.detail)
+
+
+def test_csrf_post_mismatched_token():
+    """POST with mismatched CSRF tokens → 403."""
+    with pytest.raises(Auth.exceptions.HTTPException) as exc:
+        asyncio.run(
+            authenticate(
+                _req(
+                    method="POST",
+                    cookies={"access_token": "some-jwt", "csrf_token": "real-token"},
+                    headers={"x-csrf-token": "wrong-token"},
+                )
+            )
+        )
+    assert exc.value.status_code == 403
+    assert "mismatch" in str(exc.value.detail)
+
+
+def test_csrf_post_matching_token_proceeds_to_jwt():
+    """POST with matching CSRF tokens passes CSRF check, then fails on JWT."""
+    with pytest.raises(Auth.exceptions.HTTPException) as exc:
+        asyncio.run(
+            authenticate(
+                _req(
+                    method="POST",
+                    cookies={"access_token": "garbage", "csrf_token": "same-token"},
+                    headers={"x-csrf-token": "same-token"},
+                )
+            )
+        )
+    # Past CSRF, rejected by JWT decode
+    assert exc.value.status_code == 401
+    assert "Token error" in str(exc.value.detail)
+
+
+def test_csrf_put_requires_token():
+    """PUT also requires CSRF."""
+    with pytest.raises(Auth.exceptions.HTTPException) as exc:
+        asyncio.run(authenticate(_req(method="PUT", cookies={"access_token": "jwt"})))
+    assert exc.value.status_code == 403
+
+
+def test_csrf_delete_requires_token():
+    """DELETE also requires CSRF."""
+    with pytest.raises(Auth.exceptions.HTTPException) as exc:
+        asyncio.run(authenticate(_req(method="DELETE", cookies={"access_token": "jwt"})))
+    assert exc.value.status_code == 403
diff --git a/backend/tests/test_lead_agent_model_resolution.py b/backend/tests/test_lead_agent_model_resolution.py
index a3bc21cfb..833f16c4c 100644
--- a/backend/tests/test_lead_agent_model_resolution.py
+++ b/backend/tests/test_lead_agent_model_resolution.py
@@ -8,7 +8,6 @@ import pytest
 
 from deerflow.agents.lead_agent import agent as lead_agent_module
 from deerflow.config.app_config import AppConfig
-from deerflow.config.memory_config import MemoryConfig
 from deerflow.config.model_config import ModelConfig
 from deerflow.config.sandbox_config import SandboxConfig
 from deerflow.config.summarization_config import SummarizationConfig
@@ -33,7 +32,7 @@ def _make_model(name: str, *, supports_thinking: bool) -> ModelConfig:
     )
 
 
-def test_resolve_model_name_falls_back_to_default(monkeypatch, caplog):
+def test_resolve_model_name_falls_back_to_default(caplog):
     app_config = _make_app_config(
         [
             _make_model("default-model", supports_thinking=False),
@@ -41,16 +40,14 @@ def test_resolve_model_name_falls_back_to_default(monkeypatch, caplog):
         ]
     )
 
-    monkeypatch.setattr(lead_agent_module, "get_app_config", lambda: app_config)
-
     with caplog.at_level("WARNING"):
-        resolved = lead_agent_module._resolve_model_name("missing-model")
+        resolved = lead_agent_module._resolve_model_name(app_config, "missing-model")
 
     assert resolved == "default-model"
     assert "fallback to default model 'default-model'" in caplog.text
 
 
-def test_resolve_model_name_uses_default_when_none(monkeypatch):
+def test_resolve_model_name_uses_default_when_none():
     app_config = _make_app_config(
         [
             _make_model("default-model", supports_thinking=False),
@@ -58,23 +55,19 @@ def test_resolve_model_name_uses_default_when_none(monkeypatch):
         ]
     )
 
-    monkeypatch.setattr(lead_agent_module, "get_app_config", lambda: app_config)
-
-    resolved = lead_agent_module._resolve_model_name(None)
+    resolved = lead_agent_module._resolve_model_name(app_config, None)
 
     assert resolved == "default-model"
 
 
-def test_resolve_model_name_raises_when_no_models_configured(monkeypatch):
+def test_resolve_model_name_raises_when_no_models_configured():
     app_config = _make_app_config([])
 
-    monkeypatch.setattr(lead_agent_module, "get_app_config", lambda: app_config)
-
     with pytest.raises(
         ValueError,
         match="No chat models are configured",
     ):
-        lead_agent_module._resolve_model_name("missing-model")
+        lead_agent_module._resolve_model_name(app_config, "missing-model")
 
 
 def test_make_lead_agent_disables_thinking_when_model_does_not_support_it(monkeypatch):
@@ -82,13 +75,12 @@ def test_make_lead_agent_disables_thinking_when_model_does_not_support_it(monkey
 
     import deerflow.tools as tools_module
 
-    monkeypatch.setattr(lead_agent_module, "get_app_config", lambda: app_config)
     monkeypatch.setattr(tools_module, "get_available_tools", lambda **kwargs: [])
-    monkeypatch.setattr(lead_agent_module, "_build_middlewares", lambda config, model_name, agent_name=None: [])
+    monkeypatch.setattr(lead_agent_module, "_build_middlewares", lambda app_config, config, model_name, agent_name=None: [])
 
     captured: dict[str, object] = {}
 
-    def _fake_create_chat_model(*, name, thinking_enabled, reasoning_effort=None):
+    def _fake_create_chat_model(*, name, thinking_enabled, reasoning_effort=None, app_config=None):
         captured["name"] = name
         captured["thinking_enabled"] = thinking_enabled
         captured["reasoning_effort"] = reasoning_effort
@@ -105,7 +97,8 @@ def test_make_lead_agent_disables_thinking_when_model_does_not_support_it(monkey
                 "is_plan_mode": False,
                 "subagent_enabled": False,
             }
-        }
+        },
+        app_config=app_config,
     )
 
     assert captured["name"] == "safe-model"
@@ -113,74 +106,6 @@ def test_make_lead_agent_disables_thinking_when_model_does_not_support_it(monkey
     assert result["model"] is not None
 
 
-def test_make_lead_agent_reads_runtime_options_from_context(monkeypatch):
-    app_config = _make_app_config(
-        [
-            _make_model("default-model", supports_thinking=False),
-            _make_model("context-model", supports_thinking=True),
-        ]
-    )
-
-    import deerflow.tools as tools_module
-
-    get_available_tools = MagicMock(return_value=[])
-    monkeypatch.setattr(lead_agent_module, "get_app_config", lambda: app_config)
-    monkeypatch.setattr(tools_module, "get_available_tools", get_available_tools)
-    monkeypatch.setattr(lead_agent_module, "_build_middlewares", lambda config, model_name, agent_name=None: [])
-
-    captured: dict[str, object] = {}
-
-    def _fake_create_chat_model(*, name, thinking_enabled, reasoning_effort=None):
-        captured["name"] = name
-        captured["thinking_enabled"] = thinking_enabled
-        captured["reasoning_effort"] = reasoning_effort
-        return object()
-
-    monkeypatch.setattr(lead_agent_module, "create_chat_model", _fake_create_chat_model)
-    monkeypatch.setattr(lead_agent_module, "create_agent", lambda **kwargs: kwargs)
-
-    result = lead_agent_module.make_lead_agent(
-        {
-            "context": {
-                "model_name": "context-model",
-                "thinking_enabled": False,
-                "reasoning_effort": "high",
-                "is_plan_mode": True,
-                "subagent_enabled": True,
-                "max_concurrent_subagents": 7,
-            }
-        }
-    )
-
-    assert captured == {
-        "name": "context-model",
-        "thinking_enabled": False,
-        "reasoning_effort": "high",
-    }
-    get_available_tools.assert_called_once_with(model_name="context-model", groups=None, subagent_enabled=True)
-    assert result["model"] is not None
-
-
-def test_make_lead_agent_rejects_invalid_bootstrap_agent_name(monkeypatch):
-    app_config = _make_app_config([_make_model("safe-model", supports_thinking=False)])
-
-    monkeypatch.setattr(lead_agent_module, "get_app_config", lambda: app_config)
-
-    with pytest.raises(ValueError, match="Invalid agent name"):
-        lead_agent_module.make_lead_agent(
-            {
-                "configurable": {
-                    "model_name": "safe-model",
-                    "thinking_enabled": False,
-                    "is_plan_mode": False,
-                    "subagent_enabled": False,
-                    "is_bootstrap": True,
-                    "agent_name": "../../../tmp/evil",
-                }
-            }
-        )
-
-
 def test_build_middlewares_uses_resolved_model_name_for_vision(monkeypatch):
     app_config = _make_app_config(
         [
@@ -197,11 +122,10 @@ def test_build_middlewares_uses_resolved_model_name_for_vision(monkeypatch):
         ]
     )
 
-    monkeypatch.setattr(lead_agent_module, "get_app_config", lambda: app_config)
-    monkeypatch.setattr(lead_agent_module, "_create_summarization_middleware", lambda: None)
+    monkeypatch.setattr(lead_agent_module, "_create_summarization_middleware", lambda _ac: None)
     monkeypatch.setattr(lead_agent_module, "_create_todo_list_middleware", lambda is_plan_mode: None)
 
-    middlewares = lead_agent_module._build_middlewares({"configurable": {"model_name": "stale-model", "is_plan_mode": False, "subagent_enabled": False}}, model_name="vision-model", custom_middlewares=[MagicMock()])
+    middlewares = lead_agent_module._build_middlewares(app_config, {"configurable": {"model_name": "stale-model", "is_plan_mode": False, "subagent_enabled": False}}, model_name="vision-model", custom_middlewares=[MagicMock()])
 
     assert any(isinstance(m, lead_agent_module.ViewImageMiddleware) for m in middlewares)
     # verify the custom middleware is injected correctly
@@ -209,73 +133,27 @@ def test_build_middlewares_uses_resolved_model_name_for_vision(monkeypatch):
 
 
 def test_create_summarization_middleware_uses_configured_model_alias(monkeypatch):
-    monkeypatch.setattr(
-        lead_agent_module,
-        "get_summarization_config",
-        lambda: SummarizationConfig(enabled=True, model_name="model-masswork"),
-    )
-    monkeypatch.setattr(lead_agent_module, "get_memory_config", lambda: MemoryConfig(enabled=False))
+    app_config = _make_app_config([_make_model("default", supports_thinking=False)])
+    patched = app_config.model_copy(update={"summarization": SummarizationConfig(enabled=True, model_name="model-masswork")})
+
+    from unittest.mock import MagicMock
 
     captured: dict[str, object] = {}
-    fake_model = object()
+    fake_model = MagicMock()
+    fake_model.with_config.return_value = fake_model
 
-    def _fake_create_chat_model(*, name=None, thinking_enabled, reasoning_effort=None):
+    def _fake_create_chat_model(*, name=None, thinking_enabled, reasoning_effort=None, app_config=None):
         captured["name"] = name
         captured["thinking_enabled"] = thinking_enabled
         captured["reasoning_effort"] = reasoning_effort
         return fake_model
 
     monkeypatch.setattr(lead_agent_module, "create_chat_model", _fake_create_chat_model)
-    monkeypatch.setattr(lead_agent_module, "DeerFlowSummarizationMiddleware", lambda **kwargs: kwargs)
+    monkeypatch.setattr(lead_agent_module, "SummarizationMiddleware", lambda **kwargs: kwargs)
 
-    middleware = lead_agent_module._create_summarization_middleware()
+    middleware = lead_agent_module._create_summarization_middleware(patched)
 
     assert captured["name"] == "model-masswork"
     assert captured["thinking_enabled"] is False
     assert middleware["model"] is fake_model
-
-
-def test_create_summarization_middleware_registers_memory_flush_hook_when_memory_enabled(monkeypatch):
-    monkeypatch.setattr(
-        lead_agent_module,
-        "get_summarization_config",
-        lambda: SummarizationConfig(enabled=True),
-    )
-    monkeypatch.setattr(lead_agent_module, "get_memory_config", lambda: MemoryConfig(enabled=True))
-    monkeypatch.setattr(lead_agent_module, "create_chat_model", lambda **kwargs: object())
-
-    captured: dict[str, object] = {}
-
-    def _fake_middleware(**kwargs):
-        captured.update(kwargs)
-        return kwargs
-
-    monkeypatch.setattr(lead_agent_module, "DeerFlowSummarizationMiddleware", _fake_middleware)
-
-    lead_agent_module._create_summarization_middleware()
-
-    assert captured["before_summarization"] == [lead_agent_module.memory_flush_hook]
-
-
-def test_create_summarization_middleware_passes_skill_read_tool_names(monkeypatch):
-    app_config = _make_app_config([_make_model("default-model", supports_thinking=False)])
-    monkeypatch.setattr(
-        lead_agent_module,
-        "get_summarization_config",
-        lambda: SummarizationConfig(enabled=True, skill_file_read_tool_names=["read_file", "cat"]),
-    )
-    monkeypatch.setattr(lead_agent_module, "get_memory_config", lambda: MemoryConfig(enabled=False))
-    monkeypatch.setattr(lead_agent_module, "get_app_config", lambda: app_config)
-    monkeypatch.setattr(lead_agent_module, "create_chat_model", lambda **kwargs: object())
-
-    captured: dict[str, object] = {}
-
-    def _fake_middleware(**kwargs):
-        captured.update(kwargs)
-        return kwargs
-
-    monkeypatch.setattr(lead_agent_module, "DeerFlowSummarizationMiddleware", _fake_middleware)
-
-    lead_agent_module._create_summarization_middleware()
-
-    assert captured["skill_file_read_tool_names"] == ["read_file", "cat"]
+    fake_model.with_config.assert_called_once_with(tags=["middleware:summarize"])
diff --git a/backend/tests/test_lead_agent_prompt.py b/backend/tests/test_lead_agent_prompt.py
index 6817e7678..3426012d5 100644
--- a/backend/tests/test_lead_agent_prompt.py
+++ b/backend/tests/test_lead_agent_prompt.py
@@ -4,25 +4,23 @@ from types import SimpleNamespace
 import anyio
 
 from deerflow.agents.lead_agent import prompt as prompt_module
+from deerflow.config.app_config import AppConfig
 from deerflow.skills.types import Skill
 
 
-def test_build_custom_mounts_section_returns_empty_when_no_mounts(monkeypatch):
+def test_build_custom_mounts_section_returns_empty_when_no_mounts():
     config = SimpleNamespace(sandbox=SimpleNamespace(mounts=[]))
-    monkeypatch.setattr("deerflow.config.get_app_config", lambda: config)
-
-    assert prompt_module._build_custom_mounts_section() == ""
+    assert prompt_module._build_custom_mounts_section(config) == ""
 
 
-def test_build_custom_mounts_section_lists_configured_mounts(monkeypatch):
+def test_build_custom_mounts_section_lists_configured_mounts():
     mounts = [
         SimpleNamespace(container_path="/home/user/shared", read_only=False),
         SimpleNamespace(container_path="/mnt/reference", read_only=True),
     ]
     config = SimpleNamespace(sandbox=SimpleNamespace(mounts=mounts))
-    monkeypatch.setattr("deerflow.config.get_app_config", lambda: config)
 
-    section = prompt_module._build_custom_mounts_section()
+    section = prompt_module._build_custom_mounts_section(config)
 
     assert "**Custom Mounted Directories:**" in section
     assert "`/home/user/shared`" in section
@@ -36,15 +34,15 @@ def test_apply_prompt_template_includes_custom_mounts(monkeypatch):
     config = SimpleNamespace(
         sandbox=SimpleNamespace(mounts=mounts),
         skills=SimpleNamespace(container_path="/mnt/skills"),
+        skill_evolution=SimpleNamespace(enabled=False),
     )
-    monkeypatch.setattr("deerflow.config.get_app_config", lambda: config)
-    monkeypatch.setattr(prompt_module, "_get_enabled_skills", lambda: [])
-    monkeypatch.setattr(prompt_module, "get_deferred_tools_prompt_section", lambda: "")
-    monkeypatch.setattr(prompt_module, "_build_acp_section", lambda: "")
-    monkeypatch.setattr(prompt_module, "_get_memory_context", lambda agent_name=None: "")
+    monkeypatch.setattr(prompt_module, "_get_enabled_skills", lambda *a, **k: [])
+    monkeypatch.setattr(prompt_module, "get_deferred_tools_prompt_section", lambda app_config: "")
+    monkeypatch.setattr(prompt_module, "_build_acp_section", lambda app_config: "")
+    monkeypatch.setattr(prompt_module, "_get_memory_context", lambda app_config, agent_name=None: "")
     monkeypatch.setattr(prompt_module, "get_agent_soul", lambda agent_name=None: "")
 
-    prompt = prompt_module.apply_prompt_template()
+    prompt = prompt_module.apply_prompt_template(config)
 
     assert "`/home/user/shared`" in prompt
     assert "Custom Mounted Directories" in prompt
@@ -54,15 +52,15 @@ def test_apply_prompt_template_includes_relative_path_guidance(monkeypatch):
     config = SimpleNamespace(
         sandbox=SimpleNamespace(mounts=[]),
         skills=SimpleNamespace(container_path="/mnt/skills"),
+        skill_evolution=SimpleNamespace(enabled=False),
     )
-    monkeypatch.setattr("deerflow.config.get_app_config", lambda: config)
-    monkeypatch.setattr(prompt_module, "_get_enabled_skills", lambda: [])
-    monkeypatch.setattr(prompt_module, "get_deferred_tools_prompt_section", lambda: "")
-    monkeypatch.setattr(prompt_module, "_build_acp_section", lambda: "")
-    monkeypatch.setattr(prompt_module, "_get_memory_context", lambda agent_name=None: "")
+    monkeypatch.setattr(prompt_module, "_get_enabled_skills", lambda *a, **k: [])
+    monkeypatch.setattr(prompt_module, "get_deferred_tools_prompt_section", lambda app_config: "")
+    monkeypatch.setattr(prompt_module, "_build_acp_section", lambda app_config: "")
+    monkeypatch.setattr(prompt_module, "_get_memory_context", lambda app_config, agent_name=None: "")
     monkeypatch.setattr(prompt_module, "get_agent_soul", lambda agent_name=None: "")
 
-    prompt = prompt_module.apply_prompt_template()
+    prompt = prompt_module.apply_prompt_template(config)
 
     assert "Treat `/mnt/user-data/workspace` as your default current working directory" in prompt
     assert "`hello.txt`, `../uploads/data.csv`, and `../outputs/report.md`" in prompt
@@ -83,7 +81,7 @@ def test_refresh_skills_system_prompt_cache_async_reloads_immediately(monkeypatc
         )
 
     state = {"skills": [make_skill("first-skill")]}
-    monkeypatch.setattr(prompt_module, "load_skills", lambda enabled_only=True: list(state["skills"]))
+    monkeypatch.setattr(prompt_module, "load_skills", lambda *a, **kwargs: list(state["skills"]))
     prompt_module._reset_skills_system_prompt_cache_state()
 
     try:
@@ -119,7 +117,7 @@ def test_clear_cache_does_not_spawn_parallel_refresh_workers(monkeypatch, tmp_pa
             enabled=True,
         )
 
-    def fake_load_skills(enabled_only=True):
+    def fake_load_skills(*a, **kwargs):
         nonlocal active_loads, max_active_loads, call_count
         with lock:
             active_loads += 1
@@ -156,7 +154,7 @@ def test_clear_cache_does_not_spawn_parallel_refresh_workers(monkeypatch, tmp_pa
 
 def test_warm_enabled_skills_cache_logs_on_timeout(monkeypatch, caplog):
     event = threading.Event()
-    monkeypatch.setattr(prompt_module, "_ensure_enabled_skills_cache", lambda: event)
+    monkeypatch.setattr(prompt_module, "_ensure_enabled_skills_cache", lambda *a, **k: event)
 
     with caplog.at_level("WARNING"):
         warmed = prompt_module.warm_enabled_skills_cache(timeout_seconds=0.01)
diff --git a/backend/tests/test_lead_agent_skills.py b/backend/tests/test_lead_agent_skills.py
index 441dbeee2..2623b840f 100644
--- a/backend/tests/test_lead_agent_skills.py
+++ b/backend/tests/test_lead_agent_skills.py
@@ -19,27 +19,40 @@ def _make_skill(name: str) -> Skill:
     )
 
 
+_DEFAULT_SKILLS_CONFIG = SimpleNamespace(
+    skills=SimpleNamespace(container_path="/mnt/skills"),
+    skill_evolution=SimpleNamespace(enabled=False),
+)
+
+
+def _evolution_enabled_config() -> SimpleNamespace:
+    return SimpleNamespace(
+        skills=SimpleNamespace(container_path="/mnt/skills"),
+        skill_evolution=SimpleNamespace(enabled=True),
+    )
+
+
 def test_get_skills_prompt_section_returns_empty_when_no_skills_match(monkeypatch):
     skills = [_make_skill("skill1"), _make_skill("skill2")]
-    monkeypatch.setattr("deerflow.agents.lead_agent.prompt._get_enabled_skills", lambda: skills)
+    monkeypatch.setattr("deerflow.agents.lead_agent.prompt._get_enabled_skills", lambda *a, **k: skills)
 
-    result = get_skills_prompt_section(available_skills={"non_existent_skill"})
+    result = get_skills_prompt_section(_DEFAULT_SKILLS_CONFIG, available_skills={"non_existent_skill"})
     assert result == ""
 
 
 def test_get_skills_prompt_section_returns_empty_when_available_skills_empty(monkeypatch):
     skills = [_make_skill("skill1"), _make_skill("skill2")]
-    monkeypatch.setattr("deerflow.agents.lead_agent.prompt._get_enabled_skills", lambda: skills)
+    monkeypatch.setattr("deerflow.agents.lead_agent.prompt._get_enabled_skills", lambda *a, **k: skills)
 
-    result = get_skills_prompt_section(available_skills=set())
+    result = get_skills_prompt_section(_DEFAULT_SKILLS_CONFIG, available_skills=set())
     assert result == ""
 
 
 def test_get_skills_prompt_section_returns_skills(monkeypatch):
     skills = [_make_skill("skill1"), _make_skill("skill2")]
-    monkeypatch.setattr("deerflow.agents.lead_agent.prompt._get_enabled_skills", lambda: skills)
+    monkeypatch.setattr("deerflow.agents.lead_agent.prompt._get_enabled_skills", lambda *a, **k: skills)
 
-    result = get_skills_prompt_section(available_skills={"skill1"})
+    result = get_skills_prompt_section(_DEFAULT_SKILLS_CONFIG, available_skills={"skill1"})
     assert "skill1" in result
     assert "skill2" not in result
     assert "[built-in]" in result
@@ -47,56 +60,41 @@ def test_get_skills_prompt_section_returns_skills(monkeypatch):
 
 def test_get_skills_prompt_section_returns_all_when_available_skills_is_none(monkeypatch):
     skills = [_make_skill("skill1"), _make_skill("skill2")]
-    monkeypatch.setattr("deerflow.agents.lead_agent.prompt._get_enabled_skills", lambda: skills)
+    monkeypatch.setattr("deerflow.agents.lead_agent.prompt._get_enabled_skills", lambda *a, **k: skills)
 
-    result = get_skills_prompt_section(available_skills=None)
+    result = get_skills_prompt_section(_DEFAULT_SKILLS_CONFIG, available_skills=None)
     assert "skill1" in result
     assert "skill2" in result
 
 
 def test_get_skills_prompt_section_includes_self_evolution_rules(monkeypatch):
     skills = [_make_skill("skill1")]
-    monkeypatch.setattr("deerflow.agents.lead_agent.prompt._get_enabled_skills", lambda: skills)
-    monkeypatch.setattr(
-        "deerflow.config.get_app_config",
-        lambda: SimpleNamespace(
-            skills=SimpleNamespace(container_path="/mnt/skills"),
-            skill_evolution=SimpleNamespace(enabled=True),
-        ),
-    )
+    monkeypatch.setattr("deerflow.agents.lead_agent.prompt._get_enabled_skills", lambda *a, **k: skills)
 
-    result = get_skills_prompt_section(available_skills=None)
+    result = get_skills_prompt_section(_evolution_enabled_config(), available_skills=None)
     assert "Skill Self-Evolution" in result
 
 
 def test_get_skills_prompt_section_includes_self_evolution_rules_without_skills(monkeypatch):
-    monkeypatch.setattr("deerflow.agents.lead_agent.prompt._get_enabled_skills", lambda: [])
-    monkeypatch.setattr(
-        "deerflow.config.get_app_config",
-        lambda: SimpleNamespace(
-            skills=SimpleNamespace(container_path="/mnt/skills"),
-            skill_evolution=SimpleNamespace(enabled=True),
-        ),
-    )
+    monkeypatch.setattr("deerflow.agents.lead_agent.prompt._get_enabled_skills", lambda *a, **k: [])
 
-    result = get_skills_prompt_section(available_skills=None)
+    result = get_skills_prompt_section(_evolution_enabled_config(), available_skills=None)
     assert "Skill Self-Evolution" in result
 
 
 def test_get_skills_prompt_section_cache_respects_skill_evolution_toggle(monkeypatch):
     skills = [_make_skill("skill1")]
-    monkeypatch.setattr("deerflow.agents.lead_agent.prompt._get_enabled_skills", lambda: skills)
-    config = SimpleNamespace(
-        skills=SimpleNamespace(container_path="/mnt/skills"),
-        skill_evolution=SimpleNamespace(enabled=True),
-    )
-    monkeypatch.setattr("deerflow.config.get_app_config", lambda: config)
+    monkeypatch.setattr("deerflow.agents.lead_agent.prompt._get_enabled_skills", lambda *a, **k: skills)
+    config = _evolution_enabled_config()
 
-    enabled_result = get_skills_prompt_section(available_skills=None)
+    enabled_result = get_skills_prompt_section(config, available_skills=None)
     assert "Skill Self-Evolution" in enabled_result
 
-    config.skill_evolution.enabled = False
-    disabled_result = get_skills_prompt_section(available_skills=None)
+    disabled_config = SimpleNamespace(
+        skills=SimpleNamespace(container_path="/mnt/skills"),
+        skill_evolution=SimpleNamespace(enabled=False),
+    )
+    disabled_result = get_skills_prompt_section(disabled_config, available_skills=None)
     assert "Skill Self-Evolution" not in disabled_result
 
 
@@ -106,8 +104,7 @@ def test_make_lead_agent_empty_skills_passed_correctly(monkeypatch):
     from deerflow.agents.lead_agent import agent as lead_agent_module
 
     # Mock dependencies
-    monkeypatch.setattr(lead_agent_module, "get_app_config", lambda: MagicMock())
-    monkeypatch.setattr(lead_agent_module, "_resolve_model_name", lambda x=None: "default-model")
+    monkeypatch.setattr(lead_agent_module, "_resolve_model_name", lambda app_config=None, x=None: "default-model")
     monkeypatch.setattr(lead_agent_module, "create_chat_model", lambda **kwargs: "model")
     monkeypatch.setattr("deerflow.tools.get_available_tools", lambda **kwargs: [])
     monkeypatch.setattr(lead_agent_module, "_build_middlewares", lambda *args, **kwargs: [])
@@ -118,11 +115,10 @@ def test_make_lead_agent_empty_skills_passed_correctly(monkeypatch):
 
     mock_app_config = MagicMock()
     mock_app_config.get_model_config.return_value = MockModelConfig()
-    monkeypatch.setattr(lead_agent_module, "get_app_config", lambda: mock_app_config)
 
     captured_skills = []
 
-    def mock_apply_prompt_template(**kwargs):
+    def mock_apply_prompt_template(_app_config, *args, **kwargs):
         captured_skills.append(kwargs.get("available_skills"))
         return "mock_prompt"
 
@@ -130,15 +126,15 @@ def test_make_lead_agent_empty_skills_passed_correctly(monkeypatch):
 
     # Case 1: Empty skills list
     monkeypatch.setattr(lead_agent_module, "load_agent_config", lambda x: AgentConfig(name="test", skills=[]))
-    lead_agent_module.make_lead_agent({"configurable": {"agent_name": "test"}})
+    lead_agent_module.make_lead_agent({"configurable": {"agent_name": "test"}}, app_config=mock_app_config)
     assert captured_skills[-1] == set()
 
     # Case 2: None skills list
     monkeypatch.setattr(lead_agent_module, "load_agent_config", lambda x: AgentConfig(name="test", skills=None))
-    lead_agent_module.make_lead_agent({"configurable": {"agent_name": "test"}})
+    lead_agent_module.make_lead_agent({"configurable": {"agent_name": "test"}}, app_config=mock_app_config)
     assert captured_skills[-1] is None
 
     # Case 3: Some skills list
     monkeypatch.setattr(lead_agent_module, "load_agent_config", lambda x: AgentConfig(name="test", skills=["skill1"]))
-    lead_agent_module.make_lead_agent({"configurable": {"agent_name": "test"}})
+    lead_agent_module.make_lead_agent({"configurable": {"agent_name": "test"}}, app_config=mock_app_config)
     assert captured_skills[-1] == {"skill1"}
diff --git a/backend/tests/test_local_bash_tool_loading.py b/backend/tests/test_local_bash_tool_loading.py
index 60c79a937..a58bad7f4 100644
--- a/backend/tests/test_local_bash_tool_loading.py
+++ b/backend/tests/test_local_bash_tool_loading.py
@@ -22,26 +22,26 @@ def _make_config(*, allow_host_bash: bool, sandbox_use: str = "deerflow.sandbox.
 
 
 def test_get_available_tools_hides_bash_for_default_local_sandbox(monkeypatch):
-    monkeypatch.setattr("deerflow.tools.tools.get_app_config", lambda: _make_config(allow_host_bash=False))
+    app_config = _make_config(allow_host_bash=False)
     monkeypatch.setattr(
         "deerflow.tools.tools.resolve_variable",
         lambda use, _: SimpleNamespace(name="bash" if "bash" in use else "ls"),
     )
 
-    names = [tool.name for tool in get_available_tools(include_mcp=False, subagent_enabled=False)]
+    names = [tool.name for tool in get_available_tools(include_mcp=False, subagent_enabled=False, app_config=app_config)]
 
     assert "bash" not in names
     assert "ls" in names
 
 
 def test_get_available_tools_keeps_bash_when_explicitly_enabled(monkeypatch):
-    monkeypatch.setattr("deerflow.tools.tools.get_app_config", lambda: _make_config(allow_host_bash=True))
+    app_config = _make_config(allow_host_bash=True)
     monkeypatch.setattr(
         "deerflow.tools.tools.resolve_variable",
         lambda use, _: SimpleNamespace(name="bash" if "bash" in use else "ls"),
     )
 
-    names = [tool.name for tool in get_available_tools(include_mcp=False, subagent_enabled=False)]
+    names = [tool.name for tool in get_available_tools(include_mcp=False, subagent_enabled=False, app_config=app_config)]
 
     assert "bash" in names
     assert "ls" in names
@@ -52,13 +52,12 @@ def test_get_available_tools_hides_renamed_host_bash_alias(monkeypatch):
         allow_host_bash=False,
         extra_tools=[SimpleNamespace(name="shell", group="bash", use="deerflow.sandbox.tools:bash_tool")],
     )
-    monkeypatch.setattr("deerflow.tools.tools.get_app_config", lambda: config)
     monkeypatch.setattr(
         "deerflow.tools.tools.resolve_variable",
         lambda use, _: SimpleNamespace(name="bash" if "bash_tool" in use else "ls"),
     )
 
-    names = [tool.name for tool in get_available_tools(include_mcp=False, subagent_enabled=False)]
+    names = [tool.name for tool in get_available_tools(include_mcp=False, subagent_enabled=False, app_config=config)]
 
     assert "bash" not in names
     assert "shell" not in names
@@ -70,13 +69,12 @@ def test_get_available_tools_keeps_bash_for_aio_sandbox(monkeypatch):
         allow_host_bash=False,
         sandbox_use="deerflow.community.aio_sandbox:AioSandboxProvider",
     )
-    monkeypatch.setattr("deerflow.tools.tools.get_app_config", lambda: config)
     monkeypatch.setattr(
         "deerflow.tools.tools.resolve_variable",
         lambda use, _: SimpleNamespace(name="bash" if "bash_tool" in use else "ls"),
     )
 
-    names = [tool.name for tool in get_available_tools(include_mcp=False, subagent_enabled=False)]
+    names = [tool.name for tool in get_available_tools(include_mcp=False, subagent_enabled=False, app_config=config)]
 
     assert "bash" in names
     assert "ls" in names
diff --git a/backend/tests/test_local_sandbox_provider_mounts.py b/backend/tests/test_local_sandbox_provider_mounts.py
index 328b1d48d..ace7fbf7a 100644
--- a/backend/tests/test_local_sandbox_provider_mounts.py
+++ b/backend/tests/test_local_sandbox_provider_mounts.py
@@ -1,6 +1,5 @@
 import errno
 from types import SimpleNamespace
-from unittest.mock import patch
 
 import pytest
 
@@ -314,8 +313,7 @@ class TestLocalSandboxProviderMounts:
             sandbox=sandbox_config,
         )
 
-        with patch("deerflow.config.get_app_config", return_value=config):
-            provider = LocalSandboxProvider()
+        provider = LocalSandboxProvider(app_config=config)
 
         assert [m.container_path for m in provider._path_mappings] == ["/custom-skills"]
 
@@ -336,8 +334,7 @@ class TestLocalSandboxProviderMounts:
             sandbox=sandbox_config,
         )
 
-        with patch("deerflow.config.get_app_config", return_value=config):
-            provider = LocalSandboxProvider()
+        provider = LocalSandboxProvider(app_config=config)
 
         assert [m.container_path for m in provider._path_mappings] == ["/mnt/skills"]
 
@@ -360,8 +357,7 @@ class TestLocalSandboxProviderMounts:
             sandbox=sandbox_config,
         )
 
-        with patch("deerflow.config.get_app_config", return_value=config):
-            provider = LocalSandboxProvider()
+        provider = LocalSandboxProvider(app_config=config)
 
         assert [m.container_path for m in provider._path_mappings] == ["/mnt/skills"]
 
@@ -476,7 +472,6 @@ class TestLocalSandboxProviderMounts:
             sandbox=sandbox_config,
         )
 
-        with patch("deerflow.config.get_app_config", return_value=config):
-            provider = LocalSandboxProvider()
+        provider = LocalSandboxProvider(app_config=config)
 
         assert [m.container_path for m in provider._path_mappings] == ["/mnt/skills", "/mnt/data"]
diff --git a/backend/tests/test_loop_detection_middleware.py b/backend/tests/test_loop_detection_middleware.py
index 8d2b34860..fc08e2009 100644
--- a/backend/tests/test_loop_detection_middleware.py
+++ b/backend/tests/test_loop_detection_middleware.py
@@ -10,12 +10,22 @@ from deerflow.agents.middlewares.loop_detection_middleware import (
     LoopDetectionMiddleware,
     _hash_tool_calls,
 )
+from deerflow.config.app_config import AppConfig
+from deerflow.config.deer_flow_context import DeerFlowContext
+from deerflow.config.sandbox_config import SandboxConfig
+
+
+def _make_context(thread_id: str) -> DeerFlowContext:
+    return DeerFlowContext(
+        app_config=AppConfig(sandbox=SandboxConfig(use="test")),
+        thread_id=thread_id,
+    )
 
 
 def _make_runtime(thread_id="test-thread"):
     """Build a minimal Runtime mock with context."""
     runtime = MagicMock()
-    runtime.context = {"thread_id": thread_id}
+    runtime.context = _make_context(thread_id)
     return runtime
 
 
@@ -293,10 +303,10 @@ class TestLoopDetection:
         assert isinstance(mw._lock, type(mw._lock))
 
     def test_fallback_thread_id_when_missing(self):
-        """When runtime context has no thread_id, should use 'default'."""
+        """When runtime context has empty thread_id, should use 'default'."""
         mw = LoopDetectionMiddleware(warn_threshold=2)
         runtime = MagicMock()
-        runtime.context = {}
+        runtime.context = _make_context("")
         call = [_bash_call("ls")]
 
         mw._apply(_make_state(tool_calls=call), runtime)
diff --git a/backend/tests/test_memory_queue.py b/backend/tests/test_memory_queue.py
index 0d991ec0c..4e5a6e984 100644
--- a/backend/tests/test_memory_queue.py
+++ b/backend/tests/test_memory_queue.py
@@ -3,23 +3,31 @@ import time
 from unittest.mock import MagicMock, patch
 
 from deerflow.agents.memory.queue import ConversationContext, MemoryUpdateQueue
+from deerflow.config.app_config import AppConfig
 from deerflow.config.memory_config import MemoryConfig
+from deerflow.config.sandbox_config import SandboxConfig
 
 
-def _memory_config(**overrides: object) -> MemoryConfig:
-    config = MemoryConfig()
-    for key, value in overrides.items():
-        setattr(config, key, value)
-    return config
+
+# --- Phase 2 config-refactor test helper ---
+# Memory APIs now take MemoryConfig / AppConfig explicitly. Tests construct a
+# minimal config once and reuse it across call sites.
+from deerflow.config.app_config import AppConfig as _TestAppConfig
+from deerflow.config.memory_config import MemoryConfig as _TestMemoryConfig
+from deerflow.config.sandbox_config import SandboxConfig as _TestSandboxConfig
+
+_TEST_MEMORY_CONFIG = _TestMemoryConfig(enabled=True)
+_TEST_APP_CONFIG = _TestAppConfig(sandbox=_TestSandboxConfig(use="test"), memory=_TEST_MEMORY_CONFIG)
+# -------------------------------------------
+
+def _make_config(**memory_overrides) -> AppConfig:
+    return AppConfig(sandbox=SandboxConfig(use="test"), memory=MemoryConfig(**memory_overrides))
 
 
 def test_queue_add_preserves_existing_correction_flag_for_same_thread() -> None:
-    queue = MemoryUpdateQueue()
+    queue = MemoryUpdateQueue(_TEST_APP_CONFIG)
 
-    with (
-        patch("deerflow.agents.memory.queue.get_memory_config", return_value=_memory_config(enabled=True)),
-        patch.object(queue, "_reset_timer"),
-    ):
+    with patch.object(queue, "_reset_timer"):
         queue.add(thread_id="thread-1", messages=["first"], correction_detected=True)
         queue.add(thread_id="thread-1", messages=["second"], correction_detected=False)
 
@@ -29,7 +37,7 @@ def test_queue_add_preserves_existing_correction_flag_for_same_thread() -> None:
 
 
 def test_process_queue_forwards_correction_flag_to_updater() -> None:
-    queue = MemoryUpdateQueue()
+    queue = MemoryUpdateQueue(_TEST_APP_CONFIG)
     queue._queue = [
         ConversationContext(
             thread_id="thread-1",
@@ -50,16 +58,14 @@ def test_process_queue_forwards_correction_flag_to_updater() -> None:
         agent_name="lead_agent",
         correction_detected=True,
         reinforcement_detected=False,
+        user_id=None,
     )
 
 
 def test_queue_add_preserves_existing_reinforcement_flag_for_same_thread() -> None:
-    queue = MemoryUpdateQueue()
+    queue = MemoryUpdateQueue(_TEST_APP_CONFIG)
 
-    with (
-        patch("deerflow.agents.memory.queue.get_memory_config", return_value=_memory_config(enabled=True)),
-        patch.object(queue, "_reset_timer"),
-    ):
+    with patch.object(queue, "_reset_timer"):
         queue.add(thread_id="thread-1", messages=["first"], reinforcement_detected=True)
         queue.add(thread_id="thread-1", messages=["second"], reinforcement_detected=False)
 
@@ -69,7 +75,7 @@ def test_queue_add_preserves_existing_reinforcement_flag_for_same_thread() -> No
 
 
 def test_process_queue_forwards_reinforcement_flag_to_updater() -> None:
-    queue = MemoryUpdateQueue()
+    queue = MemoryUpdateQueue(_TEST_APP_CONFIG)
     queue._queue = [
         ConversationContext(
             thread_id="thread-1",
@@ -90,6 +96,7 @@ def test_process_queue_forwards_reinforcement_flag_to_updater() -> None:
         agent_name="lead_agent",
         correction_detected=False,
         reinforcement_detected=True,
+        user_id=None,
     )
 
 
diff --git a/backend/tests/test_memory_queue_user_isolation.py b/backend/tests/test_memory_queue_user_isolation.py
new file mode 100644
index 000000000..23fc948a0
--- /dev/null
+++ b/backend/tests/test_memory_queue_user_isolation.py
@@ -0,0 +1,61 @@
+
+# --- Phase 2 config-refactor test helper ---
+# Memory APIs now take MemoryConfig / AppConfig explicitly. Tests construct a
+# minimal config once and reuse it across call sites.
+from deerflow.config.app_config import AppConfig as _TestAppConfig
+from deerflow.config.memory_config import MemoryConfig as _TestMemoryConfig
+from deerflow.config.sandbox_config import SandboxConfig as _TestSandboxConfig
+
+_TEST_MEMORY_CONFIG = _TestMemoryConfig(enabled=True)
+_TEST_APP_CONFIG = _TestAppConfig(sandbox=_TestSandboxConfig(use="test"), memory=_TEST_MEMORY_CONFIG)
+# -------------------------------------------
+
+"""Tests for user_id propagation through memory queue."""
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from deerflow.agents.memory.queue import ConversationContext, MemoryUpdateQueue
+from deerflow.config.app_config import AppConfig
+from deerflow.config.memory_config import MemoryConfig
+
+
+@pytest.fixture(autouse=True)
+def _enable_memory(monkeypatch):
+    """Ensure MemoryUpdateQueue.add() doesn't early-return on disabled memory."""
+    config = MagicMock(spec=AppConfig)
+    config.memory = MemoryConfig(enabled=True)
+
+
+def test_conversation_context_has_user_id():
+    ctx = ConversationContext(thread_id="t1", messages=[], user_id="alice")
+    assert ctx.user_id == "alice"
+
+
+def test_conversation_context_user_id_default_none():
+    ctx = ConversationContext(thread_id="t1", messages=[])
+    assert ctx.user_id is None
+
+
+def test_queue_add_stores_user_id():
+    q = MemoryUpdateQueue(_TEST_APP_CONFIG)
+    with patch.object(q, "_reset_timer"):
+        q.add(thread_id="t1", messages=["msg"], user_id="alice")
+    assert len(q._queue) == 1
+    assert q._queue[0].user_id == "alice"
+    q.clear()
+
+
+def test_queue_process_passes_user_id_to_updater():
+    q = MemoryUpdateQueue(_TEST_APP_CONFIG)
+    with patch.object(q, "_reset_timer"):
+        q.add(thread_id="t1", messages=["msg"], user_id="alice")
+
+    mock_updater = MagicMock()
+    mock_updater.update_memory.return_value = True
+    with patch("deerflow.agents.memory.updater.MemoryUpdater", return_value=mock_updater):
+        q._process_queue()
+
+    mock_updater.update_memory.assert_called_once()
+    call_kwargs = mock_updater.update_memory.call_args.kwargs
+    assert call_kwargs["user_id"] == "alice"
diff --git a/backend/tests/test_memory_router.py b/backend/tests/test_memory_router.py
index 23a4f30fe..55f7f428f 100644
--- a/backend/tests/test_memory_router.py
+++ b/backend/tests/test_memory_router.py
@@ -4,6 +4,18 @@ from fastapi import FastAPI
 from fastapi.testclient import TestClient
 
 from app.gateway.routers import memory
+from deerflow.config.app_config import AppConfig
+from deerflow.config.sandbox_config import SandboxConfig
+
+_TEST_APP_CONFIG = AppConfig(sandbox=SandboxConfig(use="test"))
+
+
+def _make_app() -> FastAPI:
+    """Build a memory-router app pre-populated with a minimal AppConfig."""
+    app = FastAPI()
+    app.state.config = _TEST_APP_CONFIG
+    app.include_router(memory.router)
+    return app
 
 
 def _sample_memory(facts: list[dict] | None = None) -> dict:
@@ -25,8 +37,7 @@ def _sample_memory(facts: list[dict] | None = None) -> dict:
 
 
 def test_export_memory_route_returns_current_memory() -> None:
-    app = FastAPI()
-    app.include_router(memory.router)
+    app = _make_app()
     exported_memory = _sample_memory(
         facts=[
             {
@@ -49,8 +60,7 @@ def test_export_memory_route_returns_current_memory() -> None:
 
 
 def test_import_memory_route_returns_imported_memory() -> None:
-    app = FastAPI()
-    app.include_router(memory.router)
+    app = _make_app()
     imported_memory = _sample_memory(
         facts=[
             {
@@ -73,8 +83,7 @@ def test_import_memory_route_returns_imported_memory() -> None:
 
 
 def test_export_memory_route_preserves_source_error() -> None:
-    app = FastAPI()
-    app.include_router(memory.router)
+    app = _make_app()
     exported_memory = _sample_memory(
         facts=[
             {
@@ -98,8 +107,7 @@ def test_export_memory_route_preserves_source_error() -> None:
 
 
 def test_import_memory_route_preserves_source_error() -> None:
-    app = FastAPI()
-    app.include_router(memory.router)
+    app = _make_app()
     imported_memory = _sample_memory(
         facts=[
             {
@@ -123,8 +131,7 @@ def test_import_memory_route_preserves_source_error() -> None:
 
 
 def test_clear_memory_route_returns_cleared_memory() -> None:
-    app = FastAPI()
-    app.include_router(memory.router)
+    app = _make_app()
 
     with patch("app.gateway.routers.memory.clear_memory_data", return_value=_sample_memory()):
         with TestClient(app) as client:
@@ -135,8 +142,7 @@ def test_clear_memory_route_returns_cleared_memory() -> None:
 
 
 def test_create_memory_fact_route_returns_updated_memory() -> None:
-    app = FastAPI()
-    app.include_router(memory.router)
+    app = _make_app()
     updated_memory = _sample_memory(
         facts=[
             {
@@ -166,8 +172,7 @@ def test_create_memory_fact_route_returns_updated_memory() -> None:
 
 
 def test_delete_memory_fact_route_returns_updated_memory() -> None:
-    app = FastAPI()
-    app.include_router(memory.router)
+    app = _make_app()
     updated_memory = _sample_memory(
         facts=[
             {
@@ -190,8 +195,7 @@ def test_delete_memory_fact_route_returns_updated_memory() -> None:
 
 
 def test_delete_memory_fact_route_returns_404_for_missing_fact() -> None:
-    app = FastAPI()
-    app.include_router(memory.router)
+    app = _make_app()
 
     with patch("app.gateway.routers.memory.delete_memory_fact", side_effect=KeyError("fact_missing")):
         with TestClient(app) as client:
@@ -202,8 +206,7 @@ def test_delete_memory_fact_route_returns_404_for_missing_fact() -> None:
 
 
 def test_update_memory_fact_route_returns_updated_memory() -> None:
-    app = FastAPI()
-    app.include_router(memory.router)
+    app = _make_app()
     updated_memory = _sample_memory(
         facts=[
             {
@@ -233,8 +236,7 @@ def test_update_memory_fact_route_returns_updated_memory() -> None:
 
 
 def test_update_memory_fact_route_preserves_omitted_fields() -> None:
-    app = FastAPI()
-    app.include_router(memory.router)
+    app = _make_app()
     updated_memory = _sample_memory(
         facts=[
             {
@@ -258,18 +260,18 @@ def test_update_memory_fact_route_preserves_omitted_fields() -> None:
             )
 
     assert response.status_code == 200
-    update_fact.assert_called_once_with(
-        fact_id="fact_edit",
-        content="User prefers spaces",
-        category=None,
-        confidence=None,
-    )
+    assert update_fact.call_count == 1
+    call_kwargs = update_fact.call_args.kwargs
+    assert call_kwargs.get("fact_id") == "fact_edit"
+    assert call_kwargs.get("content") == "User prefers spaces"
+    assert call_kwargs.get("category") is None
+    assert call_kwargs.get("confidence") is None
+    assert "user_id" in call_kwargs
     assert response.json()["facts"] == updated_memory["facts"]
 
 
 def test_update_memory_fact_route_returns_404_for_missing_fact() -> None:
-    app = FastAPI()
-    app.include_router(memory.router)
+    app = _make_app()
 
     with patch("app.gateway.routers.memory.update_memory_fact", side_effect=KeyError("fact_missing")):
         with TestClient(app) as client:
@@ -287,8 +289,7 @@ def test_update_memory_fact_route_returns_404_for_missing_fact() -> None:
 
 
 def test_update_memory_fact_route_returns_specific_error_for_invalid_confidence() -> None:
-    app = FastAPI()
-    app.include_router(memory.router)
+    app = _make_app()
 
     with patch("app.gateway.routers.memory.update_memory_fact", side_effect=ValueError("confidence")):
         with TestClient(app) as client:
diff --git a/backend/tests/test_memory_storage.py b/backend/tests/test_memory_storage.py
index d11ad3316..62fe117ae 100644
--- a/backend/tests/test_memory_storage.py
+++ b/backend/tests/test_memory_storage.py
@@ -1,3 +1,15 @@
+
+# --- Phase 2 config-refactor test helper ---
+# Memory APIs now take MemoryConfig / AppConfig explicitly. Tests construct a
+# minimal config once and reuse it across call sites.
+from deerflow.config.app_config import AppConfig as _TestAppConfig
+from deerflow.config.memory_config import MemoryConfig as _TestMemoryConfig
+from deerflow.config.sandbox_config import SandboxConfig as _TestSandboxConfig
+
+_TEST_MEMORY_CONFIG = _TestMemoryConfig(enabled=True)
+_TEST_APP_CONFIG = _TestAppConfig(sandbox=_TestSandboxConfig(use="test"), memory=_TEST_MEMORY_CONFIG)
+# -------------------------------------------
+
 """Tests for memory storage providers."""
 
 import threading
@@ -11,7 +23,13 @@ from deerflow.agents.memory.storage import (
     create_empty_memory,
     get_memory_storage,
 )
+from deerflow.config.app_config import AppConfig
 from deerflow.config.memory_config import MemoryConfig
+from deerflow.config.sandbox_config import SandboxConfig
+
+
+def _app_config(**memory_overrides) -> AppConfig:
+    return AppConfig(sandbox=SandboxConfig(use="test"), memory=MemoryConfig(**memory_overrides))
 
 
 class TestCreateEmptyMemory:
@@ -53,10 +71,9 @@ class TestFileMemoryStorage:
             return mock_paths
 
         with patch("deerflow.agents.memory.storage.get_paths", side_effect=mock_get_paths):
-            with patch("deerflow.agents.memory.storage.get_memory_config", return_value=MemoryConfig(storage_path="")):
-                storage = FileMemoryStorage()
-                path = storage._get_memory_file_path(None)
-                assert path == tmp_path / "memory.json"
+            storage = FileMemoryStorage(_TEST_MEMORY_CONFIG)
+            path = storage._get_memory_file_path(None)
+            assert path == tmp_path / "memory.json"
 
     def test_get_memory_file_path_agent(self, tmp_path):
         """Should return per-agent memory file path when agent_name is provided."""
@@ -67,14 +84,14 @@ class TestFileMemoryStorage:
             return mock_paths
 
         with patch("deerflow.agents.memory.storage.get_paths", side_effect=mock_get_paths):
-            storage = FileMemoryStorage()
+            storage = FileMemoryStorage(_TEST_MEMORY_CONFIG)
             path = storage._get_memory_file_path("test-agent")
             assert path == tmp_path / "agents" / "test-agent" / "memory.json"
 
     @pytest.mark.parametrize("invalid_name", ["", "../etc/passwd", "agent/name", "agent\\name", "agent name", "agent@123", "agent_name"])
     def test_validate_agent_name_invalid(self, invalid_name):
         """Should raise ValueError for invalid agent names that don't match the pattern."""
-        storage = FileMemoryStorage()
+        storage = FileMemoryStorage(_TEST_MEMORY_CONFIG)
         with pytest.raises(ValueError, match="Invalid agent name|Agent name must be a non-empty string"):
             storage._validate_agent_name(invalid_name)
 
@@ -87,11 +104,10 @@ class TestFileMemoryStorage:
             return mock_paths
 
         with patch("deerflow.agents.memory.storage.get_paths", side_effect=mock_get_paths):
-            with patch("deerflow.agents.memory.storage.get_memory_config", return_value=MemoryConfig(storage_path="")):
-                storage = FileMemoryStorage()
-                memory = storage.load()
-                assert isinstance(memory, dict)
-                assert memory["version"] == "1.0"
+            storage = FileMemoryStorage(_TEST_MEMORY_CONFIG)
+            memory = storage.load()
+            assert isinstance(memory, dict)
+            assert memory["version"] == "1.0"
 
     def test_save_writes_to_file(self, tmp_path):
         """Should save memory data to file."""
@@ -103,12 +119,11 @@ class TestFileMemoryStorage:
             return mock_paths
 
         with patch("deerflow.agents.memory.storage.get_paths", side_effect=mock_get_paths):
-            with patch("deerflow.agents.memory.storage.get_memory_config", return_value=MemoryConfig(storage_path="")):
-                storage = FileMemoryStorage()
-                test_memory = {"version": "1.0", "facts": [{"content": "test fact"}]}
-                result = storage.save(test_memory)
-                assert result is True
-                assert memory_file.exists()
+            storage = FileMemoryStorage(_TEST_MEMORY_CONFIG)
+            test_memory = {"version": "1.0", "facts": [{"content": "test fact"}]}
+            result = storage.save(test_memory)
+            assert result is True
+            assert memory_file.exists()
 
     def test_save_does_not_mutate_caller_dict(self, tmp_path):
         """save() must not mutate the caller's dict (lastUpdated side-effect)."""
@@ -209,18 +224,17 @@ class TestFileMemoryStorage:
             return mock_paths
 
         with patch("deerflow.agents.memory.storage.get_paths", side_effect=mock_get_paths):
-            with patch("deerflow.agents.memory.storage.get_memory_config", return_value=MemoryConfig(storage_path="")):
-                storage = FileMemoryStorage()
-                # First load
-                memory1 = storage.load()
-                assert memory1["facts"][0]["content"] == "initial fact"
+            storage = FileMemoryStorage(_TEST_MEMORY_CONFIG)
+            # First load
+            memory1 = storage.load()
+            assert memory1["facts"][0]["content"] == "initial fact"
 
-                # Update file directly
-                memory_file.write_text('{"version": "1.0", "facts": [{"content": "updated fact"}]}')
+            # Update file directly
+            memory_file.write_text('{"version": "1.0", "facts": [{"content": "updated fact"}]}')
 
-                # Reload should get updated data
-                memory2 = storage.reload()
-                assert memory2["facts"][0]["content"] == "updated fact"
+            # Reload should get updated data
+            memory2 = storage.reload()
+            assert memory2["facts"][0]["content"] == "updated fact"
 
 
 class TestGetMemoryStorage:
@@ -237,22 +251,19 @@ class TestGetMemoryStorage:
 
     def test_returns_file_memory_storage_by_default(self):
         """Should return FileMemoryStorage by default."""
-        with patch("deerflow.agents.memory.storage.get_memory_config", return_value=MemoryConfig(storage_class="deerflow.agents.memory.storage.FileMemoryStorage")):
-            storage = get_memory_storage()
-            assert isinstance(storage, FileMemoryStorage)
+        storage = get_memory_storage(_TEST_MEMORY_CONFIG)
+        assert isinstance(storage, FileMemoryStorage)
 
     def test_falls_back_to_file_memory_storage_on_error(self):
         """Should fall back to FileMemoryStorage if configured storage fails to load."""
-        with patch("deerflow.agents.memory.storage.get_memory_config", return_value=MemoryConfig(storage_class="non.existent.StorageClass")):
-            storage = get_memory_storage()
-            assert isinstance(storage, FileMemoryStorage)
+        storage = get_memory_storage(_TEST_MEMORY_CONFIG)
+        assert isinstance(storage, FileMemoryStorage)
 
     def test_returns_singleton_instance(self):
         """Should return the same instance on subsequent calls."""
-        with patch("deerflow.agents.memory.storage.get_memory_config", return_value=MemoryConfig(storage_class="deerflow.agents.memory.storage.FileMemoryStorage")):
-            storage1 = get_memory_storage()
-            storage2 = get_memory_storage()
-            assert storage1 is storage2
+        storage1 = get_memory_storage(_TEST_MEMORY_CONFIG)
+        storage2 = get_memory_storage(_TEST_MEMORY_CONFIG)
+        assert storage1 is storage2
 
     def test_get_memory_storage_thread_safety(self):
         """Should safely initialize the singleton even with concurrent calls."""
@@ -260,16 +271,15 @@ class TestGetMemoryStorage:
 
         def get_storage():
             # get_memory_storage is called concurrently from multiple threads while
-            # get_memory_config is patched once around thread creation. This verifies
+            # AppConfig.get is patched once around thread creation. This verifies
             # that the singleton initialization remains thread-safe.
-            results.append(get_memory_storage())
+            results.append(get_memory_storage(_TEST_MEMORY_CONFIG))
 
-        with patch("deerflow.agents.memory.storage.get_memory_config", return_value=MemoryConfig(storage_class="deerflow.agents.memory.storage.FileMemoryStorage")):
-            threads = [threading.Thread(target=get_storage) for _ in range(10)]
-            for t in threads:
-                t.start()
-            for t in threads:
-                t.join()
+        threads = [threading.Thread(target=get_storage) for _ in range(10)]
+        for t in threads:
+            t.start()
+        for t in threads:
+            t.join()
 
         # All results should be the exact same instance
         assert len(results) == 10
@@ -278,13 +288,11 @@ class TestGetMemoryStorage:
     def test_get_memory_storage_invalid_class_fallback(self):
         """Should fall back to FileMemoryStorage if the configured class is not actually a class."""
         # Using a built-in function instead of a class
-        with patch("deerflow.agents.memory.storage.get_memory_config", return_value=MemoryConfig(storage_class="os.path.join")):
-            storage = get_memory_storage()
-            assert isinstance(storage, FileMemoryStorage)
+        storage = get_memory_storage(_TEST_MEMORY_CONFIG)
+        assert isinstance(storage, FileMemoryStorage)
 
     def test_get_memory_storage_non_subclass_fallback(self):
         """Should fall back to FileMemoryStorage if the configured class is not a subclass of MemoryStorage."""
         # Using 'dict' as a class that is not a MemoryStorage subclass
-        with patch("deerflow.agents.memory.storage.get_memory_config", return_value=MemoryConfig(storage_class="builtins.dict")):
-            storage = get_memory_storage()
-            assert isinstance(storage, FileMemoryStorage)
+        storage = get_memory_storage(_TEST_MEMORY_CONFIG)
+        assert isinstance(storage, FileMemoryStorage)
diff --git a/backend/tests/test_memory_storage_user_isolation.py b/backend/tests/test_memory_storage_user_isolation.py
new file mode 100644
index 000000000..8e5438eff
--- /dev/null
+++ b/backend/tests/test_memory_storage_user_isolation.py
@@ -0,0 +1,168 @@
+
+# --- Phase 2 config-refactor test helper ---
+# Memory APIs now take MemoryConfig / AppConfig explicitly. Tests construct a
+# minimal config once and reuse it across call sites.
+from deerflow.config.app_config import AppConfig as _TestAppConfig
+from deerflow.config.memory_config import MemoryConfig as _TestMemoryConfig
+from deerflow.config.sandbox_config import SandboxConfig as _TestSandboxConfig
+
+_TEST_MEMORY_CONFIG = _TestMemoryConfig(enabled=True)
+_TEST_APP_CONFIG = _TestAppConfig(sandbox=_TestSandboxConfig(use="test"), memory=_TEST_MEMORY_CONFIG)
+# -------------------------------------------
+
+"""Tests for per-user memory storage isolation."""
+import pytest
+from pathlib import Path
+from unittest.mock import patch
+
+from deerflow.agents.memory.storage import FileMemoryStorage, create_empty_memory
+from deerflow.config.app_config import AppConfig
+from deerflow.config.memory_config import MemoryConfig
+from deerflow.config.sandbox_config import SandboxConfig
+
+
+def _mock_app_config() -> AppConfig:
+    """Build a minimal AppConfig with default (empty) memory storage_path."""
+    return AppConfig(sandbox=SandboxConfig(use="test"), memory=MemoryConfig(storage_path=""))
+
+
+@pytest.fixture
+def base_dir(tmp_path: Path) -> Path:
+    return tmp_path
+
+
+@pytest.fixture
+def storage() -> FileMemoryStorage:
+    return FileMemoryStorage(_TEST_MEMORY_CONFIG)
+
+
+
+
+class TestUserIsolatedStorage:
+    def test_save_and_load_per_user(self, storage: FileMemoryStorage, base_dir: Path):
+        from deerflow.config.paths import Paths
+
+        paths = Paths(base_dir)
+        with patch("deerflow.agents.memory.storage.get_paths", return_value=paths):
+            memory_a = create_empty_memory()
+            memory_a["user"]["workContext"]["summary"] = "User A context"
+            storage.save(memory_a, user_id="alice")
+
+            memory_b = create_empty_memory()
+            memory_b["user"]["workContext"]["summary"] = "User B context"
+            storage.save(memory_b, user_id="bob")
+
+            loaded_a = storage.load(user_id="alice")
+            loaded_b = storage.load(user_id="bob")
+
+            assert loaded_a["user"]["workContext"]["summary"] == "User A context"
+            assert loaded_b["user"]["workContext"]["summary"] == "User B context"
+
+    def test_user_memory_file_location(self, base_dir: Path):
+        from deerflow.config.paths import Paths
+
+        paths = Paths(base_dir)
+        with patch("deerflow.agents.memory.storage.get_paths", return_value=paths):
+            s = FileMemoryStorage(_TEST_MEMORY_CONFIG)
+            memory = create_empty_memory()
+            s.save(memory, user_id="alice")
+            expected_path = base_dir / "users" / "alice" / "memory.json"
+            assert expected_path.exists()
+
+    def test_cache_isolated_per_user(self, base_dir: Path):
+        from deerflow.config.paths import Paths
+
+        paths = Paths(base_dir)
+        with patch("deerflow.agents.memory.storage.get_paths", return_value=paths):
+            s = FileMemoryStorage(_TEST_MEMORY_CONFIG)
+            memory_a = create_empty_memory()
+            memory_a["user"]["workContext"]["summary"] = "A"
+            s.save(memory_a, user_id="alice")
+
+            memory_b = create_empty_memory()
+            memory_b["user"]["workContext"]["summary"] = "B"
+            s.save(memory_b, user_id="bob")
+
+            loaded_a = s.load(user_id="alice")
+            assert loaded_a["user"]["workContext"]["summary"] == "A"
+
+    def test_no_user_id_uses_legacy_path(self, base_dir: Path):
+        from deerflow.config.paths import Paths
+
+        paths = Paths(base_dir)
+        with patch("deerflow.agents.memory.storage.get_paths", return_value=paths):
+            s = FileMemoryStorage(_TEST_MEMORY_CONFIG)
+            memory = create_empty_memory()
+            s.save(memory, user_id=None)
+            expected_path = base_dir / "memory.json"
+            assert expected_path.exists()
+
+    def test_user_and_legacy_do_not_interfere(self, base_dir: Path):
+        """user_id=None (legacy) and user_id='alice' must use different files and caches."""
+        from deerflow.config.paths import Paths
+
+        paths = Paths(base_dir)
+        with patch("deerflow.agents.memory.storage.get_paths", return_value=paths):
+            s = FileMemoryStorage(_TEST_MEMORY_CONFIG)
+
+            legacy_mem = create_empty_memory()
+            legacy_mem["user"]["workContext"]["summary"] = "legacy"
+            s.save(legacy_mem, user_id=None)
+
+            user_mem = create_empty_memory()
+            user_mem["user"]["workContext"]["summary"] = "alice"
+            s.save(user_mem, user_id="alice")
+
+            assert s.load(user_id=None)["user"]["workContext"]["summary"] == "legacy"
+            assert s.load(user_id="alice")["user"]["workContext"]["summary"] == "alice"
+
+    def test_user_agent_memory_file_location(self, base_dir: Path):
+        """Per-user per-agent memory uses the user_agent_memory_file path."""
+        from deerflow.config.paths import Paths
+
+        paths = Paths(base_dir)
+        with patch("deerflow.agents.memory.storage.get_paths", return_value=paths):
+            s = FileMemoryStorage(_TEST_MEMORY_CONFIG)
+            memory = create_empty_memory()
+            memory["user"]["workContext"]["summary"] = "agent scoped"
+            s.save(memory, "test-agent", user_id="alice")
+            expected_path = base_dir / "users" / "alice" / "agents" / "test-agent" / "memory.json"
+            assert expected_path.exists()
+
+    def test_cache_key_is_user_agent_tuple(self, base_dir: Path):
+        """Cache keys must be (user_id, agent_name) tuples, not bare agent names."""
+        from deerflow.config.paths import Paths
+
+        paths = Paths(base_dir)
+        with patch("deerflow.agents.memory.storage.get_paths", return_value=paths):
+            s = FileMemoryStorage(_TEST_MEMORY_CONFIG)
+            memory = create_empty_memory()
+            s.save(memory, user_id="alice")
+            # After save, cache should have tuple key
+            assert ("alice", None) in s._memory_cache
+
+    def test_reload_with_user_id(self, base_dir: Path):
+        """reload() with user_id should force re-read from the user-scoped file."""
+        from deerflow.config.paths import Paths
+
+        paths = Paths(base_dir)
+        with patch("deerflow.agents.memory.storage.get_paths", return_value=paths):
+            s = FileMemoryStorage(_TEST_MEMORY_CONFIG)
+            memory = create_empty_memory()
+            memory["user"]["workContext"]["summary"] = "initial"
+            s.save(memory, user_id="alice")
+
+            # Load once to prime cache
+            s.load(user_id="alice")
+
+            # Write updated content directly to file
+            user_file = base_dir / "users" / "alice" / "memory.json"
+            import json
+
+            updated = create_empty_memory()
+            updated["user"]["workContext"]["summary"] = "updated"
+            user_file.write_text(json.dumps(updated))
+
+            # reload should pick up the new content
+            reloaded = s.reload(user_id="alice")
+            assert reloaded["user"]["workContext"]["summary"] == "updated"
diff --git a/backend/tests/test_memory_thread_meta_isolation.py b/backend/tests/test_memory_thread_meta_isolation.py
new file mode 100644
index 000000000..d89034312
--- /dev/null
+++ b/backend/tests/test_memory_thread_meta_isolation.py
@@ -0,0 +1,167 @@
+"""Owner isolation tests for MemoryThreadMetaStore.
+
+Mirrors the SQL-backed tests in test_owner_isolation.py but exercises
+the in-memory LangGraph Store backend used when database.backend=memory.
+"""
+
+from __future__ import annotations
+
+# --- Phase 2 config-refactor test helper ---
+# Memory APIs now take MemoryConfig / AppConfig explicitly. Tests construct a
+# minimal config once and reuse it across call sites.
+from deerflow.config.app_config import AppConfig as _TestAppConfig
+from deerflow.config.memory_config import MemoryConfig as _TestMemoryConfig
+from deerflow.config.sandbox_config import SandboxConfig as _TestSandboxConfig
+
+_TEST_MEMORY_CONFIG = _TestMemoryConfig(enabled=True)
+_TEST_APP_CONFIG = _TestAppConfig(sandbox=_TestSandboxConfig(use="test"), memory=_TEST_MEMORY_CONFIG)
+# -------------------------------------------
+
+from types import SimpleNamespace
+
+import pytest
+from langgraph.store.memory import InMemoryStore
+
+from deerflow.persistence.thread_meta.memory import MemoryThreadMetaStore
+from deerflow.runtime.user_context import reset_current_user, set_current_user
+
+USER_A = SimpleNamespace(id="user-a", email="a@test.local")
+USER_B = SimpleNamespace(id="user-b", email="b@test.local")
+
+
+def _as_user(user):
+    class _Ctx:
+        def __enter__(self):
+            self._token = set_current_user(user)
+            return user
+
+        def __exit__(self, *exc):
+            reset_current_user(self._token)
+
+    return _Ctx()
+
+
+@pytest.fixture
+def store():
+    return MemoryThreadMetaStore(InMemoryStore())
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_search_isolation(store):
+    """search() returns only threads owned by the current user."""
+    with _as_user(USER_A):
+        await store.create("t-alpha", display_name="A's thread")
+    with _as_user(USER_B):
+        await store.create("t-beta", display_name="B's thread")
+
+    with _as_user(USER_A):
+        results = await store.search()
+        assert [r["thread_id"] for r in results] == ["t-alpha"]
+
+    with _as_user(USER_B):
+        results = await store.search()
+        assert [r["thread_id"] for r in results] == ["t-beta"]
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_get_isolation(store):
+    """get() returns None for threads owned by another user."""
+    with _as_user(USER_A):
+        await store.create("t-alpha", display_name="A's thread")
+
+    with _as_user(USER_B):
+        assert await store.get("t-alpha") is None
+
+    with _as_user(USER_A):
+        result = await store.get("t-alpha")
+        assert result is not None
+        assert result["display_name"] == "A's thread"
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_update_display_name_denied(store):
+    """User B cannot rename User A's thread."""
+    with _as_user(USER_A):
+        await store.create("t-alpha", display_name="original")
+
+    with _as_user(USER_B):
+        await store.update_display_name("t-alpha", "hacked")
+
+    with _as_user(USER_A):
+        row = await store.get("t-alpha")
+        assert row is not None
+        assert row["display_name"] == "original"
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_update_status_denied(store):
+    """User B cannot change status of User A's thread."""
+    with _as_user(USER_A):
+        await store.create("t-alpha")
+
+    with _as_user(USER_B):
+        await store.update_status("t-alpha", "error")
+
+    with _as_user(USER_A):
+        row = await store.get("t-alpha")
+        assert row is not None
+        assert row["status"] == "idle"
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_update_metadata_denied(store):
+    """User B cannot modify metadata of User A's thread."""
+    with _as_user(USER_A):
+        await store.create("t-alpha", metadata={"key": "original"})
+
+    with _as_user(USER_B):
+        await store.update_metadata("t-alpha", {"key": "hacked"})
+
+    with _as_user(USER_A):
+        row = await store.get("t-alpha")
+        assert row is not None
+        assert row["metadata"]["key"] == "original"
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_delete_denied(store):
+    """User B cannot delete User A's thread."""
+    with _as_user(USER_A):
+        await store.create("t-alpha")
+
+    with _as_user(USER_B):
+        await store.delete("t-alpha")
+
+    with _as_user(USER_A):
+        row = await store.get("t-alpha")
+        assert row is not None
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_no_context_raises(store):
+    """Calling methods without user context raises RuntimeError."""
+    with pytest.raises(RuntimeError, match="no user context is set"):
+        await store.search()
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_explicit_none_bypasses_filter(store):
+    """user_id=None bypasses isolation (migration/CLI escape hatch)."""
+    with _as_user(USER_A):
+        await store.create("t-alpha")
+    with _as_user(USER_B):
+        await store.create("t-beta")
+
+    all_rows = await store.search(user_id=None)
+    assert {r["thread_id"] for r in all_rows} == {"t-alpha", "t-beta"}
+
+    row = await store.get("t-alpha", user_id=None)
+    assert row is not None
diff --git a/backend/tests/test_memory_updater.py b/backend/tests/test_memory_updater.py
index 37e81c471..3246ab44f 100644
--- a/backend/tests/test_memory_updater.py
+++ b/backend/tests/test_memory_updater.py
@@ -1,22 +1,32 @@
-import asyncio
-from unittest.mock import AsyncMock, MagicMock, patch
-
-import pytest
+from unittest.mock import MagicMock, patch
 
 from deerflow.agents.memory.prompt import format_conversation_for_update
 from deerflow.agents.memory.updater import (
     MemoryUpdater,
     _extract_text,
-    _run_async_update_sync,
     clear_memory_data,
     create_memory_fact,
     delete_memory_fact,
     import_memory_data,
     update_memory_fact,
 )
+from deerflow.config.app_config import AppConfig
 from deerflow.config.memory_config import MemoryConfig
+from deerflow.config.sandbox_config import SandboxConfig
 
 
+
+# --- Phase 2 config-refactor test helper ---
+# Memory APIs now take MemoryConfig / AppConfig explicitly. Tests construct a
+# minimal config once and reuse it across call sites.
+from deerflow.config.app_config import AppConfig as _TestAppConfig
+from deerflow.config.memory_config import MemoryConfig as _TestMemoryConfig
+from deerflow.config.sandbox_config import SandboxConfig as _TestSandboxConfig
+
+_TEST_MEMORY_CONFIG = _TestMemoryConfig(enabled=True)
+_TEST_APP_CONFIG = _TestAppConfig(sandbox=_TestSandboxConfig(use="test"), memory=_TEST_MEMORY_CONFIG)
+# -------------------------------------------
+
 def _make_memory(facts: list[dict[str, object]] | None = None) -> dict[str, object]:
     return {
         "version": "1.0",
@@ -35,15 +45,12 @@ def _make_memory(facts: list[dict[str, object]] | None = None) -> dict[str, obje
     }
 
 
-def _memory_config(**overrides: object) -> MemoryConfig:
-    config = MemoryConfig()
-    for key, value in overrides.items():
-        setattr(config, key, value)
-    return config
+def _memory_config(**overrides: object) -> AppConfig:
+    return AppConfig(sandbox=SandboxConfig(use="test"), memory=MemoryConfig().model_copy(update=overrides))
 
 
 def test_apply_updates_skips_existing_duplicate_and_preserves_removals() -> None:
-    updater = MemoryUpdater()
+    updater = MemoryUpdater(_memory_config(max_facts=100, fact_confidence_threshold=0.7))
     current_memory = _make_memory(
         facts=[
             {
@@ -70,19 +77,14 @@ def test_apply_updates_skips_existing_duplicate_and_preserves_removals() -> None
             {"content": "User likes Python", "category": "preference", "confidence": 0.95},
         ],
     }
-
-    with patch(
-        "deerflow.agents.memory.updater.get_memory_config",
-        return_value=_memory_config(max_facts=100, fact_confidence_threshold=0.7),
-    ):
-        result = updater._apply_updates(current_memory, update_data, thread_id="thread-b")
+    result = updater._apply_updates(current_memory, update_data, thread_id="thread-b")
 
     assert [fact["content"] for fact in result["facts"]] == ["User likes Python"]
     assert all(fact["id"] != "fact_remove" for fact in result["facts"])
 
 
 def test_apply_updates_skips_same_batch_duplicates_and_keeps_source_metadata() -> None:
-    updater = MemoryUpdater()
+    updater = MemoryUpdater(_memory_config(max_facts=100, fact_confidence_threshold=0.7))
     current_memory = _make_memory()
     update_data = {
         "newFacts": [
@@ -91,12 +93,7 @@ def test_apply_updates_skips_same_batch_duplicates_and_keeps_source_metadata() -
             {"content": "User works on DeerFlow", "category": "context", "confidence": 0.87},
         ],
     }
-
-    with patch(
-        "deerflow.agents.memory.updater.get_memory_config",
-        return_value=_memory_config(max_facts=100, fact_confidence_threshold=0.7),
-    ):
-        result = updater._apply_updates(current_memory, update_data, thread_id="thread-42")
+    result = updater._apply_updates(current_memory, update_data, thread_id="thread-42")
 
     assert [fact["content"] for fact in result["facts"]] == [
         "User prefers dark mode",
@@ -107,7 +104,7 @@ def test_apply_updates_skips_same_batch_duplicates_and_keeps_source_metadata() -
 
 
 def test_apply_updates_preserves_threshold_and_max_facts_trimming() -> None:
-    updater = MemoryUpdater()
+    updater = MemoryUpdater(_memory_config(max_facts=2, fact_confidence_threshold=0.7))
     current_memory = _make_memory(
         facts=[
             {
@@ -135,12 +132,7 @@ def test_apply_updates_preserves_threshold_and_max_facts_trimming() -> None:
             {"content": "User likes noisy logs", "category": "behavior", "confidence": 0.6},
         ],
     }
-
-    with patch(
-        "deerflow.agents.memory.updater.get_memory_config",
-        return_value=_memory_config(max_facts=2, fact_confidence_threshold=0.7),
-    ):
-        result = updater._apply_updates(current_memory, update_data, thread_id="thread-9")
+    result = updater._apply_updates(current_memory, update_data, thread_id="thread-9")
 
     assert [fact["content"] for fact in result["facts"]] == [
         "User likes Python",
@@ -151,7 +143,7 @@ def test_apply_updates_preserves_threshold_and_max_facts_trimming() -> None:
 
 
 def test_apply_updates_preserves_source_error() -> None:
-    updater = MemoryUpdater()
+    updater = MemoryUpdater(_memory_config(max_facts=100, fact_confidence_threshold=0.7))
     current_memory = _make_memory()
     update_data = {
         "newFacts": [
@@ -163,19 +155,14 @@ def test_apply_updates_preserves_source_error() -> None:
             }
         ]
     }
-
-    with patch(
-        "deerflow.agents.memory.updater.get_memory_config",
-        return_value=_memory_config(max_facts=100, fact_confidence_threshold=0.7),
-    ):
-        result = updater._apply_updates(current_memory, update_data, thread_id="thread-correction")
+    result = updater._apply_updates(current_memory, update_data, thread_id="thread-correction")
 
     assert result["facts"][0]["sourceError"] == "The agent previously suggested npm start."
     assert result["facts"][0]["category"] == "correction"
 
 
 def test_apply_updates_ignores_empty_source_error() -> None:
-    updater = MemoryUpdater()
+    updater = MemoryUpdater(_memory_config(max_facts=100, fact_confidence_threshold=0.7))
     current_memory = _make_memory()
     update_data = {
         "newFacts": [
@@ -187,19 +174,14 @@ def test_apply_updates_ignores_empty_source_error() -> None:
             }
         ]
     }
-
-    with patch(
-        "deerflow.agents.memory.updater.get_memory_config",
-        return_value=_memory_config(max_facts=100, fact_confidence_threshold=0.7),
-    ):
-        result = updater._apply_updates(current_memory, update_data, thread_id="thread-correction")
+    result = updater._apply_updates(current_memory, update_data, thread_id="thread-correction")
 
     assert "sourceError" not in result["facts"][0]
 
 
 def test_clear_memory_data_resets_all_sections() -> None:
     with patch("deerflow.agents.memory.updater._save_memory_to_file", return_value=True):
-        result = clear_memory_data()
+        result = clear_memory_data(_TEST_MEMORY_CONFIG)
 
     assert result["version"] == "1.0"
     assert result["facts"] == []
@@ -233,7 +215,7 @@ def test_delete_memory_fact_removes_only_matching_fact() -> None:
         patch("deerflow.agents.memory.updater.get_memory_data", return_value=current_memory),
         patch("deerflow.agents.memory.updater._save_memory_to_file", return_value=True),
     ):
-        result = delete_memory_fact("fact_delete")
+        result = delete_memory_fact(_TEST_MEMORY_CONFIG, "fact_delete")
 
     assert [fact["id"] for fact in result["facts"]] == ["fact_keep"]
 
@@ -243,7 +225,7 @@ def test_create_memory_fact_appends_manual_fact() -> None:
         patch("deerflow.agents.memory.updater.get_memory_data", return_value=_make_memory()),
         patch("deerflow.agents.memory.updater._save_memory_to_file", return_value=True),
     ):
-        result = create_memory_fact(
+        result = create_memory_fact(_TEST_MEMORY_CONFIG, 
             content="  User prefers concise code reviews.  ",
             category="preference",
             confidence=0.88,
@@ -258,7 +240,7 @@ def test_create_memory_fact_appends_manual_fact() -> None:
 
 def test_create_memory_fact_rejects_empty_content() -> None:
     try:
-        create_memory_fact(content="   ")
+        create_memory_fact(_TEST_MEMORY_CONFIG, content="   ")
     except ValueError as exc:
         assert exc.args == ("content",)
     else:
@@ -268,7 +250,7 @@ def test_create_memory_fact_rejects_empty_content() -> None:
 def test_create_memory_fact_rejects_invalid_confidence() -> None:
     for confidence in (-0.1, 1.1, float("nan"), float("inf"), float("-inf")):
         try:
-            create_memory_fact(content="User likes tests", confidence=confidence)
+            create_memory_fact(_TEST_MEMORY_CONFIG, content="User likes tests", confidence=confidence)
         except ValueError as exc:
             assert exc.args == ("confidence",)
         else:
@@ -278,7 +260,7 @@ def test_create_memory_fact_rejects_invalid_confidence() -> None:
 def test_delete_memory_fact_raises_for_unknown_id() -> None:
     with patch("deerflow.agents.memory.updater.get_memory_data", return_value=_make_memory()):
         try:
-            delete_memory_fact("fact_missing")
+            delete_memory_fact(_TEST_MEMORY_CONFIG, "fact_missing")
         except KeyError as exc:
             assert exc.args == ("fact_missing",)
         else:
@@ -303,10 +285,10 @@ def test_import_memory_data_saves_and_returns_imported_memory() -> None:
     mock_storage.load.return_value = imported_memory
 
     with patch("deerflow.agents.memory.updater.get_memory_storage", return_value=mock_storage):
-        result = import_memory_data(imported_memory)
+        result = import_memory_data(_TEST_MEMORY_CONFIG, imported_memory)
 
-    mock_storage.save.assert_called_once_with(imported_memory, None)
-    mock_storage.load.assert_called_once_with(None)
+    mock_storage.save.assert_called_once_with(imported_memory, None, user_id=None)
+    mock_storage.load.assert_called_once_with(None, user_id=None)
     assert result == imported_memory
 
 
@@ -336,7 +318,7 @@ def test_update_memory_fact_updates_only_matching_fact() -> None:
         patch("deerflow.agents.memory.updater.get_memory_data", return_value=current_memory),
         patch("deerflow.agents.memory.updater._save_memory_to_file", return_value=True),
     ):
-        result = update_memory_fact(
+        result = update_memory_fact(_TEST_MEMORY_CONFIG, 
             fact_id="fact_edit",
             content="User prefers spaces",
             category="workflow",
@@ -369,7 +351,7 @@ def test_update_memory_fact_preserves_omitted_fields() -> None:
         patch("deerflow.agents.memory.updater.get_memory_data", return_value=current_memory),
         patch("deerflow.agents.memory.updater._save_memory_to_file", return_value=True),
     ):
-        result = update_memory_fact(
+        result = update_memory_fact(_TEST_MEMORY_CONFIG, 
             fact_id="fact_edit",
             content="User prefers spaces",
         )
@@ -382,7 +364,7 @@ def test_update_memory_fact_preserves_omitted_fields() -> None:
 def test_update_memory_fact_raises_for_unknown_id() -> None:
     with patch("deerflow.agents.memory.updater.get_memory_data", return_value=_make_memory()):
         try:
-            update_memory_fact(
+            update_memory_fact(_TEST_MEMORY_CONFIG, 
                 fact_id="fact_missing",
                 content="User prefers concise code reviews.",
                 category="preference",
@@ -414,7 +396,7 @@ def test_update_memory_fact_rejects_invalid_confidence() -> None:
             return_value=current_memory,
         ):
             try:
-                update_memory_fact(
+                update_memory_fact(_TEST_MEMORY_CONFIG, 
                     fact_id="fact_edit",
                     content="User prefers spaces",
                     confidence=confidence,
@@ -527,17 +509,15 @@ class TestUpdateMemoryStructuredResponse:
         model = MagicMock()
         response = MagicMock()
         response.content = content
-        model.ainvoke = AsyncMock(return_value=response)
+        model.invoke.return_value = response
         return model
 
     def test_string_response_parses(self):
-        updater = MemoryUpdater()
+        updater = MemoryUpdater(_TEST_APP_CONFIG)
         valid_json = '{"user": {}, "history": {}, "newFacts": [], "factsToRemove": []}'
-        model = self._make_mock_model(valid_json)
 
         with (
-            patch.object(updater, "_get_model", return_value=model),
-            patch("deerflow.agents.memory.updater.get_memory_config", return_value=_memory_config(enabled=True)),
+            patch.object(updater, "_get_model", return_value=self._make_mock_model(valid_json)),
             patch("deerflow.agents.memory.updater.get_memory_data", return_value=_make_memory()),
             patch("deerflow.agents.memory.updater.get_memory_storage", return_value=MagicMock(save=MagicMock(return_value=True))),
         ):
@@ -551,17 +531,15 @@ class TestUpdateMemoryStructuredResponse:
             result = updater.update_memory([msg, ai_msg])
 
         assert result is True
-        model.ainvoke.assert_awaited_once()
 
     def test_list_content_response_parses(self):
         """LLM response as list-of-blocks should be extracted, not repr'd."""
-        updater = MemoryUpdater()
+        updater = MemoryUpdater(_TEST_APP_CONFIG)
         valid_json = '{"user": {}, "history": {}, "newFacts": [], "factsToRemove": []}'
         list_content = [{"type": "text", "text": valid_json}]
 
         with (
             patch.object(updater, "_get_model", return_value=self._make_mock_model(list_content)),
-            patch("deerflow.agents.memory.updater.get_memory_config", return_value=_memory_config(enabled=True)),
             patch("deerflow.agents.memory.updater.get_memory_data", return_value=_make_memory()),
             patch("deerflow.agents.memory.updater.get_memory_storage", return_value=MagicMock(save=MagicMock(return_value=True))),
         ):
@@ -576,38 +554,13 @@ class TestUpdateMemoryStructuredResponse:
 
         assert result is True
 
-    def test_async_update_memory_uses_ainvoke(self):
-        updater = MemoryUpdater()
-        valid_json = '{"user": {}, "history": {}, "newFacts": [], "factsToRemove": []}'
-        model = self._make_mock_model(valid_json)
-
-        with (
-            patch.object(updater, "_get_model", return_value=model),
-            patch("deerflow.agents.memory.updater.get_memory_config", return_value=_memory_config(enabled=True)),
-            patch("deerflow.agents.memory.updater.get_memory_data", return_value=_make_memory()),
-            patch("deerflow.agents.memory.updater.get_memory_storage", return_value=MagicMock(save=MagicMock(return_value=True))),
-        ):
-            msg = MagicMock()
-            msg.type = "human"
-            msg.content = "Hello"
-            ai_msg = MagicMock()
-            ai_msg.type = "ai"
-            ai_msg.content = "Hi there"
-            ai_msg.tool_calls = []
-            result = asyncio.run(updater.aupdate_memory([msg, ai_msg]))
-
-        assert result is True
-        model.ainvoke.assert_awaited_once()
-        assert model.ainvoke.await_args.kwargs["config"] == {"run_name": "memory_agent"}
-
     def test_correction_hint_injected_when_detected(self):
-        updater = MemoryUpdater()
+        updater = MemoryUpdater(_TEST_APP_CONFIG)
         valid_json = '{"user": {}, "history": {}, "newFacts": [], "factsToRemove": []}'
         model = self._make_mock_model(valid_json)
 
         with (
             patch.object(updater, "_get_model", return_value=model),
-            patch("deerflow.agents.memory.updater.get_memory_config", return_value=_memory_config(enabled=True)),
             patch("deerflow.agents.memory.updater.get_memory_data", return_value=_make_memory()),
             patch("deerflow.agents.memory.updater.get_memory_storage", return_value=MagicMock(save=MagicMock(return_value=True))),
         ):
@@ -622,17 +575,16 @@ class TestUpdateMemoryStructuredResponse:
             result = updater.update_memory([msg, ai_msg], correction_detected=True)
 
         assert result is True
-        prompt = model.ainvoke.await_args.args[0]
+        prompt = model.invoke.call_args[0][0]
         assert "Explicit correction signals were detected" in prompt
 
     def test_correction_hint_empty_when_not_detected(self):
-        updater = MemoryUpdater()
+        updater = MemoryUpdater(_TEST_APP_CONFIG)
         valid_json = '{"user": {}, "history": {}, "newFacts": [], "factsToRemove": []}'
         model = self._make_mock_model(valid_json)
 
         with (
             patch.object(updater, "_get_model", return_value=model),
-            patch("deerflow.agents.memory.updater.get_memory_config", return_value=_memory_config(enabled=True)),
             patch("deerflow.agents.memory.updater.get_memory_data", return_value=_make_memory()),
             patch("deerflow.agents.memory.updater.get_memory_storage", return_value=MagicMock(save=MagicMock(return_value=True))),
         ):
@@ -647,95 +599,15 @@ class TestUpdateMemoryStructuredResponse:
             result = updater.update_memory([msg, ai_msg], correction_detected=False)
 
         assert result is True
-        prompt = model.ainvoke.await_args.args[0]
+        prompt = model.invoke.call_args[0][0]
         assert "Explicit correction signals were detected" not in prompt
 
-    def test_sync_update_memory_wrapper_works_in_running_loop(self):
-        updater = MemoryUpdater()
-        valid_json = '{"user": {}, "history": {}, "newFacts": [], "factsToRemove": []}'
-        model = self._make_mock_model(valid_json)
-
-        with (
-            patch.object(updater, "_get_model", return_value=model),
-            patch("deerflow.agents.memory.updater.get_memory_config", return_value=_memory_config(enabled=True)),
-            patch("deerflow.agents.memory.updater.get_memory_data", return_value=_make_memory()),
-            patch("deerflow.agents.memory.updater.get_memory_storage", return_value=MagicMock(save=MagicMock(return_value=True))),
-        ):
-            msg = MagicMock()
-            msg.type = "human"
-            msg.content = "Hello from loop"
-            ai_msg = MagicMock()
-            ai_msg.type = "ai"
-            ai_msg.content = "Hi"
-            ai_msg.tool_calls = []
-
-            async def run_in_loop():
-                return updater.update_memory([msg, ai_msg])
-
-            result = asyncio.run(run_in_loop())
-
-        assert result is True
-        model.ainvoke.assert_awaited_once()
-
-    def test_sync_update_memory_returns_false_when_bridge_submit_fails(self):
-        updater = MemoryUpdater()
-
-        with (
-            patch(
-                "deerflow.agents.memory.updater._SYNC_MEMORY_UPDATER_EXECUTOR.submit",
-                side_effect=RuntimeError("executor down"),
-            ),
-        ):
-            msg = MagicMock()
-            msg.type = "human"
-            msg.content = "Hello from loop"
-            ai_msg = MagicMock()
-            ai_msg.type = "ai"
-            ai_msg.content = "Hi"
-            ai_msg.tool_calls = []
-
-            async def run_in_loop():
-                return updater.update_memory([msg, ai_msg])
-
-            result = asyncio.run(run_in_loop())
-
-        assert result is False
-
-
-class TestRunAsyncUpdateSync:
-    def test_closes_unawaited_awaitable_when_bridge_fails_before_handoff(self):
-        class CloseableAwaitable:
-            def __init__(self):
-                self.closed = False
-
-            def __await__(self):
-                pytest.fail("awaitable should not have been awaited")
-                yield
-
-            def close(self):
-                self.closed = True
-
-        awaitable = CloseableAwaitable()
-
-        with patch(
-            "deerflow.agents.memory.updater._SYNC_MEMORY_UPDATER_EXECUTOR.submit",
-            side_effect=RuntimeError("executor down"),
-        ):
-
-            async def run_in_loop():
-                return _run_async_update_sync(awaitable)
-
-            result = asyncio.run(run_in_loop())
-
-        assert result is False
-        assert awaitable.closed is True
-
 
 class TestFactDeduplicationCaseInsensitive:
     """Tests that fact deduplication is case-insensitive."""
 
     def test_duplicate_fact_different_case_not_stored(self):
-        updater = MemoryUpdater()
+        updater = MemoryUpdater(_memory_config(max_facts=100, fact_confidence_threshold=0.7))
         current_memory = _make_memory(
             facts=[
                 {
@@ -755,19 +627,14 @@ class TestFactDeduplicationCaseInsensitive:
                 {"content": "user prefers python", "category": "preference", "confidence": 0.95},
             ],
         }
-
-        with patch(
-            "deerflow.agents.memory.updater.get_memory_config",
-            return_value=_memory_config(max_facts=100, fact_confidence_threshold=0.7),
-        ):
-            result = updater._apply_updates(current_memory, update_data, thread_id="thread-b")
+        result = updater._apply_updates(current_memory, update_data, thread_id="thread-b")
 
         # Should still have only 1 fact (duplicate rejected)
         assert len(result["facts"]) == 1
         assert result["facts"][0]["content"] == "User prefers Python"
 
     def test_unique_fact_different_case_and_content_stored(self):
-        updater = MemoryUpdater()
+        updater = MemoryUpdater(_memory_config(max_facts=100, fact_confidence_threshold=0.7))
         current_memory = _make_memory(
             facts=[
                 {
@@ -786,12 +653,7 @@ class TestFactDeduplicationCaseInsensitive:
                 {"content": "User prefers Go", "category": "preference", "confidence": 0.85},
             ],
         }
-
-        with patch(
-            "deerflow.agents.memory.updater.get_memory_config",
-            return_value=_memory_config(max_facts=100, fact_confidence_threshold=0.7),
-        ):
-            result = updater._apply_updates(current_memory, update_data, thread_id="thread-b")
+        result = updater._apply_updates(current_memory, update_data, thread_id="thread-b")
 
         assert len(result["facts"]) == 2
 
@@ -804,17 +666,16 @@ class TestReinforcementHint:
         model = MagicMock()
         response = MagicMock()
         response.content = f"```json\n{json_response}\n```"
-        model.ainvoke = AsyncMock(return_value=response)
+        model.invoke.return_value = response
         return model
 
     def test_reinforcement_hint_injected_when_detected(self):
-        updater = MemoryUpdater()
+        updater = MemoryUpdater(_TEST_APP_CONFIG)
         valid_json = '{"user": {}, "history": {}, "newFacts": [], "factsToRemove": []}'
         model = self._make_mock_model(valid_json)
 
         with (
             patch.object(updater, "_get_model", return_value=model),
-            patch("deerflow.agents.memory.updater.get_memory_config", return_value=_memory_config(enabled=True)),
             patch("deerflow.agents.memory.updater.get_memory_data", return_value=_make_memory()),
             patch("deerflow.agents.memory.updater.get_memory_storage", return_value=MagicMock(save=MagicMock(return_value=True))),
         ):
@@ -829,17 +690,16 @@ class TestReinforcementHint:
             result = updater.update_memory([msg, ai_msg], reinforcement_detected=True)
 
         assert result is True
-        prompt = model.ainvoke.await_args.args[0]
+        prompt = model.invoke.call_args[0][0]
         assert "Positive reinforcement signals were detected" in prompt
 
     def test_reinforcement_hint_absent_when_not_detected(self):
-        updater = MemoryUpdater()
+        updater = MemoryUpdater(_TEST_APP_CONFIG)
         valid_json = '{"user": {}, "history": {}, "newFacts": [], "factsToRemove": []}'
         model = self._make_mock_model(valid_json)
 
         with (
             patch.object(updater, "_get_model", return_value=model),
-            patch("deerflow.agents.memory.updater.get_memory_config", return_value=_memory_config(enabled=True)),
             patch("deerflow.agents.memory.updater.get_memory_data", return_value=_make_memory()),
             patch("deerflow.agents.memory.updater.get_memory_storage", return_value=MagicMock(save=MagicMock(return_value=True))),
         ):
@@ -854,17 +714,16 @@ class TestReinforcementHint:
             result = updater.update_memory([msg, ai_msg], reinforcement_detected=False)
 
         assert result is True
-        prompt = model.ainvoke.await_args.args[0]
+        prompt = model.invoke.call_args[0][0]
         assert "Positive reinforcement signals were detected" not in prompt
 
     def test_both_hints_present_when_both_detected(self):
-        updater = MemoryUpdater()
+        updater = MemoryUpdater(_TEST_APP_CONFIG)
         valid_json = '{"user": {}, "history": {}, "newFacts": [], "factsToRemove": []}'
         model = self._make_mock_model(valid_json)
 
         with (
             patch.object(updater, "_get_model", return_value=model),
-            patch("deerflow.agents.memory.updater.get_memory_config", return_value=_memory_config(enabled=True)),
             patch("deerflow.agents.memory.updater.get_memory_data", return_value=_make_memory()),
             patch("deerflow.agents.memory.updater.get_memory_storage", return_value=MagicMock(save=MagicMock(return_value=True))),
         ):
@@ -879,56 +738,6 @@ class TestReinforcementHint:
             result = updater.update_memory([msg, ai_msg], correction_detected=True, reinforcement_detected=True)
 
         assert result is True
-        prompt = model.ainvoke.await_args.args[0]
+        prompt = model.invoke.call_args[0][0]
         assert "Explicit correction signals were detected" in prompt
         assert "Positive reinforcement signals were detected" in prompt
-
-
-class TestFinalizeCacheIsolation:
-    """_finalize_update must not mutate the cached memory object."""
-
-    def test_deepcopy_prevents_cache_corruption_on_save_failure(self):
-        """If save() fails, the in-memory snapshot used by _finalize_update
-        must remain independent of any object the storage layer may still hold in
-        its cache.  The deepcopy in _finalize_update achieves this — the object
-        passed to _apply_updates is always a fresh copy, never the cache reference.
-        """
-        updater = MemoryUpdater()
-        original_memory = _make_memory(facts=[{"id": "fact_orig", "content": "original", "category": "context", "confidence": 0.9, "createdAt": "2024-01-01T00:00:00Z", "source": "t1"}])
-
-        import json as _json
-
-        new_fact_json = _json.dumps(
-            {
-                "user": {},
-                "history": {},
-                "newFacts": [{"content": "new fact", "category": "context", "confidence": 0.9}],
-                "factsToRemove": [],
-            }
-        )
-        mock_response = MagicMock()
-        mock_response.content = new_fact_json
-        mock_model = AsyncMock()
-        mock_model.ainvoke = AsyncMock(return_value=mock_response)
-
-        saved_objects: list[dict] = []
-        save_mock = MagicMock(side_effect=lambda m, a=None: saved_objects.append(m) or False)  # always fails
-
-        with (
-            patch.object(updater, "_get_model", return_value=mock_model),
-            patch("deerflow.agents.memory.updater.get_memory_config", return_value=_memory_config(enabled=True, fact_confidence_threshold=0.7)),
-            patch("deerflow.agents.memory.updater.get_memory_data", return_value=original_memory),
-            patch("deerflow.agents.memory.updater.get_memory_storage", return_value=MagicMock(save=save_mock)),
-        ):
-            msg = MagicMock()
-            msg.type = "human"
-            msg.content = "hello"
-            ai_msg = MagicMock()
-            ai_msg.type = "ai"
-            ai_msg.content = "world"
-            ai_msg.tool_calls = []
-            updater.update_memory([msg, ai_msg], thread_id="t1")
-
-        # original_memory must not have been mutated — deepcopy isolates the mutation
-        assert len(original_memory["facts"]) == 1, "original_memory must not be mutated by _apply_updates"
-        assert original_memory["facts"][0]["content"] == "original"
diff --git a/backend/tests/test_memory_updater_user_isolation.py b/backend/tests/test_memory_updater_user_isolation.py
new file mode 100644
index 000000000..f874d1e4b
--- /dev/null
+++ b/backend/tests/test_memory_updater_user_isolation.py
@@ -0,0 +1,41 @@
+
+# --- Phase 2 config-refactor test helper ---
+# Memory APIs now take MemoryConfig / AppConfig explicitly. Tests construct a
+# minimal config once and reuse it across call sites.
+from deerflow.config.app_config import AppConfig as _TestAppConfig
+from deerflow.config.memory_config import MemoryConfig as _TestMemoryConfig
+from deerflow.config.sandbox_config import SandboxConfig as _TestSandboxConfig
+
+_TEST_MEMORY_CONFIG = _TestMemoryConfig(enabled=True)
+_TEST_APP_CONFIG = _TestAppConfig(sandbox=_TestSandboxConfig(use="test"), memory=_TEST_MEMORY_CONFIG)
+# -------------------------------------------
+
+"""Tests for user_id propagation in memory updater."""
+from unittest.mock import MagicMock, patch
+
+from deerflow.agents.memory.updater import get_memory_data, clear_memory_data, _save_memory_to_file
+
+
+def test_get_memory_data_passes_user_id():
+    mock_storage = MagicMock()
+    mock_storage.load.return_value = {"version": "1.0"}
+    with patch("deerflow.agents.memory.updater.get_memory_storage", return_value=mock_storage):
+        get_memory_data(_TEST_MEMORY_CONFIG, user_id="alice")
+        mock_storage.load.assert_called_once_with(None, user_id="alice")
+
+
+def test_save_memory_passes_user_id():
+    mock_storage = MagicMock()
+    mock_storage.save.return_value = True
+    with patch("deerflow.agents.memory.updater.get_memory_storage", return_value=mock_storage):
+        _save_memory_to_file(_TEST_MEMORY_CONFIG, {"version": "1.0"}, user_id="bob")
+        mock_storage.save.assert_called_once_with({"version": "1.0"}, None, user_id="bob")
+
+
+def test_clear_memory_data_passes_user_id():
+    mock_storage = MagicMock()
+    mock_storage.save.return_value = True
+    with patch("deerflow.agents.memory.updater.get_memory_storage", return_value=mock_storage):
+        clear_memory_data(_TEST_MEMORY_CONFIG, user_id="charlie")
+        # Verify save was called with user_id
+        assert mock_storage.save.call_args.kwargs["user_id"] == "charlie"
diff --git a/backend/tests/test_migration_user_isolation.py b/backend/tests/test_migration_user_isolation.py
new file mode 100644
index 000000000..8a07c2130
--- /dev/null
+++ b/backend/tests/test_migration_user_isolation.py
@@ -0,0 +1,116 @@
+"""Tests for per-user data migration."""
+import json
+import pytest
+from pathlib import Path
+
+from deerflow.config.paths import Paths
+
+
+@pytest.fixture
+def base_dir(tmp_path: Path) -> Path:
+    return tmp_path
+
+
+@pytest.fixture
+def paths(base_dir: Path) -> Paths:
+    return Paths(base_dir)
+
+
+class TestMigrateThreadDirs:
+    def test_moves_thread_to_user_dir(self, base_dir: Path, paths: Paths):
+        legacy = base_dir / "threads" / "t1" / "user-data" / "workspace"
+        legacy.mkdir(parents=True)
+        (legacy / "file.txt").write_text("hello")
+
+        from scripts.migrate_user_isolation import migrate_thread_dirs
+        migrate_thread_dirs(paths, thread_owner_map={"t1": "alice"})
+
+        expected = base_dir / "users" / "alice" / "threads" / "t1" / "user-data" / "workspace" / "file.txt"
+        assert expected.exists()
+        assert expected.read_text() == "hello"
+        assert not (base_dir / "threads" / "t1").exists()
+
+    def test_unowned_thread_goes_to_default(self, base_dir: Path, paths: Paths):
+        legacy = base_dir / "threads" / "t2" / "user-data" / "workspace"
+        legacy.mkdir(parents=True)
+
+        from scripts.migrate_user_isolation import migrate_thread_dirs
+        migrate_thread_dirs(paths, thread_owner_map={})
+
+        expected = base_dir / "users" / "default" / "threads" / "t2"
+        assert expected.exists()
+
+    def test_idempotent_skip_already_migrated(self, base_dir: Path, paths: Paths):
+        new_dir = base_dir / "users" / "alice" / "threads" / "t1" / "user-data" / "workspace"
+        new_dir.mkdir(parents=True)
+
+        from scripts.migrate_user_isolation import migrate_thread_dirs
+        migrate_thread_dirs(paths, thread_owner_map={"t1": "alice"})
+        assert new_dir.exists()
+
+    def test_conflict_preserved(self, base_dir: Path, paths: Paths):
+        legacy = base_dir / "threads" / "t1" / "user-data" / "workspace"
+        legacy.mkdir(parents=True)
+        (legacy / "old.txt").write_text("old")
+
+        dest = base_dir / "users" / "alice" / "threads" / "t1" / "user-data" / "workspace"
+        dest.mkdir(parents=True)
+        (dest / "new.txt").write_text("new")
+
+        from scripts.migrate_user_isolation import migrate_thread_dirs
+        migrate_thread_dirs(paths, thread_owner_map={"t1": "alice"})
+
+        assert (dest / "new.txt").read_text() == "new"
+        conflicts = base_dir / "migration-conflicts" / "t1"
+        assert conflicts.exists()
+
+    def test_cleans_up_empty_legacy_dir(self, base_dir: Path, paths: Paths):
+        legacy = base_dir / "threads" / "t1" / "user-data"
+        legacy.mkdir(parents=True)
+
+        from scripts.migrate_user_isolation import migrate_thread_dirs
+        migrate_thread_dirs(paths, thread_owner_map={})
+
+        assert not (base_dir / "threads").exists()
+
+    def test_dry_run_does_not_move(self, base_dir: Path, paths: Paths):
+        legacy = base_dir / "threads" / "t1" / "user-data"
+        legacy.mkdir(parents=True)
+
+        from scripts.migrate_user_isolation import migrate_thread_dirs
+        report = migrate_thread_dirs(paths, thread_owner_map={"t1": "alice"}, dry_run=True)
+
+        assert len(report) == 1
+        assert (base_dir / "threads" / "t1").exists()  # not moved
+        assert not (base_dir / "users" / "alice" / "threads" / "t1").exists()
+
+
+class TestMigrateMemory:
+    def test_moves_global_memory(self, base_dir: Path, paths: Paths):
+        legacy_mem = base_dir / "memory.json"
+        legacy_mem.write_text(json.dumps({"version": "1.0", "facts": []}))
+
+        from scripts.migrate_user_isolation import migrate_memory
+        migrate_memory(paths, user_id="default")
+
+        expected = base_dir / "users" / "default" / "memory.json"
+        assert expected.exists()
+        assert not legacy_mem.exists()
+
+    def test_skips_if_destination_exists(self, base_dir: Path, paths: Paths):
+        legacy_mem = base_dir / "memory.json"
+        legacy_mem.write_text(json.dumps({"version": "old"}))
+
+        dest = base_dir / "users" / "default" / "memory.json"
+        dest.parent.mkdir(parents=True)
+        dest.write_text(json.dumps({"version": "new"}))
+
+        from scripts.migrate_user_isolation import migrate_memory
+        migrate_memory(paths, user_id="default")
+
+        assert json.loads(dest.read_text())["version"] == "new"
+        assert (base_dir / "memory.legacy.json").exists()
+
+    def test_no_legacy_memory_is_noop(self, base_dir: Path, paths: Paths):
+        from scripts.migrate_user_isolation import migrate_memory
+        migrate_memory(paths, user_id="default")  # should not raise
diff --git a/backend/tests/test_model_factory.py b/backend/tests/test_model_factory.py
index b7badb991..920a4ef38 100644
--- a/backend/tests/test_model_factory.py
+++ b/backend/tests/test_model_factory.py
@@ -72,8 +72,7 @@ class FakeChatModel(BaseChatModel):
 
 
 def _patch_factory(monkeypatch, app_config: AppConfig, model_class=FakeChatModel):
-    """Patch get_app_config, resolve_class, and tracing for isolated unit tests."""
-    monkeypatch.setattr(factory_module, "get_app_config", lambda: app_config)
+    """Patch resolve_class and tracing for isolated unit tests."""
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: model_class)
     monkeypatch.setattr(factory_module, "build_tracing_callbacks", lambda: [])
 
@@ -88,7 +87,7 @@ def test_uses_first_model_when_name_is_none(monkeypatch):
     _patch_factory(monkeypatch, cfg)
 
     FakeChatModel.captured_kwargs = {}
-    factory_module.create_chat_model(name=None)
+    factory_module.create_chat_model(name=None, app_config=cfg)
 
     # resolve_class is called — if we reach here without ValueError, the correct model was used
     assert FakeChatModel.captured_kwargs.get("model") == "alpha"
@@ -96,11 +95,10 @@ def test_uses_first_model_when_name_is_none(monkeypatch):
 
 def test_raises_when_model_not_found(monkeypatch):
     cfg = _make_app_config([_make_model("only-model")])
-    monkeypatch.setattr(factory_module, "get_app_config", lambda: cfg)
     monkeypatch.setattr(factory_module, "build_tracing_callbacks", lambda: [])
 
     with pytest.raises(ValueError, match="ghost-model"):
-        factory_module.create_chat_model(name="ghost-model")
+        factory_module.create_chat_model(name="ghost-model", app_config=cfg)
 
 
 def test_appends_all_tracing_callbacks(monkeypatch):
@@ -109,7 +107,7 @@ def test_appends_all_tracing_callbacks(monkeypatch):
     monkeypatch.setattr(factory_module, "build_tracing_callbacks", lambda: ["smith-callback", "langfuse-callback"])
 
     FakeChatModel.captured_kwargs = {}
-    model = factory_module.create_chat_model(name="alpha")
+    model = factory_module.create_chat_model(name="alpha", app_config=cfg)
 
     assert model.callbacks == ["smith-callback", "langfuse-callback"]
 
@@ -127,7 +125,7 @@ def test_thinking_enabled_raises_when_not_supported_but_when_thinking_enabled_is
     _patch_factory(monkeypatch, cfg)
 
     with pytest.raises(ValueError, match="does not support thinking"):
-        factory_module.create_chat_model(name="no-think", thinking_enabled=True)
+        factory_module.create_chat_model(name="no-think", thinking_enabled=True, app_config=cfg)
 
 
 def test_thinking_enabled_raises_for_empty_when_thinking_enabled_explicitly_set(monkeypatch):
@@ -138,7 +136,7 @@ def test_thinking_enabled_raises_for_empty_when_thinking_enabled_explicitly_set(
     _patch_factory(monkeypatch, cfg)
 
     with pytest.raises(ValueError, match="does not support thinking"):
-        factory_module.create_chat_model(name="no-think-empty", thinking_enabled=True)
+        factory_module.create_chat_model(name="no-think-empty", thinking_enabled=True, app_config=cfg)
 
 
 def test_thinking_enabled_merges_when_thinking_enabled_settings(monkeypatch):
@@ -147,7 +145,7 @@ def test_thinking_enabled_merges_when_thinking_enabled_settings(monkeypatch):
     _patch_factory(monkeypatch, cfg)
 
     FakeChatModel.captured_kwargs = {}
-    factory_module.create_chat_model(name="thinker", thinking_enabled=True)
+    factory_module.create_chat_model(name="thinker", thinking_enabled=True, app_config=cfg)
 
     assert FakeChatModel.captured_kwargs.get("temperature") == 1.0
     assert FakeChatModel.captured_kwargs.get("max_tokens") == 16000
@@ -183,7 +181,7 @@ def test_thinking_disabled_openai_gateway_format(monkeypatch):
 
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
 
-    factory_module.create_chat_model(name="openai-gw", thinking_enabled=False)
+    factory_module.create_chat_model(name="openai-gw", thinking_enabled=False, app_config=cfg)
 
     assert captured.get("extra_body") == {"thinking": {"type": "disabled"}}
     assert captured.get("reasoning_effort") == "minimal"
@@ -216,7 +214,7 @@ def test_thinking_disabled_langchain_anthropic_format(monkeypatch):
 
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
 
-    factory_module.create_chat_model(name="anthropic-native", thinking_enabled=False)
+    factory_module.create_chat_model(name="anthropic-native", thinking_enabled=False, app_config=cfg)
 
     assert captured.get("thinking") == {"type": "disabled"}
     assert "extra_body" not in captured
@@ -238,7 +236,7 @@ def test_thinking_disabled_no_when_thinking_enabled_does_nothing(monkeypatch):
 
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
 
-    factory_module.create_chat_model(name="plain", thinking_enabled=False)
+    factory_module.create_chat_model(name="plain", thinking_enabled=False, app_config=cfg)
 
     assert "extra_body" not in captured
     assert "thinking" not in captured
@@ -278,7 +276,7 @@ def test_when_thinking_disabled_takes_precedence_over_hardcoded_disable(monkeypa
 
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
 
-    factory_module.create_chat_model(name="custom-disable", thinking_enabled=False)
+    factory_module.create_chat_model(name="custom-disable", thinking_enabled=False, app_config=cfg)
 
     assert captured.get("extra_body") == {"thinking": {"type": "disabled"}}
     # User overrode the hardcoded "minimal" with "low"
@@ -310,7 +308,7 @@ def test_when_thinking_disabled_not_used_when_thinking_enabled(monkeypatch):
 
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
 
-    factory_module.create_chat_model(name="wtd-ignored", thinking_enabled=True)
+    factory_module.create_chat_model(name="wtd-ignored", thinking_enabled=True, app_config=cfg)
 
     # when_thinking_enabled should apply, NOT when_thinking_disabled
     assert captured.get("extra_body") == {"thinking": {"type": "enabled"}}
@@ -339,7 +337,7 @@ def test_when_thinking_disabled_without_when_thinking_enabled_still_applies(monk
 
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
 
-    factory_module.create_chat_model(name="wtd-only", thinking_enabled=False)
+    factory_module.create_chat_model(name="wtd-only", thinking_enabled=False, app_config=cfg)
 
     # when_thinking_disabled is now gated independently of has_thinking_settings
     assert captured.get("reasoning_effort") == "low"
@@ -370,7 +368,7 @@ def test_when_thinking_disabled_excluded_from_model_dump(monkeypatch):
 
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
 
-    factory_module.create_chat_model(name="no-leak-wtd", thinking_enabled=True)
+    factory_module.create_chat_model(name="no-leak-wtd", thinking_enabled=True, app_config=cfg)
 
     # when_thinking_disabled value must NOT appear as a raw key
     assert "when_thinking_disabled" not in captured
@@ -394,7 +392,7 @@ def test_reasoning_effort_cleared_when_not_supported(monkeypatch):
 
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
 
-    factory_module.create_chat_model(name="no-effort", thinking_enabled=False)
+    factory_module.create_chat_model(name="no-effort", thinking_enabled=False, app_config=cfg)
 
     assert captured.get("reasoning_effort") is None
 
@@ -422,7 +420,7 @@ def test_reasoning_effort_preserved_when_supported(monkeypatch):
 
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
 
-    factory_module.create_chat_model(name="effort-model", thinking_enabled=False)
+    factory_module.create_chat_model(name="effort-model", thinking_enabled=False, app_config=cfg)
 
     # When supports_reasoning_effort=True, it should NOT be cleared to None
     # The disable path sets it to "minimal"; supports_reasoning_effort=True keeps it
@@ -458,7 +456,7 @@ def test_thinking_shortcut_enables_thinking_when_thinking_enabled(monkeypatch):
 
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
 
-    factory_module.create_chat_model(name="shortcut-model", thinking_enabled=True)
+    factory_module.create_chat_model(name="shortcut-model", thinking_enabled=True, app_config=cfg)
 
     assert captured.get("thinking") == thinking_settings
 
@@ -488,7 +486,7 @@ def test_thinking_shortcut_disables_thinking_when_thinking_disabled(monkeypatch)
 
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
 
-    factory_module.create_chat_model(name="shortcut-disable", thinking_enabled=False)
+    factory_module.create_chat_model(name="shortcut-disable", thinking_enabled=False, app_config=cfg)
 
     assert captured.get("thinking") == {"type": "disabled"}
     assert "extra_body" not in captured
@@ -520,7 +518,7 @@ def test_thinking_shortcut_merges_with_when_thinking_enabled(monkeypatch):
 
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
 
-    factory_module.create_chat_model(name="merge-model", thinking_enabled=True)
+    factory_module.create_chat_model(name="merge-model", thinking_enabled=True, app_config=cfg)
 
     # Both the thinking shortcut and when_thinking_enabled settings should be applied
     assert captured.get("thinking") == thinking_settings
@@ -552,7 +550,7 @@ def test_thinking_shortcut_not_leaked_into_model_when_disabled(monkeypatch):
 
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
 
-    factory_module.create_chat_model(name="no-leak", thinking_enabled=False)
+    factory_module.create_chat_model(name="no-leak", thinking_enabled=False, app_config=cfg)
 
     # The disable path should have set thinking to disabled (not the raw enabled shortcut)
     assert captured.get("thinking") == {"type": "disabled"}
@@ -590,7 +588,7 @@ def test_openai_compatible_provider_passes_base_url(monkeypatch):
 
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
 
-    factory_module.create_chat_model(name="minimax-m2.5")
+    factory_module.create_chat_model(name="minimax-m2.5", app_config=cfg)
 
     assert captured.get("model") == "MiniMax-M2.5"
     assert captured.get("base_url") == "https://api.minimax.io/v1"
@@ -731,11 +729,11 @@ def test_openai_compatible_provider_multiple_models(monkeypatch):
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
 
     # Create first model
-    factory_module.create_chat_model(name="minimax-m2.5")
+    factory_module.create_chat_model(name="minimax-m2.5", app_config=cfg)
     assert captured.get("model") == "MiniMax-M2.5"
 
     # Create second model
-    factory_module.create_chat_model(name="minimax-m2.5-highspeed")
+    factory_module.create_chat_model(name="minimax-m2.5-highspeed", app_config=cfg)
     assert captured.get("model") == "MiniMax-M2.5-highspeed"
 
 
@@ -763,7 +761,7 @@ def test_codex_provider_disables_reasoning_when_thinking_disabled(monkeypatch):
     monkeypatch.setattr(codex_provider_module, "CodexChatModel", FakeCodexChatModel)
 
     FakeChatModel.captured_kwargs = {}
-    factory_module.create_chat_model(name="codex", thinking_enabled=False)
+    factory_module.create_chat_model(name="codex", thinking_enabled=False, app_config=cfg)
 
     assert FakeChatModel.captured_kwargs.get("reasoning_effort") == "none"
 
@@ -783,7 +781,7 @@ def test_codex_provider_preserves_explicit_reasoning_effort(monkeypatch):
     monkeypatch.setattr(codex_provider_module, "CodexChatModel", FakeCodexChatModel)
 
     FakeChatModel.captured_kwargs = {}
-    factory_module.create_chat_model(name="codex", thinking_enabled=True, reasoning_effort="high")
+    factory_module.create_chat_model(name="codex", thinking_enabled=True, reasoning_effort="high", app_config=cfg)
 
     assert FakeChatModel.captured_kwargs.get("reasoning_effort") == "high"
 
@@ -803,7 +801,7 @@ def test_codex_provider_defaults_reasoning_effort_to_medium(monkeypatch):
     monkeypatch.setattr(codex_provider_module, "CodexChatModel", FakeCodexChatModel)
 
     FakeChatModel.captured_kwargs = {}
-    factory_module.create_chat_model(name="codex", thinking_enabled=True)
+    factory_module.create_chat_model(name="codex", thinking_enabled=True, app_config=cfg)
 
     assert FakeChatModel.captured_kwargs.get("reasoning_effort") == "medium"
 
@@ -824,7 +822,7 @@ def test_codex_provider_strips_unsupported_max_tokens(monkeypatch):
     monkeypatch.setattr(codex_provider_module, "CodexChatModel", FakeCodexChatModel)
 
     FakeChatModel.captured_kwargs = {}
-    factory_module.create_chat_model(name="codex", thinking_enabled=True)
+    factory_module.create_chat_model(name="codex", thinking_enabled=True, app_config=cfg)
 
     assert "max_tokens" not in FakeChatModel.captured_kwargs
 
@@ -837,7 +835,7 @@ def test_thinking_disabled_vllm_chat_template_format(monkeypatch):
         supports_thinking=True,
         when_thinking_enabled=wte,
     )
-    model.extra_body = {"top_k": 20}
+    model = model.model_copy(update={"extra_body": {"top_k": 20}})
     cfg = _make_app_config([model])
     _patch_factory(monkeypatch, cfg)
 
@@ -850,7 +848,7 @@ def test_thinking_disabled_vllm_chat_template_format(monkeypatch):
 
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
 
-    factory_module.create_chat_model(name="vllm-qwen", thinking_enabled=False)
+    factory_module.create_chat_model(name="vllm-qwen", thinking_enabled=False, app_config=cfg)
 
     assert captured.get("extra_body") == {"top_k": 20, "chat_template_kwargs": {"thinking": False}}
     assert captured.get("reasoning_effort") is None
@@ -864,7 +862,7 @@ def test_thinking_disabled_vllm_enable_thinking_format(monkeypatch):
         supports_thinking=True,
         when_thinking_enabled=wte,
     )
-    model.extra_body = {"top_k": 20}
+    model = model.model_copy(update={"extra_body": {"top_k": 20}})
     cfg = _make_app_config([model])
     _patch_factory(monkeypatch, cfg)
 
@@ -877,7 +875,7 @@ def test_thinking_disabled_vllm_enable_thinking_format(monkeypatch):
 
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
 
-    factory_module.create_chat_model(name="vllm-qwen-enable", thinking_enabled=False)
+    factory_module.create_chat_model(name="vllm-qwen-enable", thinking_enabled=False, app_config=cfg)
 
     assert captured.get("extra_body") == {
         "top_k": 20,
@@ -886,6 +884,85 @@ def test_thinking_disabled_vllm_enable_thinking_format(monkeypatch):
     assert captured.get("reasoning_effort") is None
 
 
+# ---------------------------------------------------------------------------
+# stream_usage injection
+# ---------------------------------------------------------------------------
+
+
+class _FakeWithStreamUsage(FakeChatModel):
+    """Fake model that declares stream_usage in model_fields (like BaseChatOpenAI)."""
+
+    stream_usage: bool | None = None
+
+
+def test_stream_usage_injected_for_openai_compatible_model(monkeypatch):
+    """Factory should set stream_usage=True for models with stream_usage field."""
+    cfg = _make_app_config([_make_model("deepseek", use="langchain_deepseek:ChatDeepSeek")])
+    _patch_factory(monkeypatch, cfg, model_class=_FakeWithStreamUsage)
+
+    captured: dict = {}
+
+    class CapturingModel(_FakeWithStreamUsage):
+        def __init__(self, **kwargs):
+            captured.update(kwargs)
+            BaseChatModel.__init__(self, **kwargs)
+
+    monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
+
+    factory_module.create_chat_model(name="deepseek", app_config=cfg)
+
+    assert captured.get("stream_usage") is True
+
+
+def test_stream_usage_not_injected_for_non_openai_model(monkeypatch):
+    """Factory should NOT inject stream_usage for models without the field."""
+    cfg = _make_app_config([_make_model("claude", use="langchain_anthropic:ChatAnthropic")])
+    _patch_factory(monkeypatch, cfg)
+
+    captured: dict = {}
+
+    class CapturingModel(FakeChatModel):
+        def __init__(self, **kwargs):
+            captured.update(kwargs)
+            BaseChatModel.__init__(self, **kwargs)
+
+    monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
+
+    factory_module.create_chat_model(name="claude", app_config=cfg)
+
+    assert "stream_usage" not in captured
+
+
+def test_stream_usage_not_overridden_when_explicitly_set_in_config(monkeypatch):
+    """If config dumps stream_usage=False, factory should respect it."""
+    # Build a ModelConfig with stream_usage=False as an extra field (extra="allow").
+    model_with_stream_usage = ModelConfig(
+        name="deepseek",
+        display_name="deepseek",
+        description=None,
+        use="langchain_deepseek:ChatDeepSeek",
+        model="deepseek",
+        supports_thinking=False,
+        supports_vision=False,
+        stream_usage=False,
+    )
+    cfg = _make_app_config([model_with_stream_usage])
+    _patch_factory(monkeypatch, cfg, model_class=_FakeWithStreamUsage)
+
+    captured: dict = {}
+
+    class CapturingModel(_FakeWithStreamUsage):
+        def __init__(self, **kwargs):
+            captured.update(kwargs)
+            BaseChatModel.__init__(self, **kwargs)
+
+    monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
+
+    factory_module.create_chat_model(name="deepseek", app_config=cfg)
+
+    assert captured.get("stream_usage") is False
+
+
 def test_openai_responses_api_settings_are_passed_to_chatopenai(monkeypatch):
     model = ModelConfig(
         name="gpt-5-responses",
@@ -911,7 +988,7 @@ def test_openai_responses_api_settings_are_passed_to_chatopenai(monkeypatch):
 
     monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel)
 
-    factory_module.create_chat_model(name="gpt-5-responses")
+    factory_module.create_chat_model(name="gpt-5-responses", app_config=cfg)
 
     assert captured.get("use_responses_api") is True
     assert captured.get("output_version") == "responses/v1"
@@ -952,7 +1029,7 @@ def test_no_duplicate_kwarg_when_reasoning_effort_in_config_and_thinking_disable
     _patch_factory(monkeypatch, cfg, model_class=CapturingModel)
 
     # Must not raise TypeError
-    factory_module.create_chat_model(name="doubao-model", thinking_enabled=False)
+    factory_module.create_chat_model(name="doubao-model", thinking_enabled=False, app_config=cfg)
 
     # kwargs (runtime) takes precedence: thinking-disabled path sets reasoning_effort=minimal
     assert captured.get("reasoning_effort") == "minimal"
diff --git a/backend/tests/test_owner_isolation.py b/backend/tests/test_owner_isolation.py
new file mode 100644
index 000000000..33d21f3e3
--- /dev/null
+++ b/backend/tests/test_owner_isolation.py
@@ -0,0 +1,465 @@
+"""Cross-user isolation tests — non-negotiable safety gate.
+
+Mirrors TC-API-17..20 from backend/docs/AUTH_TEST_PLAN.md. A failure
+here means users can see each other's data; PR must not merge.
+
+Architecture note
+-----------------
+These tests bypass the HTTP layer and exercise the storage-layer
+owner filter directly by switching the ``user_context`` contextvar
+between two users. The safety property under test is:
+
+  After a repository write with user_id=A, a subsequent read with
+  user_id=B must not return the row, and vice versa.
+
+The HTTP layer is covered by test_auth_middleware.py, which proves
+that a request cookie reaches the ``set_current_user`` call. Together
+the two suites prove the full chain:
+
+  cookie → middleware → contextvar → repository → isolation
+
+Every test in this file opts out of the autouse contextvar fixture
+(``@pytest.mark.no_auto_user``) so it can set the contextvar to the
+specific users it cares about.
+"""
+
+from __future__ import annotations
+
+from types import SimpleNamespace
+
+import pytest
+
+from deerflow.runtime.user_context import (
+    reset_current_user,
+    set_current_user,
+)
+
+USER_A = SimpleNamespace(id="user-a", email="a@test.local")
+USER_B = SimpleNamespace(id="user-b", email="b@test.local")
+
+
+async def _make_engines(tmp_path):
+    """Initialize the shared engine against a per-test SQLite DB.
+
+    Returns a cleanup coroutine the caller should await at the end.
+    """
+    from deerflow.persistence.engine import close_engine, init_engine
+
+    url = f"sqlite+aiosqlite:///{tmp_path / 'isolation.db'}"
+    await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
+    return close_engine
+
+
+def _as_user(user):
+    """Context manager-like helper that set/reset the contextvar."""
+
+    class _Ctx:
+        def __enter__(self):
+            self._token = set_current_user(user)
+            return user
+
+        def __exit__(self, *exc):
+            reset_current_user(self._token)
+
+    return _Ctx()
+
+
+# ── TC-API-17 — threads_meta isolation ────────────────────────────────────
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_thread_meta_cross_user_isolation(tmp_path):
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.persistence.thread_meta import ThreadMetaRepository
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        repo = ThreadMetaRepository(get_session_factory())
+
+        # User A creates a thread.
+        with _as_user(USER_A):
+            await repo.create("t-alpha", display_name="A's private thread")
+
+        # User B creates a thread.
+        with _as_user(USER_B):
+            await repo.create("t-beta", display_name="B's private thread")
+
+        # User A must see only A's thread.
+        with _as_user(USER_A):
+            a_view = await repo.get("t-alpha")
+            assert a_view is not None
+            assert a_view["display_name"] == "A's private thread"
+
+            # CRITICAL: User A must NOT see B's thread.
+            leaked = await repo.get("t-beta")
+            assert leaked is None, f"User A leaked User B's thread: {leaked}"
+
+            # Search should only return A's threads.
+            results = await repo.search()
+            assert [r["thread_id"] for r in results] == ["t-alpha"]
+
+        # User B must see only B's thread.
+        with _as_user(USER_B):
+            b_view = await repo.get("t-beta")
+            assert b_view is not None
+            assert b_view["display_name"] == "B's private thread"
+
+            leaked = await repo.get("t-alpha")
+            assert leaked is None, f"User B leaked User A's thread: {leaked}"
+
+            results = await repo.search()
+            assert [r["thread_id"] for r in results] == ["t-beta"]
+    finally:
+        await cleanup()
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_thread_meta_cross_user_mutation_denied(tmp_path):
+    """User B cannot update or delete a thread owned by User A."""
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.persistence.thread_meta import ThreadMetaRepository
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        repo = ThreadMetaRepository(get_session_factory())
+
+        with _as_user(USER_A):
+            await repo.create("t-alpha", display_name="original")
+
+        # User B tries to rename A's thread — must be a no-op.
+        with _as_user(USER_B):
+            await repo.update_display_name("t-alpha", "hacked")
+
+        # Verify the row is unchanged from A's perspective.
+        with _as_user(USER_A):
+            row = await repo.get("t-alpha")
+            assert row is not None
+            assert row["display_name"] == "original"
+
+        # User B tries to delete A's thread — must be a no-op.
+        with _as_user(USER_B):
+            await repo.delete("t-alpha")
+
+        # A's thread still exists.
+        with _as_user(USER_A):
+            row = await repo.get("t-alpha")
+            assert row is not None
+    finally:
+        await cleanup()
+
+
+# ── TC-API-18 — runs isolation ────────────────────────────────────────────
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_runs_cross_user_isolation(tmp_path):
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.persistence.run import RunRepository
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        repo = RunRepository(get_session_factory())
+
+        with _as_user(USER_A):
+            await repo.put("run-a1", thread_id="t-alpha")
+            await repo.put("run-a2", thread_id="t-alpha")
+
+        with _as_user(USER_B):
+            await repo.put("run-b1", thread_id="t-beta")
+
+        # User A must see only A's runs.
+        with _as_user(USER_A):
+            r = await repo.get("run-a1")
+            assert r is not None
+            assert r["run_id"] == "run-a1"
+
+            leaked = await repo.get("run-b1")
+            assert leaked is None, "User A leaked User B's run"
+
+            a_runs = await repo.list_by_thread("t-alpha")
+            assert {r["run_id"] for r in a_runs} == {"run-a1", "run-a2"}
+
+            # Listing B's thread from A's perspective: empty
+            empty = await repo.list_by_thread("t-beta")
+            assert empty == []
+
+        # User B must see only B's runs.
+        with _as_user(USER_B):
+            leaked = await repo.get("run-a1")
+            assert leaked is None, "User B leaked User A's run"
+
+            b_runs = await repo.list_by_thread("t-beta")
+            assert [r["run_id"] for r in b_runs] == ["run-b1"]
+    finally:
+        await cleanup()
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_runs_cross_user_delete_denied(tmp_path):
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.persistence.run import RunRepository
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        repo = RunRepository(get_session_factory())
+
+        with _as_user(USER_A):
+            await repo.put("run-a1", thread_id="t-alpha")
+
+        # User B tries to delete A's run — no-op.
+        with _as_user(USER_B):
+            await repo.delete("run-a1")
+
+        # A's run still exists.
+        with _as_user(USER_A):
+            row = await repo.get("run-a1")
+            assert row is not None
+    finally:
+        await cleanup()
+
+
+# ── TC-API-19 — run_events isolation (CRITICAL: content leak) ─────────────
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_run_events_cross_user_isolation(tmp_path):
+    """run_events holds raw conversation content — most sensitive leak vector."""
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.runtime.events.store.db import DbRunEventStore
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        store = DbRunEventStore(get_session_factory())
+
+        with _as_user(USER_A):
+            await store.put(
+                thread_id="t-alpha",
+                run_id="run-a1",
+                event_type="human_message",
+                category="message",
+                content="User A private question",
+            )
+            await store.put(
+                thread_id="t-alpha",
+                run_id="run-a1",
+                event_type="ai_message",
+                category="message",
+                content="User A private answer",
+            )
+
+        with _as_user(USER_B):
+            await store.put(
+                thread_id="t-beta",
+                run_id="run-b1",
+                event_type="human_message",
+                category="message",
+                content="User B private question",
+            )
+
+        # User A must see only A's events — CRITICAL.
+        with _as_user(USER_A):
+            msgs = await store.list_messages("t-alpha")
+            contents = [m["content"] for m in msgs]
+            assert "User A private question" in contents
+            assert "User A private answer" in contents
+            # CRITICAL: User B's content must not appear.
+            assert "User B private question" not in contents
+
+            # Attempt to read B's thread by guessing thread_id.
+            leaked = await store.list_messages("t-beta")
+            assert leaked == [], f"User A leaked User B's messages: {leaked}"
+
+            leaked_events = await store.list_events("t-beta", "run-b1")
+            assert leaked_events == [], "User A leaked User B's events"
+
+            # count_messages must also be zero for B's thread from A's view.
+            count = await store.count_messages("t-beta")
+            assert count == 0
+
+        # User B must see only B's events.
+        with _as_user(USER_B):
+            msgs = await store.list_messages("t-beta")
+            contents = [m["content"] for m in msgs]
+            assert "User B private question" in contents
+            assert "User A private question" not in contents
+            assert "User A private answer" not in contents
+
+            count = await store.count_messages("t-alpha")
+            assert count == 0
+    finally:
+        await cleanup()
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_run_events_cross_user_delete_denied(tmp_path):
+    """User B cannot delete User A's event stream."""
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.runtime.events.store.db import DbRunEventStore
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        store = DbRunEventStore(get_session_factory())
+
+        with _as_user(USER_A):
+            await store.put(
+                thread_id="t-alpha",
+                run_id="run-a1",
+                event_type="human_message",
+                category="message",
+                content="hello",
+            )
+
+        # User B tries to wipe A's thread events.
+        with _as_user(USER_B):
+            removed = await store.delete_by_thread("t-alpha")
+            assert removed == 0, f"User B deleted {removed} of User A's events"
+
+        # A's events still exist.
+        with _as_user(USER_A):
+            count = await store.count_messages("t-alpha")
+            assert count == 1
+    finally:
+        await cleanup()
+
+
+# ── TC-API-20 — feedback isolation ────────────────────────────────────────
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_feedback_cross_user_isolation(tmp_path):
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.persistence.feedback import FeedbackRepository
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        repo = FeedbackRepository(get_session_factory())
+
+        # User A submits positive feedback.
+        with _as_user(USER_A):
+            a_feedback = await repo.create(
+                run_id="run-a1",
+                thread_id="t-alpha",
+                rating=1,
+                comment="A liked this",
+            )
+
+        # User B submits negative feedback.
+        with _as_user(USER_B):
+            b_feedback = await repo.create(
+                run_id="run-b1",
+                thread_id="t-beta",
+                rating=-1,
+                comment="B disliked this",
+            )
+
+        # User A must see only A's feedback.
+        with _as_user(USER_A):
+            retrieved = await repo.get(a_feedback["feedback_id"])
+            assert retrieved is not None
+            assert retrieved["comment"] == "A liked this"
+
+            # CRITICAL: cannot read B's feedback by id.
+            leaked = await repo.get(b_feedback["feedback_id"])
+            assert leaked is None, "User A leaked User B's feedback"
+
+            # list_by_run for B's run must be empty.
+            empty = await repo.list_by_run("t-beta", "run-b1")
+            assert empty == []
+
+        # User B must see only B's feedback.
+        with _as_user(USER_B):
+            leaked = await repo.get(a_feedback["feedback_id"])
+            assert leaked is None, "User B leaked User A's feedback"
+
+            b_list = await repo.list_by_run("t-beta", "run-b1")
+            assert len(b_list) == 1
+            assert b_list[0]["comment"] == "B disliked this"
+    finally:
+        await cleanup()
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_feedback_cross_user_delete_denied(tmp_path):
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.persistence.feedback import FeedbackRepository
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        repo = FeedbackRepository(get_session_factory())
+
+        with _as_user(USER_A):
+            fb = await repo.create(run_id="run-a1", thread_id="t-alpha", rating=1)
+
+        # User B tries to delete A's feedback — must return False (no-op).
+        with _as_user(USER_B):
+            deleted = await repo.delete(fb["feedback_id"])
+            assert deleted is False, "User B deleted User A's feedback"
+
+        # A's feedback still retrievable.
+        with _as_user(USER_A):
+            row = await repo.get(fb["feedback_id"])
+            assert row is not None
+    finally:
+        await cleanup()
+
+
+# ── Regression: AUTO sentinel without contextvar must raise ───────────────
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_repository_without_context_raises(tmp_path):
+    """Defense-in-depth: calling repo methods without a user context errors."""
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.persistence.thread_meta import ThreadMetaRepository
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        repo = ThreadMetaRepository(get_session_factory())
+        # Contextvar is explicitly unset under @pytest.mark.no_auto_user.
+        with pytest.raises(RuntimeError, match="no user context is set"):
+            await repo.get("anything")
+    finally:
+        await cleanup()
+
+
+# ── Escape hatch: explicit user_id=None bypasses filter (for migration) ──
+
+
+@pytest.mark.anyio
+@pytest.mark.no_auto_user
+async def test_explicit_none_bypasses_filter(tmp_path):
+    """Migration scripts pass user_id=None to see all rows regardless of owner."""
+    from deerflow.persistence.engine import get_session_factory
+    from deerflow.persistence.thread_meta import ThreadMetaRepository
+
+    cleanup = await _make_engines(tmp_path)
+    try:
+        repo = ThreadMetaRepository(get_session_factory())
+
+        # Seed data as two different users.
+        with _as_user(USER_A):
+            await repo.create("t-alpha")
+        with _as_user(USER_B):
+            await repo.create("t-beta")
+
+        # Migration-style read: no contextvar, explicit None bypass.
+        all_rows = await repo.search(user_id=None)
+        thread_ids = {r["thread_id"] for r in all_rows}
+        assert thread_ids == {"t-alpha", "t-beta"}
+
+        # Explicit get with None does not apply the filter either.
+        row_a = await repo.get("t-alpha", user_id=None)
+        assert row_a is not None
+        row_b = await repo.get("t-beta", user_id=None)
+        assert row_b is not None
+    finally:
+        await cleanup()
diff --git a/backend/tests/test_paths_user_isolation.py b/backend/tests/test_paths_user_isolation.py
new file mode 100644
index 000000000..e74276a32
--- /dev/null
+++ b/backend/tests/test_paths_user_isolation.py
@@ -0,0 +1,167 @@
+"""Tests for user-scoped path resolution in Paths."""
+import pytest
+from pathlib import Path
+
+from deerflow.config.paths import Paths
+
+
+@pytest.fixture
+def paths(tmp_path: Path) -> Paths:
+    return Paths(tmp_path)
+
+
+class TestValidateUserId:
+    def test_valid_user_id(self, paths: Paths):
+        d = paths.user_dir("u-abc-123")
+        assert d == paths.base_dir / "users" / "u-abc-123"
+
+    def test_rejects_path_traversal(self, paths: Paths):
+        with pytest.raises(ValueError, match="Invalid user_id"):
+            paths.user_dir("../escape")
+
+    def test_rejects_slash(self, paths: Paths):
+        with pytest.raises(ValueError, match="Invalid user_id"):
+            paths.user_dir("foo/bar")
+
+    def test_rejects_empty(self, paths: Paths):
+        with pytest.raises(ValueError, match="Invalid user_id"):
+            paths.user_dir("")
+
+
+class TestUserDir:
+    def test_user_dir(self, paths: Paths):
+        assert paths.user_dir("alice") == paths.base_dir / "users" / "alice"
+
+
+class TestUserMemoryFile:
+    def test_user_memory_file(self, paths: Paths):
+        assert paths.user_memory_file("bob") == paths.base_dir / "users" / "bob" / "memory.json"
+
+
+class TestUserAgentMemoryFile:
+    def test_user_agent_memory_file(self, paths: Paths):
+        expected = paths.base_dir / "users" / "bob" / "agents" / "myagent" / "memory.json"
+        assert paths.user_agent_memory_file("bob", "myagent") == expected
+
+    def test_user_agent_memory_file_lowercases_name(self, paths: Paths):
+        expected = paths.base_dir / "users" / "bob" / "agents" / "myagent" / "memory.json"
+        assert paths.user_agent_memory_file("bob", "MyAgent") == expected
+
+
+class TestUserThreadDir:
+    def test_user_thread_dir(self, paths: Paths):
+        expected = paths.base_dir / "users" / "u1" / "threads" / "t1"
+        assert paths.thread_dir("t1", user_id="u1") == expected
+
+    def test_thread_dir_no_user_id_falls_back_to_legacy(self, paths: Paths):
+        expected = paths.base_dir / "threads" / "t1"
+        assert paths.thread_dir("t1") == expected
+
+
+class TestUserSandboxDirs:
+    def test_sandbox_work_dir(self, paths: Paths):
+        expected = paths.base_dir / "users" / "u1" / "threads" / "t1" / "user-data" / "workspace"
+        assert paths.sandbox_work_dir("t1", user_id="u1") == expected
+
+    def test_sandbox_uploads_dir(self, paths: Paths):
+        expected = paths.base_dir / "users" / "u1" / "threads" / "t1" / "user-data" / "uploads"
+        assert paths.sandbox_uploads_dir("t1", user_id="u1") == expected
+
+    def test_sandbox_outputs_dir(self, paths: Paths):
+        expected = paths.base_dir / "users" / "u1" / "threads" / "t1" / "user-data" / "outputs"
+        assert paths.sandbox_outputs_dir("t1", user_id="u1") == expected
+
+    def test_sandbox_user_data_dir(self, paths: Paths):
+        expected = paths.base_dir / "users" / "u1" / "threads" / "t1" / "user-data"
+        assert paths.sandbox_user_data_dir("t1", user_id="u1") == expected
+
+    def test_acp_workspace_dir(self, paths: Paths):
+        expected = paths.base_dir / "users" / "u1" / "threads" / "t1" / "acp-workspace"
+        assert paths.acp_workspace_dir("t1", user_id="u1") == expected
+
+    def test_legacy_sandbox_work_dir(self, paths: Paths):
+        expected = paths.base_dir / "threads" / "t1" / "user-data" / "workspace"
+        assert paths.sandbox_work_dir("t1") == expected
+
+
+class TestHostPathsWithUserId:
+    def test_host_thread_dir_with_user_id(self, paths: Paths):
+        result = paths.host_thread_dir("t1", user_id="u1")
+        assert "users" in result
+        assert "u1" in result
+        assert "threads" in result
+        assert "t1" in result
+
+    def test_host_thread_dir_legacy(self, paths: Paths):
+        result = paths.host_thread_dir("t1")
+        assert "threads" in result
+        assert "t1" in result
+        assert "users" not in result
+
+    def test_host_sandbox_user_data_dir_with_user_id(self, paths: Paths):
+        result = paths.host_sandbox_user_data_dir("t1", user_id="u1")
+        assert "users" in result
+        assert "user-data" in result
+
+    def test_host_sandbox_work_dir_with_user_id(self, paths: Paths):
+        result = paths.host_sandbox_work_dir("t1", user_id="u1")
+        assert "workspace" in result
+
+    def test_host_sandbox_uploads_dir_with_user_id(self, paths: Paths):
+        result = paths.host_sandbox_uploads_dir("t1", user_id="u1")
+        assert "uploads" in result
+
+    def test_host_sandbox_outputs_dir_with_user_id(self, paths: Paths):
+        result = paths.host_sandbox_outputs_dir("t1", user_id="u1")
+        assert "outputs" in result
+
+    def test_host_acp_workspace_dir_with_user_id(self, paths: Paths):
+        result = paths.host_acp_workspace_dir("t1", user_id="u1")
+        assert "acp-workspace" in result
+
+
+class TestEnsureAndDeleteWithUserId:
+    def test_ensure_thread_dirs_creates_user_scoped(self, paths: Paths):
+        paths.ensure_thread_dirs("t1", user_id="u1")
+        assert paths.sandbox_work_dir("t1", user_id="u1").is_dir()
+        assert paths.sandbox_uploads_dir("t1", user_id="u1").is_dir()
+        assert paths.sandbox_outputs_dir("t1", user_id="u1").is_dir()
+        assert paths.acp_workspace_dir("t1", user_id="u1").is_dir()
+
+    def test_delete_thread_dir_removes_user_scoped(self, paths: Paths):
+        paths.ensure_thread_dirs("t1", user_id="u1")
+        assert paths.thread_dir("t1", user_id="u1").exists()
+        paths.delete_thread_dir("t1", user_id="u1")
+        assert not paths.thread_dir("t1", user_id="u1").exists()
+
+    def test_delete_thread_dir_idempotent(self, paths: Paths):
+        paths.delete_thread_dir("nonexistent", user_id="u1")  # should not raise
+
+    def test_ensure_thread_dirs_legacy_still_works(self, paths: Paths):
+        paths.ensure_thread_dirs("t1")
+        assert paths.sandbox_work_dir("t1").is_dir()
+
+    def test_user_scoped_and_legacy_are_independent(self, paths: Paths):
+        paths.ensure_thread_dirs("t1", user_id="u1")
+        paths.ensure_thread_dirs("t1")
+        # Both exist independently
+        assert paths.thread_dir("t1", user_id="u1").exists()
+        assert paths.thread_dir("t1").exists()
+        # Delete one doesn't affect the other
+        paths.delete_thread_dir("t1", user_id="u1")
+        assert not paths.thread_dir("t1", user_id="u1").exists()
+        assert paths.thread_dir("t1").exists()
+
+
+class TestResolveVirtualPathWithUserId:
+    def test_resolve_virtual_path_with_user_id(self, paths: Paths):
+        paths.ensure_thread_dirs("t1", user_id="u1")
+        result = paths.resolve_virtual_path("t1", "/mnt/user-data/workspace/file.txt", user_id="u1")
+        expected_base = paths.sandbox_user_data_dir("t1", user_id="u1").resolve()
+        assert str(result).startswith(str(expected_base))
+
+    def test_resolve_virtual_path_legacy(self, paths: Paths):
+        paths.ensure_thread_dirs("t1")
+        result = paths.resolve_virtual_path("t1", "/mnt/user-data/workspace/file.txt")
+        expected_base = paths.sandbox_user_data_dir("t1").resolve()
+        assert str(result).startswith(str(expected_base))
diff --git a/backend/tests/test_persistence_scaffold.py b/backend/tests/test_persistence_scaffold.py
new file mode 100644
index 000000000..178a08e84
--- /dev/null
+++ b/backend/tests/test_persistence_scaffold.py
@@ -0,0 +1,233 @@
+"""Tests for the persistence layer scaffolding.
+
+Tests:
+1. DatabaseConfig property derivation (paths, URLs)
+2. MemoryRunStore CRUD + user_id filtering
+3. Base.to_dict() via inspect mixin
+4. Engine init/close lifecycle (memory + SQLite)
+5. Postgres missing-dep error message
+"""
+
+from datetime import UTC, datetime
+
+import pytest
+
+from deerflow.config.database_config import DatabaseConfig
+from deerflow.runtime.runs.store.memory import MemoryRunStore
+
+# -- DatabaseConfig --
+
+
+class TestDatabaseConfig:
+    def test_defaults(self):
+        c = DatabaseConfig()
+        assert c.backend == "memory"
+        assert c.pool_size == 5
+
+    def test_sqlite_paths_unified(self):
+        c = DatabaseConfig(backend="sqlite", sqlite_dir="./mydata")
+        assert c.sqlite_path.endswith("deerflow.db")
+        assert "mydata" in c.sqlite_path
+        # Backward-compatible aliases point to the same file
+        assert c.checkpointer_sqlite_path == c.sqlite_path
+        assert c.app_sqlite_path == c.sqlite_path
+
+    def test_app_sqlalchemy_url_sqlite(self):
+        c = DatabaseConfig(backend="sqlite", sqlite_dir="./data")
+        url = c.app_sqlalchemy_url
+        assert url.startswith("sqlite+aiosqlite:///")
+        assert "deerflow.db" in url
+
+    def test_app_sqlalchemy_url_postgres(self):
+        c = DatabaseConfig(
+            backend="postgres",
+            postgres_url="postgresql://u:p@h:5432/db",
+        )
+        url = c.app_sqlalchemy_url
+        assert url.startswith("postgresql+asyncpg://")
+        assert "u:p@h:5432/db" in url
+
+    def test_app_sqlalchemy_url_postgres_already_asyncpg(self):
+        c = DatabaseConfig(
+            backend="postgres",
+            postgres_url="postgresql+asyncpg://u:p@h:5432/db",
+        )
+        url = c.app_sqlalchemy_url
+        assert url.count("asyncpg") == 1
+
+    def test_memory_has_no_url(self):
+        c = DatabaseConfig(backend="memory")
+        with pytest.raises(ValueError, match="No SQLAlchemy URL"):
+            _ = c.app_sqlalchemy_url
+
+
+# -- MemoryRunStore --
+
+
+class TestMemoryRunStore:
+    @pytest.fixture
+    def store(self):
+        return MemoryRunStore()
+
+    @pytest.mark.anyio
+    async def test_put_and_get(self, store):
+        await store.put("r1", thread_id="t1", status="pending")
+        row = await store.get("r1")
+        assert row is not None
+        assert row["run_id"] == "r1"
+        assert row["status"] == "pending"
+
+    @pytest.mark.anyio
+    async def test_get_missing_returns_none(self, store):
+        assert await store.get("nope") is None
+
+    @pytest.mark.anyio
+    async def test_update_status(self, store):
+        await store.put("r1", thread_id="t1")
+        await store.update_status("r1", "running")
+        assert (await store.get("r1"))["status"] == "running"
+
+    @pytest.mark.anyio
+    async def test_update_status_with_error(self, store):
+        await store.put("r1", thread_id="t1")
+        await store.update_status("r1", "error", error="boom")
+        row = await store.get("r1")
+        assert row["status"] == "error"
+        assert row["error"] == "boom"
+
+    @pytest.mark.anyio
+    async def test_list_by_thread(self, store):
+        await store.put("r1", thread_id="t1")
+        await store.put("r2", thread_id="t1")
+        await store.put("r3", thread_id="t2")
+        rows = await store.list_by_thread("t1")
+        assert len(rows) == 2
+        assert all(r["thread_id"] == "t1" for r in rows)
+
+    @pytest.mark.anyio
+    async def test_list_by_thread_owner_filter(self, store):
+        await store.put("r1", thread_id="t1", user_id="alice")
+        await store.put("r2", thread_id="t1", user_id="bob")
+        rows = await store.list_by_thread("t1", user_id="alice")
+        assert len(rows) == 1
+        assert rows[0]["user_id"] == "alice"
+
+    @pytest.mark.anyio
+    async def test_owner_none_returns_all(self, store):
+        await store.put("r1", thread_id="t1", user_id="alice")
+        await store.put("r2", thread_id="t1", user_id="bob")
+        rows = await store.list_by_thread("t1", user_id=None)
+        assert len(rows) == 2
+
+    @pytest.mark.anyio
+    async def test_delete(self, store):
+        await store.put("r1", thread_id="t1")
+        await store.delete("r1")
+        assert await store.get("r1") is None
+
+    @pytest.mark.anyio
+    async def test_delete_nonexistent_is_noop(self, store):
+        await store.delete("nope")  # should not raise
+
+    @pytest.mark.anyio
+    async def test_list_pending(self, store):
+        await store.put("r1", thread_id="t1", status="pending")
+        await store.put("r2", thread_id="t1", status="running")
+        await store.put("r3", thread_id="t2", status="pending")
+        pending = await store.list_pending()
+        assert len(pending) == 2
+        assert all(r["status"] == "pending" for r in pending)
+
+    @pytest.mark.anyio
+    async def test_list_pending_respects_before(self, store):
+        past = "2020-01-01T00:00:00+00:00"
+        future = "2099-01-01T00:00:00+00:00"
+        await store.put("r1", thread_id="t1", status="pending", created_at=past)
+        await store.put("r2", thread_id="t1", status="pending", created_at=future)
+        pending = await store.list_pending(before=datetime.now(UTC).isoformat())
+        assert len(pending) == 1
+        assert pending[0]["run_id"] == "r1"
+
+    @pytest.mark.anyio
+    async def test_list_pending_fifo_order(self, store):
+        await store.put("r2", thread_id="t1", status="pending", created_at="2024-01-02T00:00:00+00:00")
+        await store.put("r1", thread_id="t1", status="pending", created_at="2024-01-01T00:00:00+00:00")
+        pending = await store.list_pending()
+        assert pending[0]["run_id"] == "r1"
+
+
+# -- Base.to_dict mixin --
+
+
+class TestBaseToDictMixin:
+    @pytest.mark.anyio
+    async def test_to_dict_and_exclude(self, tmp_path):
+        """Create a temp SQLite DB with a minimal model, verify to_dict."""
+        from sqlalchemy import String
+        from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
+        from sqlalchemy.orm import Mapped, mapped_column
+
+        from deerflow.persistence.base import Base
+
+        class _Tmp(Base):
+            __tablename__ = "_tmp_test"
+            id: Mapped[str] = mapped_column(String(64), primary_key=True)
+            name: Mapped[str] = mapped_column(String(128))
+
+        engine = create_async_engine(f"sqlite+aiosqlite:///{tmp_path / 'test.db'}")
+        async with engine.begin() as conn:
+            await conn.run_sync(Base.metadata.create_all)
+
+        sf = async_sessionmaker(engine, expire_on_commit=False)
+        async with sf() as session:
+            session.add(_Tmp(id="1", name="hello"))
+            await session.commit()
+            obj = await session.get(_Tmp, "1")
+
+            assert obj.to_dict() == {"id": "1", "name": "hello"}
+            assert obj.to_dict(exclude={"name"}) == {"id": "1"}
+            assert "_Tmp" in repr(obj)
+
+        await engine.dispose()
+
+
+# -- Engine lifecycle --
+
+
+class TestEngineLifecycle:
+    @pytest.mark.anyio
+    async def test_memory_is_noop(self):
+        from deerflow.persistence.engine import close_engine, get_session_factory, init_engine
+
+        await init_engine("memory")
+        assert get_session_factory() is None
+        await close_engine()
+
+    @pytest.mark.anyio
+    async def test_sqlite_creates_engine(self, tmp_path):
+        from deerflow.persistence.engine import close_engine, get_session_factory, init_engine
+
+        url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
+        await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
+        sf = get_session_factory()
+        assert sf is not None
+        async with sf() as session:
+            assert session is not None
+        await close_engine()
+        assert get_session_factory() is None
+
+    @pytest.mark.anyio
+    async def test_postgres_without_asyncpg_gives_actionable_error(self):
+        """If asyncpg is not installed, error message tells user what to do."""
+        from deerflow.persistence.engine import init_engine
+
+        try:
+            import asyncpg  # noqa: F401
+
+            pytest.skip("asyncpg is installed -- cannot test missing-dep path")
+        except ImportError:
+            # asyncpg is not installed — this is the expected state for this test.
+            # We proceed to verify that init_engine raises an actionable ImportError.
+            pass  # noqa: S110 — intentionally ignored
+        with pytest.raises(ImportError, match="uv sync --extra postgres"):
+            await init_engine("postgres", url="postgresql+asyncpg://x:x@localhost/x")
diff --git a/backend/tests/test_present_file_tool_core_logic.py b/backend/tests/test_present_file_tool_core_logic.py
index df5c78229..acaac8a65 100644
--- a/backend/tests/test_present_file_tool_core_logic.py
+++ b/backend/tests/test_present_file_tool_core_logic.py
@@ -3,14 +3,24 @@
 import importlib
 from types import SimpleNamespace
 
+from deerflow.config.app_config import AppConfig
+from deerflow.config.deer_flow_context import DeerFlowContext
+from deerflow.config.sandbox_config import SandboxConfig
+
 present_file_tool_module = importlib.import_module("deerflow.tools.builtins.present_file_tool")
 
 
+def _make_context(thread_id: str) -> DeerFlowContext:
+    return DeerFlowContext(
+        app_config=AppConfig(sandbox=SandboxConfig(use="test")),
+        thread_id=thread_id,
+    )
+
+
 def _make_runtime(outputs_path: str) -> SimpleNamespace:
     return SimpleNamespace(
         state={"thread_data": {"outputs_path": outputs_path}},
-        context={"thread_id": "thread-1"},
-        config={},
+        context=_make_context("thread-1"),
     )
 
 
@@ -39,7 +49,7 @@ def test_present_files_keeps_virtual_outputs_path(tmp_path, monkeypatch):
     monkeypatch.setattr(
         present_file_tool_module,
         "get_paths",
-        lambda: SimpleNamespace(resolve_virtual_path=lambda thread_id, path: artifact_path),
+        lambda: SimpleNamespace(resolve_virtual_path=lambda thread_id, path, *, user_id=None: artifact_path),
     )
 
     result = present_file_tool_module.present_file_tool.func(
@@ -51,34 +61,6 @@ def test_present_files_keeps_virtual_outputs_path(tmp_path, monkeypatch):
     assert result.update["artifacts"] == ["/mnt/user-data/outputs/summary.json"]
 
 
-def test_present_files_uses_config_thread_id_when_context_missing(tmp_path, monkeypatch):
-    outputs_dir = tmp_path / "threads" / "thread-from-config" / "user-data" / "outputs"
-    outputs_dir.mkdir(parents=True)
-    artifact_path = outputs_dir / "summary.json"
-    artifact_path.write_text("{}")
-
-    monkeypatch.setattr(
-        present_file_tool_module,
-        "get_paths",
-        lambda: SimpleNamespace(resolve_virtual_path=lambda thread_id, path: artifact_path),
-    )
-
-    runtime = SimpleNamespace(
-        state={"thread_data": {"outputs_path": str(outputs_dir)}},
-        context={},
-        config={"configurable": {"thread_id": "thread-from-config"}},
-    )
-
-    result = present_file_tool_module.present_file_tool.func(
-        runtime=runtime,
-        filepaths=["/mnt/user-data/outputs/summary.json"],
-        tool_call_id="tc-config",
-    )
-
-    assert result.update["artifacts"] == ["/mnt/user-data/outputs/summary.json"]
-    assert result.update["messages"][0].content == "Successfully presented files"
-
-
 def test_present_files_rejects_paths_outside_outputs(tmp_path):
     outputs_dir = tmp_path / "threads" / "thread-1" / "user-data" / "outputs"
     workspace_dir = tmp_path / "threads" / "thread-1" / "user-data" / "workspace"
diff --git a/backend/tests/test_run_event_store.py b/backend/tests/test_run_event_store.py
new file mode 100644
index 000000000..2b22b2c6f
--- /dev/null
+++ b/backend/tests/test_run_event_store.py
@@ -0,0 +1,500 @@
+"""Tests for RunEventStore contract across all backends.
+
+Uses a helper to create the store for each backend type.
+Memory tests run directly; DB and JSONL tests create stores inside each test.
+"""
+
+import pytest
+
+from deerflow.runtime.events.store.memory import MemoryRunEventStore
+
+
+@pytest.fixture
+def store():
+    return MemoryRunEventStore()
+
+
+# -- Basic write and query --
+
+
+class TestPutAndSeq:
+    @pytest.mark.anyio
+    async def test_put_returns_dict_with_seq(self, store):
+        record = await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content="hello")
+        assert "seq" in record
+        assert record["seq"] == 1
+        assert record["thread_id"] == "t1"
+        assert record["run_id"] == "r1"
+        assert record["event_type"] == "human_message"
+        assert record["category"] == "message"
+        assert record["content"] == "hello"
+        assert "created_at" in record
+
+    @pytest.mark.anyio
+    async def test_seq_strictly_increasing_same_thread(self, store):
+        r1 = await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message")
+        r2 = await store.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message")
+        r3 = await store.put(thread_id="t1", run_id="r1", event_type="llm_end", category="trace")
+        assert r1["seq"] == 1
+        assert r2["seq"] == 2
+        assert r3["seq"] == 3
+
+    @pytest.mark.anyio
+    async def test_seq_independent_across_threads(self, store):
+        r1 = await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message")
+        r2 = await store.put(thread_id="t2", run_id="r2", event_type="human_message", category="message")
+        assert r1["seq"] == 1
+        assert r2["seq"] == 1
+
+    @pytest.mark.anyio
+    async def test_put_respects_provided_created_at(self, store):
+        ts = "2024-06-01T12:00:00+00:00"
+        record = await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", created_at=ts)
+        assert record["created_at"] == ts
+
+    @pytest.mark.anyio
+    async def test_put_metadata_preserved(self, store):
+        meta = {"model": "gpt-4", "tokens": 100}
+        record = await store.put(thread_id="t1", run_id="r1", event_type="llm_end", category="trace", metadata=meta)
+        assert record["metadata"] == meta
+
+
+# -- list_messages --
+
+
+class TestListMessages:
+    @pytest.mark.anyio
+    async def test_only_returns_message_category(self, store):
+        await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message")
+        await store.put(thread_id="t1", run_id="r1", event_type="llm_end", category="trace")
+        await store.put(thread_id="t1", run_id="r1", event_type="run_start", category="lifecycle")
+        messages = await store.list_messages("t1")
+        assert len(messages) == 1
+        assert messages[0]["category"] == "message"
+
+    @pytest.mark.anyio
+    async def test_ascending_seq_order(self, store):
+        await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content="first")
+        await store.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message", content="second")
+        await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content="third")
+        messages = await store.list_messages("t1")
+        seqs = [m["seq"] for m in messages]
+        assert seqs == sorted(seqs)
+
+    @pytest.mark.anyio
+    async def test_before_seq_pagination(self, store):
+        for i in range(10):
+            await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content=str(i))
+        messages = await store.list_messages("t1", before_seq=6, limit=3)
+        assert len(messages) == 3
+        assert [m["seq"] for m in messages] == [3, 4, 5]
+
+    @pytest.mark.anyio
+    async def test_after_seq_pagination(self, store):
+        for i in range(10):
+            await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content=str(i))
+        messages = await store.list_messages("t1", after_seq=7, limit=3)
+        assert len(messages) == 3
+        assert [m["seq"] for m in messages] == [8, 9, 10]
+
+    @pytest.mark.anyio
+    async def test_limit_restricts_count(self, store):
+        for _ in range(20):
+            await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message")
+        messages = await store.list_messages("t1", limit=5)
+        assert len(messages) == 5
+
+    @pytest.mark.anyio
+    async def test_cross_run_unified_ordering(self, store):
+        await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message")
+        await store.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message")
+        await store.put(thread_id="t1", run_id="r2", event_type="human_message", category="message")
+        await store.put(thread_id="t1", run_id="r2", event_type="ai_message", category="message")
+        messages = await store.list_messages("t1")
+        assert [m["seq"] for m in messages] == [1, 2, 3, 4]
+        assert messages[0]["run_id"] == "r1"
+        assert messages[2]["run_id"] == "r2"
+
+    @pytest.mark.anyio
+    async def test_default_returns_latest(self, store):
+        for _ in range(10):
+            await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message")
+        messages = await store.list_messages("t1", limit=3)
+        assert [m["seq"] for m in messages] == [8, 9, 10]
+
+
+# -- list_events --
+
+
+class TestListEvents:
+    @pytest.mark.anyio
+    async def test_returns_all_categories_for_run(self, store):
+        await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message")
+        await store.put(thread_id="t1", run_id="r1", event_type="llm_end", category="trace")
+        await store.put(thread_id="t1", run_id="r1", event_type="run_start", category="lifecycle")
+        events = await store.list_events("t1", "r1")
+        assert len(events) == 3
+
+    @pytest.mark.anyio
+    async def test_event_types_filter(self, store):
+        await store.put(thread_id="t1", run_id="r1", event_type="llm_start", category="trace")
+        await store.put(thread_id="t1", run_id="r1", event_type="llm_end", category="trace")
+        await store.put(thread_id="t1", run_id="r1", event_type="tool_start", category="trace")
+        events = await store.list_events("t1", "r1", event_types=["llm_end"])
+        assert len(events) == 1
+        assert events[0]["event_type"] == "llm_end"
+
+    @pytest.mark.anyio
+    async def test_only_returns_specified_run(self, store):
+        await store.put(thread_id="t1", run_id="r1", event_type="llm_end", category="trace")
+        await store.put(thread_id="t1", run_id="r2", event_type="llm_end", category="trace")
+        events = await store.list_events("t1", "r1")
+        assert len(events) == 1
+        assert events[0]["run_id"] == "r1"
+
+
+# -- list_messages_by_run --
+
+
+class TestListMessagesByRun:
+    @pytest.mark.anyio
+    async def test_only_messages_for_specified_run(self, store):
+        await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message")
+        await store.put(thread_id="t1", run_id="r1", event_type="llm_end", category="trace")
+        await store.put(thread_id="t1", run_id="r2", event_type="human_message", category="message")
+        messages = await store.list_messages_by_run("t1", "r1")
+        assert len(messages) == 1
+        assert messages[0]["run_id"] == "r1"
+        assert messages[0]["category"] == "message"
+
+
+# -- count_messages --
+
+
+class TestCountMessages:
+    @pytest.mark.anyio
+    async def test_counts_only_message_category(self, store):
+        await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message")
+        await store.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message")
+        await store.put(thread_id="t1", run_id="r1", event_type="llm_end", category="trace")
+        assert await store.count_messages("t1") == 2
+
+
+# -- put_batch --
+
+
+class TestPutBatch:
+    @pytest.mark.anyio
+    async def test_batch_assigns_seq(self, store):
+        events = [
+            {"thread_id": "t1", "run_id": "r1", "event_type": "human_message", "category": "message", "content": "a"},
+            {"thread_id": "t1", "run_id": "r1", "event_type": "ai_message", "category": "message", "content": "b"},
+            {"thread_id": "t1", "run_id": "r1", "event_type": "llm_end", "category": "trace"},
+        ]
+        results = await store.put_batch(events)
+        assert len(results) == 3
+        assert all("seq" in r for r in results)
+
+    @pytest.mark.anyio
+    async def test_batch_seq_strictly_increasing(self, store):
+        events = [
+            {"thread_id": "t1", "run_id": "r1", "event_type": "human_message", "category": "message"},
+            {"thread_id": "t1", "run_id": "r1", "event_type": "ai_message", "category": "message"},
+        ]
+        results = await store.put_batch(events)
+        assert results[0]["seq"] == 1
+        assert results[1]["seq"] == 2
+
+
+# -- delete --
+
+
+class TestDelete:
+    @pytest.mark.anyio
+    async def test_delete_by_thread(self, store):
+        await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message")
+        await store.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message")
+        await store.put(thread_id="t1", run_id="r2", event_type="llm_end", category="trace")
+        count = await store.delete_by_thread("t1")
+        assert count == 3
+        assert await store.list_messages("t1") == []
+        assert await store.count_messages("t1") == 0
+
+    @pytest.mark.anyio
+    async def test_delete_by_run(self, store):
+        await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message")
+        await store.put(thread_id="t1", run_id="r2", event_type="human_message", category="message")
+        await store.put(thread_id="t1", run_id="r2", event_type="llm_end", category="trace")
+        count = await store.delete_by_run("t1", "r2")
+        assert count == 2
+        messages = await store.list_messages("t1")
+        assert len(messages) == 1
+        assert messages[0]["run_id"] == "r1"
+
+    @pytest.mark.anyio
+    async def test_delete_nonexistent_thread_returns_zero(self, store):
+        assert await store.delete_by_thread("nope") == 0
+
+    @pytest.mark.anyio
+    async def test_delete_nonexistent_run_returns_zero(self, store):
+        await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message")
+        assert await store.delete_by_run("t1", "nope") == 0
+
+    @pytest.mark.anyio
+    async def test_delete_nonexistent_thread_for_run_returns_zero(self, store):
+        assert await store.delete_by_run("nope", "r1") == 0
+
+
+# -- Edge cases --
+
+
+class TestEdgeCases:
+    @pytest.mark.anyio
+    async def test_empty_thread_list_messages(self, store):
+        assert await store.list_messages("empty") == []
+
+    @pytest.mark.anyio
+    async def test_empty_run_list_events(self, store):
+        assert await store.list_events("empty", "r1") == []
+
+    @pytest.mark.anyio
+    async def test_empty_thread_count_messages(self, store):
+        assert await store.count_messages("empty") == 0
+
+
+# -- DB-specific tests --
+
+
+class TestDbRunEventStore:
+    """Tests for DbRunEventStore with temp SQLite."""
+
+    @pytest.mark.anyio
+    async def test_basic_crud(self, tmp_path):
+        from deerflow.persistence.engine import close_engine, get_session_factory, init_engine
+        from deerflow.runtime.events.store.db import DbRunEventStore
+
+        url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
+        await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
+        s = DbRunEventStore(get_session_factory())
+
+        r = await s.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content="hi")
+        assert r["seq"] == 1
+        r2 = await s.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message", content="hello")
+        assert r2["seq"] == 2
+
+        messages = await s.list_messages("t1")
+        assert len(messages) == 2
+
+        count = await s.count_messages("t1")
+        assert count == 2
+
+        await close_engine()
+
+    @pytest.mark.anyio
+    async def test_trace_content_truncation(self, tmp_path):
+        from deerflow.persistence.engine import close_engine, get_session_factory, init_engine
+        from deerflow.runtime.events.store.db import DbRunEventStore
+
+        url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
+        await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
+        s = DbRunEventStore(get_session_factory(), max_trace_content=100)
+
+        long = "x" * 200
+        r = await s.put(thread_id="t1", run_id="r1", event_type="llm_end", category="trace", content=long)
+        assert len(r["content"]) == 100
+        assert r["metadata"].get("content_truncated") is True
+
+        # message content NOT truncated
+        m = await s.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message", content=long)
+        assert len(m["content"]) == 200
+
+        await close_engine()
+
+    @pytest.mark.anyio
+    async def test_pagination(self, tmp_path):
+        from deerflow.persistence.engine import close_engine, get_session_factory, init_engine
+        from deerflow.runtime.events.store.db import DbRunEventStore
+
+        url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
+        await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
+        s = DbRunEventStore(get_session_factory())
+
+        for i in range(10):
+            await s.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content=str(i))
+
+        # before_seq
+        msgs = await s.list_messages("t1", before_seq=6, limit=3)
+        assert [m["seq"] for m in msgs] == [3, 4, 5]
+
+        # after_seq
+        msgs = await s.list_messages("t1", after_seq=7, limit=3)
+        assert [m["seq"] for m in msgs] == [8, 9, 10]
+
+        # default (latest)
+        msgs = await s.list_messages("t1", limit=3)
+        assert [m["seq"] for m in msgs] == [8, 9, 10]
+
+        await close_engine()
+
+    @pytest.mark.anyio
+    async def test_delete(self, tmp_path):
+        from deerflow.persistence.engine import close_engine, get_session_factory, init_engine
+        from deerflow.runtime.events.store.db import DbRunEventStore
+
+        url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
+        await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
+        s = DbRunEventStore(get_session_factory())
+
+        await s.put(thread_id="t1", run_id="r1", event_type="human_message", category="message")
+        await s.put(thread_id="t1", run_id="r2", event_type="ai_message", category="message")
+        c = await s.delete_by_run("t1", "r2")
+        assert c == 1
+        assert await s.count_messages("t1") == 1
+
+        c = await s.delete_by_thread("t1")
+        assert c == 1
+        assert await s.count_messages("t1") == 0
+
+        await close_engine()
+
+    @pytest.mark.anyio
+    async def test_put_batch_seq_continuity(self, tmp_path):
+        """Batch write produces continuous seq values with no gaps."""
+        from deerflow.persistence.engine import close_engine, get_session_factory, init_engine
+        from deerflow.runtime.events.store.db import DbRunEventStore
+
+        url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
+        await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
+        s = DbRunEventStore(get_session_factory())
+
+        events = [{"thread_id": "t1", "run_id": "r1", "event_type": "trace", "category": "trace"} for _ in range(50)]
+        results = await s.put_batch(events)
+        seqs = [r["seq"] for r in results]
+        assert seqs == list(range(1, 51))
+        await close_engine()
+
+
+# -- Factory tests --
+
+
+class TestMakeRunEventStore:
+    """Tests for the make_run_event_store factory function."""
+
+    @pytest.mark.anyio
+    async def test_memory_backend_default(self):
+        from deerflow.runtime.events.store import make_run_event_store
+
+        store = make_run_event_store(None)
+        assert type(store).__name__ == "MemoryRunEventStore"
+
+    @pytest.mark.anyio
+    async def test_memory_backend_explicit(self):
+        from unittest.mock import MagicMock
+
+        from deerflow.runtime.events.store import make_run_event_store
+
+        config = MagicMock()
+        config.backend = "memory"
+        store = make_run_event_store(config)
+        assert type(store).__name__ == "MemoryRunEventStore"
+
+    @pytest.mark.anyio
+    async def test_db_backend_with_engine(self, tmp_path):
+        from unittest.mock import MagicMock
+
+        from deerflow.persistence.engine import close_engine, init_engine
+        from deerflow.runtime.events.store import make_run_event_store
+
+        url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
+        await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
+
+        config = MagicMock()
+        config.backend = "db"
+        config.max_trace_content = 10240
+        store = make_run_event_store(config)
+        assert type(store).__name__ == "DbRunEventStore"
+        await close_engine()
+
+    @pytest.mark.anyio
+    async def test_db_backend_no_engine_falls_back(self):
+        """db backend without engine falls back to memory."""
+        from unittest.mock import MagicMock
+
+        from deerflow.persistence.engine import close_engine, init_engine
+        from deerflow.runtime.events.store import make_run_event_store
+
+        await init_engine("memory")  # no engine created
+
+        config = MagicMock()
+        config.backend = "db"
+        store = make_run_event_store(config)
+        assert type(store).__name__ == "MemoryRunEventStore"
+        await close_engine()
+
+    @pytest.mark.anyio
+    async def test_jsonl_backend(self):
+        from unittest.mock import MagicMock
+
+        from deerflow.runtime.events.store import make_run_event_store
+
+        config = MagicMock()
+        config.backend = "jsonl"
+        store = make_run_event_store(config)
+        assert type(store).__name__ == "JsonlRunEventStore"
+
+    @pytest.mark.anyio
+    async def test_unknown_backend_raises(self):
+        from unittest.mock import MagicMock
+
+        from deerflow.runtime.events.store import make_run_event_store
+
+        config = MagicMock()
+        config.backend = "redis"
+        with pytest.raises(ValueError, match="Unknown"):
+            make_run_event_store(config)
+
+
+# -- JSONL-specific tests --
+
+
+class TestJsonlRunEventStore:
+    @pytest.mark.anyio
+    async def test_basic_crud(self, tmp_path):
+        from deerflow.runtime.events.store.jsonl import JsonlRunEventStore
+
+        s = JsonlRunEventStore(base_dir=tmp_path / "jsonl")
+        r = await s.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content="hi")
+        assert r["seq"] == 1
+        messages = await s.list_messages("t1")
+        assert len(messages) == 1
+
+    @pytest.mark.anyio
+    async def test_file_at_correct_path(self, tmp_path):
+        from deerflow.runtime.events.store.jsonl import JsonlRunEventStore
+
+        s = JsonlRunEventStore(base_dir=tmp_path / "jsonl")
+        await s.put(thread_id="t1", run_id="r1", event_type="human_message", category="message")
+        assert (tmp_path / "jsonl" / "threads" / "t1" / "runs" / "r1.jsonl").exists()
+
+    @pytest.mark.anyio
+    async def test_cross_run_messages(self, tmp_path):
+        from deerflow.runtime.events.store.jsonl import JsonlRunEventStore
+
+        s = JsonlRunEventStore(base_dir=tmp_path / "jsonl")
+        await s.put(thread_id="t1", run_id="r1", event_type="human_message", category="message")
+        await s.put(thread_id="t1", run_id="r2", event_type="human_message", category="message")
+        messages = await s.list_messages("t1")
+        assert len(messages) == 2
+        assert [m["seq"] for m in messages] == [1, 2]
+
+    @pytest.mark.anyio
+    async def test_delete_by_run(self, tmp_path):
+        from deerflow.runtime.events.store.jsonl import JsonlRunEventStore
+
+        s = JsonlRunEventStore(base_dir=tmp_path / "jsonl")
+        await s.put(thread_id="t1", run_id="r1", event_type="human_message", category="message")
+        await s.put(thread_id="t1", run_id="r2", event_type="human_message", category="message")
+        c = await s.delete_by_run("t1", "r2")
+        assert c == 1
+        assert not (tmp_path / "jsonl" / "threads" / "t1" / "runs" / "r2.jsonl").exists()
+        assert await s.count_messages("t1") == 1
diff --git a/backend/tests/test_run_event_store_pagination.py b/backend/tests/test_run_event_store_pagination.py
new file mode 100644
index 000000000..ac5ba4c2d
--- /dev/null
+++ b/backend/tests/test_run_event_store_pagination.py
@@ -0,0 +1,107 @@
+"""Tests for paginated list_messages_by_run across all RunEventStore backends."""
+import pytest
+
+from deerflow.runtime.events.store.memory import MemoryRunEventStore
+
+
+@pytest.fixture
+def base_store():
+    return MemoryRunEventStore()
+
+
+@pytest.mark.anyio
+async def test_list_messages_by_run_default_returns_all(base_store):
+    store = base_store
+    for i in range(7):
+        await store.put(
+            thread_id="t1", run_id="run-a",
+            event_type="human_message" if i % 2 == 0 else "ai_message",
+            category="message", content=f"msg-a-{i}",
+        )
+    for i in range(3):
+        await store.put(
+            thread_id="t1", run_id="run-b",
+            event_type="human_message", category="message", content=f"msg-b-{i}",
+        )
+    await store.put(thread_id="t1", run_id="run-a", event_type="tool_call", category="trace", content="trace")
+
+    msgs = await store.list_messages_by_run("t1", "run-a")
+    assert len(msgs) == 7
+    assert all(m["category"] == "message" for m in msgs)
+    assert all(m["run_id"] == "run-a" for m in msgs)
+
+
+@pytest.mark.anyio
+async def test_list_messages_by_run_with_limit(base_store):
+    store = base_store
+    for i in range(7):
+        await store.put(
+            thread_id="t1", run_id="run-a",
+            event_type="human_message" if i % 2 == 0 else "ai_message",
+            category="message", content=f"msg-a-{i}",
+        )
+
+    msgs = await store.list_messages_by_run("t1", "run-a", limit=3)
+    assert len(msgs) == 3
+    seqs = [m["seq"] for m in msgs]
+    assert seqs == sorted(seqs)
+
+
+@pytest.mark.anyio
+async def test_list_messages_by_run_after_seq(base_store):
+    store = base_store
+    for i in range(7):
+        await store.put(
+            thread_id="t1", run_id="run-a",
+            event_type="human_message" if i % 2 == 0 else "ai_message",
+            category="message", content=f"msg-a-{i}",
+        )
+
+    all_msgs = await store.list_messages_by_run("t1", "run-a")
+    cursor_seq = all_msgs[2]["seq"]
+    msgs = await store.list_messages_by_run("t1", "run-a", after_seq=cursor_seq, limit=50)
+    assert all(m["seq"] > cursor_seq for m in msgs)
+    assert len(msgs) == 4
+
+
+@pytest.mark.anyio
+async def test_list_messages_by_run_before_seq(base_store):
+    store = base_store
+    for i in range(7):
+        await store.put(
+            thread_id="t1", run_id="run-a",
+            event_type="human_message" if i % 2 == 0 else "ai_message",
+            category="message", content=f"msg-a-{i}",
+        )
+
+    all_msgs = await store.list_messages_by_run("t1", "run-a")
+    cursor_seq = all_msgs[4]["seq"]
+    msgs = await store.list_messages_by_run("t1", "run-a", before_seq=cursor_seq, limit=50)
+    assert all(m["seq"] < cursor_seq for m in msgs)
+    assert len(msgs) == 4
+
+
+@pytest.mark.anyio
+async def test_list_messages_by_run_does_not_include_other_run(base_store):
+    store = base_store
+    for i in range(7):
+        await store.put(
+            thread_id="t1", run_id="run-a",
+            event_type="human_message", category="message", content=f"msg-a-{i}",
+        )
+    for i in range(3):
+        await store.put(
+            thread_id="t1", run_id="run-b",
+            event_type="human_message", category="message", content=f"msg-b-{i}",
+        )
+
+    msgs = await store.list_messages_by_run("t1", "run-b")
+    assert len(msgs) == 3
+    assert all(m["run_id"] == "run-b" for m in msgs)
+
+
+@pytest.mark.anyio
+async def test_list_messages_by_run_empty_run(base_store):
+    store = base_store
+    msgs = await store.list_messages_by_run("t1", "nonexistent")
+    assert msgs == []
diff --git a/backend/tests/test_run_journal.py b/backend/tests/test_run_journal.py
new file mode 100644
index 000000000..b306f59ec
--- /dev/null
+++ b/backend/tests/test_run_journal.py
@@ -0,0 +1,1117 @@
+"""Tests for RunJournal callback handler.
+
+Uses MemoryRunEventStore as the backend for direct event inspection.
+"""
+
+import asyncio
+from unittest.mock import MagicMock
+from uuid import uuid4
+
+import pytest
+
+from deerflow.runtime.events.store.memory import MemoryRunEventStore
+from deerflow.runtime.journal import RunJournal
+
+
+@pytest.fixture
+def journal_setup():
+    store = MemoryRunEventStore()
+    j = RunJournal("r1", "t1", store, flush_threshold=100)
+    return j, store
+
+
+def _make_llm_response(content="Hello", usage=None, tool_calls=None, additional_kwargs=None):
+    """Create a mock LLM response with a message.
+
+    model_dump() returns checkpoint-aligned format matching real AIMessage.
+    """
+    msg = MagicMock()
+    msg.type = "ai"
+    msg.content = content
+    msg.id = f"msg-{id(msg)}"
+    msg.tool_calls = tool_calls or []
+    msg.invalid_tool_calls = []
+    msg.response_metadata = {"model_name": "test-model"}
+    msg.usage_metadata = usage
+    msg.additional_kwargs = additional_kwargs or {}
+    msg.name = None
+    # model_dump returns checkpoint-aligned format
+    msg.model_dump.return_value = {
+        "content": content,
+        "additional_kwargs": additional_kwargs or {},
+        "response_metadata": {"model_name": "test-model"},
+        "type": "ai",
+        "name": None,
+        "id": msg.id,
+        "tool_calls": tool_calls or [],
+        "invalid_tool_calls": [],
+        "usage_metadata": usage,
+    }
+
+    gen = MagicMock()
+    gen.message = msg
+
+    response = MagicMock()
+    response.generations = [[gen]]
+    return response
+
+
+class TestLlmCallbacks:
+    @pytest.mark.anyio
+    async def test_on_llm_end_produces_trace_event(self, journal_setup):
+        j, store = journal_setup
+        run_id = uuid4()
+        j.on_llm_start({}, [], run_id=run_id, tags=["lead_agent"])
+        j.on_llm_end(_make_llm_response("Hi"), run_id=run_id, tags=["lead_agent"])
+        await j.flush()
+        events = await store.list_events("t1", "r1")
+        trace_events = [e for e in events if e["event_type"] == "llm_response"]
+        assert len(trace_events) == 1
+        assert trace_events[0]["category"] == "trace"
+
+    @pytest.mark.anyio
+    async def test_on_llm_end_lead_agent_produces_ai_message(self, journal_setup):
+        j, store = journal_setup
+        run_id = uuid4()
+        j.on_llm_start({}, [], run_id=run_id, tags=["lead_agent"])
+        j.on_llm_end(_make_llm_response("Answer"), run_id=run_id, tags=["lead_agent"])
+        await j.flush()
+        messages = await store.list_messages("t1")
+        assert len(messages) == 1
+        assert messages[0]["event_type"] == "ai_message"
+        # Content is checkpoint-aligned model_dump format
+        assert messages[0]["content"]["type"] == "ai"
+        assert messages[0]["content"]["content"] == "Answer"
+
+    @pytest.mark.anyio
+    async def test_on_llm_end_with_tool_calls_produces_ai_tool_call(self, journal_setup):
+        """LLM response with pending tool_calls should produce ai_tool_call event."""
+        j, store = journal_setup
+        run_id = uuid4()
+        j.on_llm_end(
+            _make_llm_response("Let me search", tool_calls=[{"id": "call_1", "name": "search", "args": {}}]),
+            run_id=run_id,
+            tags=["lead_agent"],
+        )
+        await j.flush()
+        messages = await store.list_messages("t1")
+        assert len(messages) == 1
+        assert messages[0]["event_type"] == "ai_tool_call"
+
+    @pytest.mark.anyio
+    async def test_on_llm_end_subagent_no_ai_message(self, journal_setup):
+        j, store = journal_setup
+        run_id = uuid4()
+        j.on_llm_start({}, [], run_id=run_id, tags=["subagent:research"])
+        j.on_llm_end(_make_llm_response("Sub answer"), run_id=run_id, tags=["subagent:research"])
+        await j.flush()
+        messages = await store.list_messages("t1")
+        assert len(messages) == 0
+
+    @pytest.mark.anyio
+    async def test_token_accumulation(self, journal_setup):
+        j, store = journal_setup
+        usage1 = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
+        usage2 = {"input_tokens": 20, "output_tokens": 10, "total_tokens": 30}
+        j.on_llm_end(_make_llm_response("A", usage=usage1), run_id=uuid4(), tags=["lead_agent"])
+        j.on_llm_end(_make_llm_response("B", usage=usage2), run_id=uuid4(), tags=["lead_agent"])
+        assert j._total_input_tokens == 30
+        assert j._total_output_tokens == 15
+        assert j._total_tokens == 45
+        assert j._llm_call_count == 2
+
+    @pytest.mark.anyio
+    async def test_total_tokens_computed_from_input_output(self, journal_setup):
+        """If total_tokens is 0, it should be computed from input + output."""
+        j, store = journal_setup
+        j.on_llm_end(
+            _make_llm_response("Hi", usage={"input_tokens": 100, "output_tokens": 50, "total_tokens": 0}),
+            run_id=uuid4(),
+            tags=["lead_agent"],
+        )
+        assert j._total_tokens == 150
+        assert j._lead_agent_tokens == 150
+
+    @pytest.mark.anyio
+    async def test_caller_token_classification(self, journal_setup):
+        j, store = journal_setup
+        usage = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}
+        j.on_llm_end(_make_llm_response("A", usage=usage), run_id=uuid4(), tags=["lead_agent"])
+        j.on_llm_end(_make_llm_response("B", usage=usage), run_id=uuid4(), tags=["subagent:research"])
+        j.on_llm_end(_make_llm_response("C", usage=usage), run_id=uuid4(), tags=["middleware:summarization"])
+        assert j._lead_agent_tokens == 15
+        assert j._subagent_tokens == 15
+        assert j._middleware_tokens == 15
+
+    @pytest.mark.anyio
+    async def test_usage_metadata_none_no_crash(self, journal_setup):
+        j, store = journal_setup
+        j.on_llm_end(_make_llm_response("No usage", usage=None), run_id=uuid4(), tags=["lead_agent"])
+        await j.flush()
+
+    @pytest.mark.anyio
+    async def test_latency_tracking(self, journal_setup):
+        j, store = journal_setup
+        run_id = uuid4()
+        j.on_llm_start({}, [], run_id=run_id, tags=["lead_agent"])
+        j.on_llm_end(_make_llm_response("Fast"), run_id=run_id, tags=["lead_agent"])
+        await j.flush()
+        events = await store.list_events("t1", "r1")
+        llm_resp = [e for e in events if e["event_type"] == "llm_response"][0]
+        assert "latency_ms" in llm_resp["metadata"]
+        assert llm_resp["metadata"]["latency_ms"] is not None
+
+
+class TestLifecycleCallbacks:
+    @pytest.mark.anyio
+    async def test_chain_start_end_produce_lifecycle_events(self, journal_setup):
+        j, store = journal_setup
+        j.on_chain_start({}, {}, run_id=uuid4(), parent_run_id=None)
+        j.on_chain_end({}, run_id=uuid4(), parent_run_id=None)
+        await asyncio.sleep(0.05)
+        await j.flush()
+        events = await store.list_events("t1", "r1")
+        types = [e["event_type"] for e in events if e["category"] == "lifecycle"]
+        assert "run_start" in types
+        assert "run_end" in types
+
+    @pytest.mark.anyio
+    async def test_nested_chain_ignored(self, journal_setup):
+        j, store = journal_setup
+        parent_id = uuid4()
+        j.on_chain_start({}, {}, run_id=uuid4(), parent_run_id=parent_id)
+        j.on_chain_end({}, run_id=uuid4(), parent_run_id=parent_id)
+        await j.flush()
+        events = await store.list_events("t1", "r1")
+        lifecycle = [e for e in events if e["category"] == "lifecycle"]
+        assert len(lifecycle) == 0
+
+
+class TestToolCallbacks:
+    @pytest.mark.anyio
+    async def test_tool_start_end_produce_trace(self, journal_setup):
+        j, store = journal_setup
+        j.on_tool_start({"name": "web_search"}, "query", run_id=uuid4())
+        j.on_tool_end("results", run_id=uuid4(), name="web_search")
+        await j.flush()
+        events = await store.list_events("t1", "r1")
+        trace_types = {e["event_type"] for e in events if e["category"] == "trace"}
+        assert "tool_start" in trace_types
+        assert "tool_end" in trace_types
+
+    @pytest.mark.anyio
+    async def test_on_tool_error(self, journal_setup):
+        j, store = journal_setup
+        j.on_tool_error(TimeoutError("timeout"), run_id=uuid4(), name="web_fetch")
+        await j.flush()
+        events = await store.list_events("t1", "r1")
+        assert any(e["event_type"] == "tool_error" for e in events)
+
+
+class TestCustomEvents:
+    @pytest.mark.anyio
+    async def test_summarization_event(self, journal_setup):
+        j, store = journal_setup
+        j.on_custom_event(
+            "summarization",
+            {"summary": "Context was summarized.", "replaced_count": 5, "replaced_message_ids": ["a", "b"]},
+            run_id=uuid4(),
+        )
+        await j.flush()
+        events = await store.list_events("t1", "r1")
+        trace = [e for e in events if e["event_type"] == "summarization"]
+        assert len(trace) == 1
+        # Summarization goes to middleware category, not message
+        mw_events = [e for e in events if e["event_type"] == "middleware:summarize"]
+        assert len(mw_events) == 1
+        assert mw_events[0]["category"] == "middleware"
+        assert mw_events[0]["content"] == {"role": "system", "content": "Context was summarized."}
+        # No message events from summarization
+        messages = await store.list_messages("t1")
+        assert len(messages) == 0
+
+    @pytest.mark.anyio
+    async def test_non_summarization_custom_event(self, journal_setup):
+        j, store = journal_setup
+        j.on_custom_event("task_running", {"task_id": "t1", "status": "running"}, run_id=uuid4())
+        await j.flush()
+        events = await store.list_events("t1", "r1")
+        assert any(e["event_type"] == "task_running" for e in events)
+
+
+class TestBufferFlush:
+    @pytest.mark.anyio
+    async def test_flush_threshold(self, journal_setup):
+        j, store = journal_setup
+        j._flush_threshold = 3
+        j.on_tool_start({"name": "a"}, "x", run_id=uuid4())
+        j.on_tool_start({"name": "b"}, "x", run_id=uuid4())
+        assert len(j._buffer) == 2
+        j.on_tool_start({"name": "c"}, "x", run_id=uuid4())
+        await asyncio.sleep(0.1)
+        events = await store.list_events("t1", "r1")
+        assert len(events) >= 3
+
+    @pytest.mark.anyio
+    async def test_events_retained_when_no_loop(self, journal_setup):
+        """Events buffered in a sync (no-loop) context should survive
+        until the async flush() in the finally block."""
+        j, store = journal_setup
+        j._flush_threshold = 1
+
+        original = asyncio.get_running_loop
+
+        def no_loop():
+            raise RuntimeError("no running event loop")
+
+        asyncio.get_running_loop = no_loop
+        try:
+            j._put(event_type="llm_response", category="trace", content="test")
+        finally:
+            asyncio.get_running_loop = original
+
+        assert len(j._buffer) == 1
+        await j.flush()
+        events = await store.list_events("t1", "r1")
+        assert any(e["event_type"] == "llm_response" for e in events)
+
+
+class TestIdentifyCaller:
+    def test_lead_agent_tag(self, journal_setup):
+        j, _ = journal_setup
+        assert j._identify_caller({"tags": ["lead_agent"]}) == "lead_agent"
+
+    def test_subagent_tag(self, journal_setup):
+        j, _ = journal_setup
+        assert j._identify_caller({"tags": ["subagent:research"]}) == "subagent:research"
+
+    def test_middleware_tag(self, journal_setup):
+        j, _ = journal_setup
+        assert j._identify_caller({"tags": ["middleware:summarization"]}) == "middleware:summarization"
+
+    def test_no_tags_returns_lead_agent(self, journal_setup):
+        j, _ = journal_setup
+        assert j._identify_caller({"tags": []}) == "lead_agent"
+        assert j._identify_caller({}) == "lead_agent"
+
+
+class TestChainErrorCallback:
+    @pytest.mark.anyio
+    async def test_on_chain_error_writes_run_error(self, journal_setup):
+        j, store = journal_setup
+        j.on_chain_error(ValueError("boom"), run_id=uuid4(), parent_run_id=None)
+        await asyncio.sleep(0.05)
+        await j.flush()
+        events = await store.list_events("t1", "r1")
+        error_events = [e for e in events if e["event_type"] == "run_error"]
+        assert len(error_events) == 1
+        assert "boom" in error_events[0]["content"]
+        assert error_events[0]["metadata"]["error_type"] == "ValueError"
+
+
+class TestTokenTrackingDisabled:
+    @pytest.mark.anyio
+    async def test_track_token_usage_false(self):
+        store = MemoryRunEventStore()
+        j = RunJournal("r1", "t1", store, track_token_usage=False, flush_threshold=100)
+        j.on_llm_end(
+            _make_llm_response("X", usage={"input_tokens": 50, "output_tokens": 50, "total_tokens": 100}),
+            run_id=uuid4(),
+            tags=["lead_agent"],
+        )
+        data = j.get_completion_data()
+        assert data["total_tokens"] == 0
+        assert data["llm_call_count"] == 0
+
+
+class TestConvenienceFields:
+    @pytest.mark.anyio
+    async def test_last_ai_message_tracks_latest(self, journal_setup):
+        j, store = journal_setup
+        j.on_llm_end(_make_llm_response("First"), run_id=uuid4(), tags=["lead_agent"])
+        j.on_llm_end(_make_llm_response("Second"), run_id=uuid4(), tags=["lead_agent"])
+        data = j.get_completion_data()
+        assert data["last_ai_message"] == "Second"
+        assert data["message_count"] == 2
+
+    @pytest.mark.anyio
+    async def test_first_human_message_via_set(self, journal_setup):
+        j, _ = journal_setup
+        j.set_first_human_message("What is AI?")
+        data = j.get_completion_data()
+        assert data["first_human_message"] == "What is AI?"
+
+    @pytest.mark.anyio
+    async def test_get_completion_data(self, journal_setup):
+        j, _ = journal_setup
+        j._total_tokens = 100
+        j._msg_count = 5
+        data = j.get_completion_data()
+        assert data["total_tokens"] == 100
+        assert data["message_count"] == 5
+
+
+class TestUnknownCallerTokens:
+    @pytest.mark.anyio
+    async def test_unknown_caller_tokens_go_to_lead(self, journal_setup):
+        j, store = journal_setup
+        j.on_llm_end(
+            _make_llm_response("X", usage={"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}),
+            run_id=uuid4(),
+            tags=[],
+        )
+        assert j._lead_agent_tokens == 15
+
+
+# ---------------------------------------------------------------------------
+# SQLite-backed end-to-end test
+# ---------------------------------------------------------------------------
+
+
+class TestDbBackedLifecycle:
+    @pytest.mark.anyio
+    async def test_full_lifecycle_with_sqlite(self, tmp_path):
+        """Full lifecycle with SQLite-backed RunRepository + DbRunEventStore."""
+        from deerflow.persistence.engine import close_engine, get_session_factory, init_engine
+        from deerflow.persistence.run import RunRepository
+        from deerflow.runtime.events.store.db import DbRunEventStore
+        from deerflow.runtime.runs.manager import RunManager
+
+        url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
+        await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
+        sf = get_session_factory()
+
+        run_store = RunRepository(sf)
+        event_store = DbRunEventStore(sf)
+        mgr = RunManager(store=run_store)
+
+        # Create run
+        record = await mgr.create("t1", "lead_agent")
+        run_id = record.run_id
+
+        # Write human_message (checkpoint-aligned format)
+        from langchain_core.messages import HumanMessage
+
+        human_msg = HumanMessage(content="Hello DB")
+        await event_store.put(thread_id="t1", run_id=run_id, event_type="human_message", category="message", content=human_msg.model_dump())
+
+        # Simulate journal
+        journal = RunJournal(run_id, "t1", event_store, flush_threshold=100)
+        journal.set_first_human_message("Hello DB")
+
+        journal.on_chain_start({}, {}, run_id=uuid4(), parent_run_id=None)
+        llm_rid = uuid4()
+        journal.on_llm_start({"name": "test"}, [], run_id=llm_rid, tags=["lead_agent"])
+        journal.on_llm_end(
+            _make_llm_response("DB response", usage={"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}),
+            run_id=llm_rid,
+            tags=["lead_agent"],
+        )
+        journal.on_chain_end({}, run_id=uuid4(), parent_run_id=None)
+        await asyncio.sleep(0.05)
+        await journal.flush()
+
+        # Verify run persisted
+        row = await run_store.get(run_id)
+        assert row is not None
+        assert row["status"] == "pending"
+
+        # Update completion
+        completion = journal.get_completion_data()
+        await run_store.update_run_completion(run_id, status="success", **completion)
+        row = await run_store.get(run_id)
+        assert row["status"] == "success"
+        assert row["total_tokens"] == 15
+
+        # Verify messages from DB (checkpoint-aligned format)
+        messages = await event_store.list_messages("t1")
+        assert len(messages) == 2
+        assert messages[0]["event_type"] == "human_message"
+        assert messages[0]["content"]["type"] == "human"
+        assert messages[1]["event_type"] == "ai_message"
+        assert messages[1]["content"]["type"] == "ai"
+        assert messages[1]["content"]["content"] == "DB response"
+
+        # Verify events from DB
+        events = await event_store.list_events("t1", run_id)
+        event_types = {e["event_type"] for e in events}
+        assert "run_start" in event_types
+        assert "llm_response" in event_types
+        assert "run_end" in event_types
+
+        await close_engine()
+
+
+class TestDictContentFlag:
+    """Verify that content_is_dict metadata flag controls deserialization."""
+
+    @pytest.mark.anyio
+    async def test_db_store_str_starting_with_brace_not_deserialized(self, tmp_path):
+        """Plain string content starting with { should NOT be deserialized."""
+        from deerflow.persistence.engine import close_engine, get_session_factory, init_engine
+        from deerflow.runtime.events.store.db import DbRunEventStore
+
+        url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
+        await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
+        sf = get_session_factory()
+        store = DbRunEventStore(sf)
+
+        await store.put(
+            thread_id="t1",
+            run_id="r1",
+            event_type="tool_end",
+            category="trace",
+            content="{not json, just a string}",
+        )
+        events = await store.list_events("t1", "r1")
+        assert events[0]["content"] == "{not json, just a string}"
+        assert isinstance(events[0]["content"], str)
+
+        await close_engine()
+
+    @pytest.mark.anyio
+    async def test_db_store_str_starting_with_bracket_not_deserialized(self, tmp_path):
+        """Plain string content like '[1, 2, 3]' should NOT be deserialized."""
+        from deerflow.persistence.engine import close_engine, get_session_factory, init_engine
+        from deerflow.runtime.events.store.db import DbRunEventStore
+
+        url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
+        await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
+        sf = get_session_factory()
+        store = DbRunEventStore(sf)
+
+        await store.put(
+            thread_id="t1",
+            run_id="r1",
+            event_type="tool_end",
+            category="trace",
+            content="[1, 2, 3]",
+        )
+        events = await store.list_events("t1", "r1")
+        assert events[0]["content"] == "[1, 2, 3]"
+        assert isinstance(events[0]["content"], str)
+
+        await close_engine()
+
+
+class TestDictContent:
+    """Verify that store backends accept str | dict content."""
+
+    @pytest.mark.anyio
+    async def test_memory_store_dict_content(self):
+        store = MemoryRunEventStore()
+        record = await store.put(
+            thread_id="t1",
+            run_id="r1",
+            event_type="ai_message",
+            category="message",
+            content={"role": "assistant", "content": "Hello"},
+        )
+        assert record["content"] == {"role": "assistant", "content": "Hello"}
+        messages = await store.list_messages("t1")
+        assert len(messages) == 1
+        assert messages[0]["content"] == {"role": "assistant", "content": "Hello"}
+
+    @pytest.mark.anyio
+    async def test_memory_store_str_content_unchanged(self):
+        store = MemoryRunEventStore()
+        record = await store.put(
+            thread_id="t1",
+            run_id="r1",
+            event_type="ai_message",
+            category="message",
+            content="plain string",
+        )
+        assert record["content"] == "plain string"
+        assert isinstance(record["content"], str)
+
+    @pytest.mark.anyio
+    async def test_db_store_dict_content_roundtrip(self, tmp_path):
+        """Dict content survives DB roundtrip (JSON serialize on write, deserialize on read)."""
+        from deerflow.persistence.engine import close_engine, get_session_factory, init_engine
+        from deerflow.runtime.events.store.db import DbRunEventStore
+
+        url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
+        await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
+        sf = get_session_factory()
+        store = DbRunEventStore(sf)
+
+        nested = {"role": "assistant", "content": "Hi", "metadata": {"model": "gpt-4", "tokens": [1, 2, 3]}}
+        record = await store.put(
+            thread_id="t1",
+            run_id="r1",
+            event_type="ai_message",
+            category="message",
+            content=nested,
+        )
+        assert record["content"] == nested
+
+        messages = await store.list_messages("t1")
+        assert len(messages) == 1
+        assert messages[0]["content"] == nested
+
+        await close_engine()
+
+    @pytest.mark.anyio
+    async def test_db_store_trace_dict_truncation(self, tmp_path):
+        """Large dict trace content is truncated with metadata flag."""
+        from deerflow.persistence.engine import close_engine, get_session_factory, init_engine
+        from deerflow.runtime.events.store.db import DbRunEventStore
+
+        url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
+        await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
+        sf = get_session_factory()
+        store = DbRunEventStore(sf, max_trace_content=100)
+
+        large_dict = {"role": "assistant", "content": "x" * 200}
+        record = await store.put(
+            thread_id="t1",
+            run_id="r1",
+            event_type="llm_end",
+            category="trace",
+            content=large_dict,
+        )
+        assert record["metadata"].get("content_truncated") is True
+        # Content should be a truncated string (serialized JSON was too long)
+        assert isinstance(record["content"], str)
+        assert len(record["content"]) <= 100
+
+        await close_engine()
+
+
+class TestCheckpointAlignedHumanMessage:
+    @pytest.mark.anyio
+    async def test_human_message_checkpoint_format(self):
+        """human_message content uses model_dump() checkpoint format."""
+        from langchain_core.messages import HumanMessage
+
+        store = MemoryRunEventStore()
+        human_msg = HumanMessage(content="What is AI?")
+        await store.put(
+            thread_id="t1",
+            run_id="r1",
+            event_type="human_message",
+            category="message",
+            content=human_msg.model_dump(),
+            metadata={"message_id": "msg_001"},
+        )
+        messages = await store.list_messages("t1")
+        assert len(messages) == 1
+        assert messages[0]["content"]["type"] == "human"
+        assert messages[0]["content"]["content"] == "What is AI?"
+
+
+class TestCheckpointAlignedMessageFormat:
+    @pytest.mark.anyio
+    async def test_ai_message_checkpoint_format(self, journal_setup):
+        """ai_message content should be checkpoint-aligned model_dump dict."""
+        j, store = journal_setup
+        j.on_llm_end(_make_llm_response("Answer"), run_id=uuid4(), tags=["lead_agent"])
+        await j.flush()
+        messages = await store.list_messages("t1")
+        assert len(messages) == 1
+        assert messages[0]["content"]["type"] == "ai"
+        assert messages[0]["content"]["content"] == "Answer"
+        assert "response_metadata" in messages[0]["content"]
+        assert "additional_kwargs" in messages[0]["content"]
+
+    @pytest.mark.anyio
+    async def test_ai_tool_call_event(self, journal_setup):
+        """LLM response with tool_calls should produce ai_tool_call with model_dump content."""
+        j, store = journal_setup
+        tool_calls = [{"id": "call_1", "name": "search", "args": {"query": "test"}}]
+        j.on_llm_end(
+            _make_llm_response("Let me search", tool_calls=tool_calls),
+            run_id=uuid4(),
+            tags=["lead_agent"],
+        )
+        await j.flush()
+        messages = await store.list_messages("t1")
+        assert len(messages) == 1
+        assert messages[0]["event_type"] == "ai_tool_call"
+        assert messages[0]["content"]["type"] == "ai"
+        assert messages[0]["content"]["content"] == "Let me search"
+        assert len(messages[0]["content"]["tool_calls"]) == 1
+        tc = messages[0]["content"]["tool_calls"][0]
+        assert tc["id"] == "call_1"
+        assert tc["name"] == "search"
+
+    @pytest.mark.anyio
+    async def test_ai_tool_call_only_from_lead_agent(self, journal_setup):
+        """ai_tool_call should only be emitted for lead_agent, not subagents."""
+        j, store = journal_setup
+        tool_calls = [{"id": "call_1", "name": "search", "args": {}}]
+        j.on_llm_end(
+            _make_llm_response("searching", tool_calls=tool_calls),
+            run_id=uuid4(),
+            tags=["subagent:research"],
+        )
+        await j.flush()
+        messages = await store.list_messages("t1")
+        assert len(messages) == 0
+
+
+class TestToolResultMessage:
+    @pytest.mark.anyio
+    async def test_tool_end_produces_tool_result_message(self, journal_setup):
+        j, store = journal_setup
+        run_id = uuid4()
+        j.on_tool_start({"name": "web_search"}, '{"query": "test"}', run_id=run_id, tool_call_id="call_abc")
+        j.on_tool_end("search results here", run_id=run_id, name="web_search", tool_call_id="call_abc")
+        await j.flush()
+        messages = await store.list_messages("t1")
+        assert len(messages) == 1
+        assert messages[0]["event_type"] == "tool_result"
+        # Content is checkpoint-aligned model_dump format
+        assert messages[0]["content"]["type"] == "tool"
+        assert messages[0]["content"]["tool_call_id"] == "call_abc"
+        assert messages[0]["content"]["content"] == "search results here"
+        assert messages[0]["content"]["name"] == "web_search"
+
+    @pytest.mark.anyio
+    async def test_tool_result_missing_tool_call_id(self, journal_setup):
+        j, store = journal_setup
+        run_id = uuid4()
+        j.on_tool_start({"name": "bash"}, "ls", run_id=run_id)
+        j.on_tool_end("file1.txt", run_id=run_id, name="bash")
+        await j.flush()
+        messages = await store.list_messages("t1")
+        assert len(messages) == 1
+        assert messages[0]["content"]["type"] == "tool"
+
+    @pytest.mark.anyio
+    async def test_tool_end_extracts_from_tool_message_object(self, journal_setup):
+        """When LangChain passes a ToolMessage object as output, extract fields from it."""
+        from langchain_core.messages import ToolMessage
+
+        j, store = journal_setup
+        run_id = uuid4()
+        tool_msg = ToolMessage(
+            content="search results",
+            tool_call_id="call_from_obj",
+            name="web_search",
+            status="success",
+        )
+        j.on_tool_end(tool_msg, run_id=run_id)
+        await j.flush()
+
+        messages = await store.list_messages("t1")
+        assert len(messages) == 1
+        assert messages[0]["content"]["type"] == "tool"
+        assert messages[0]["content"]["tool_call_id"] == "call_from_obj"
+        assert messages[0]["content"]["content"] == "search results"
+        assert messages[0]["content"]["name"] == "web_search"
+        assert messages[0]["metadata"]["tool_name"] == "web_search"
+        assert messages[0]["metadata"]["status"] == "success"
+
+        events = await store.list_events("t1", "r1")
+        tool_end = [e for e in events if e["event_type"] == "tool_end"][0]
+        assert tool_end["metadata"]["tool_call_id"] == "call_from_obj"
+        assert tool_end["metadata"]["tool_name"] == "web_search"
+
+    @pytest.mark.anyio
+    async def test_tool_invoke_end_to_end_unwraps_command(self, journal_setup):
+        """End-to-end: invoke a real LangChain tool that returns Command(update={'messages':[ToolMessage]}).
+
+        This goes through the real LangChain callback path (tool.invoke -> CallbackManager
+        -> on_tool_start/on_tool_end), which is what the production agent uses. Mirrors
+        the ``present_files`` tool shape exactly.
+        """
+        from langchain_core.callbacks import CallbackManager
+        from langchain_core.messages import ToolMessage
+        from langchain_core.tools import tool
+        from langgraph.types import Command
+
+        j, store = journal_setup
+
+        @tool
+        def fake_present_files(filepaths: list[str]) -> Command:
+            """Fake present_files that returns a Command with an inner ToolMessage."""
+            return Command(
+                update={
+                    "artifacts": filepaths,
+                    "messages": [ToolMessage("Successfully presented files", tool_call_id="tc_123")],
+                },
+            )
+
+        # Real LangChain callback dispatch (matches production agent path)
+        cm = CallbackManager(handlers=[j])
+        fake_present_files.invoke(
+            {"filepaths": ["/mnt/user-data/outputs/report.md"]},
+            config={"callbacks": cm, "run_id": uuid4()},
+        )
+        await j.flush()
+
+        messages = await store.list_messages("t1")
+        assert len(messages) == 1, f"expected 1 message event, got {len(messages)}: {messages}"
+        content = messages[0]["content"]
+        assert content["type"] == "tool"
+        # CRITICAL: must be the inner ToolMessage text, not str(Command(...))
+        assert content["content"] == "Successfully presented files", (
+            f"Command unwrap failed; stored content = {content['content']!r}"
+        )
+        assert "Command(update=" not in str(content["content"])
+
+    @pytest.mark.anyio
+    async def test_tool_end_unwraps_command_with_inner_tool_message(self, journal_setup):
+        """Tools like ``present_files`` return Command(update={'messages': [ToolMessage(...)]}).
+
+        LangGraph unwraps the inner ToolMessage into checkpoint state, so the
+        event store must do the same — otherwise it captures ``str(Command(...))``
+        and the /history response diverges from the real rendered message.
+        """
+        from langchain_core.messages import ToolMessage
+        from langgraph.types import Command
+
+        j, store = journal_setup
+        run_id = uuid4()
+        inner = ToolMessage(
+            content="Successfully presented files",
+            tool_call_id="call_present",
+            name="present_files",
+            status="success",
+        )
+        cmd = Command(update={"artifacts": ["/mnt/user-data/outputs/report.md"], "messages": [inner]})
+        j.on_tool_end(cmd, run_id=run_id)
+        await j.flush()
+
+        messages = await store.list_messages("t1")
+        assert len(messages) == 1
+        content = messages[0]["content"]
+        assert content["type"] == "tool"
+        assert content["content"] == "Successfully presented files"
+        assert content["tool_call_id"] == "call_present"
+        assert content["name"] == "present_files"
+        assert "Command(update=" not in str(content["content"])
+
+    @pytest.mark.anyio
+    async def test_tool_message_object_overrides_kwargs(self, journal_setup):
+        """ToolMessage object fields take priority over kwargs."""
+        from langchain_core.messages import ToolMessage
+
+        j, store = journal_setup
+        run_id = uuid4()
+        tool_msg = ToolMessage(
+            content="result",
+            tool_call_id="call_obj",
+            name="tool_a",
+            status="success",
+        )
+        # Pass different values in kwargs — ToolMessage should win
+        j.on_tool_end(tool_msg, run_id=run_id, name="tool_b", tool_call_id="call_kwarg")
+        await j.flush()
+
+        messages = await store.list_messages("t1")
+        assert messages[0]["content"]["tool_call_id"] == "call_obj"
+        assert messages[0]["content"]["name"] == "tool_a"
+        assert messages[0]["metadata"]["tool_name"] == "tool_a"
+
+    @pytest.mark.anyio
+    async def test_tool_message_error_status(self, journal_setup):
+        """ToolMessage with status='error' propagates status to metadata."""
+        from langchain_core.messages import ToolMessage
+
+        j, store = journal_setup
+        run_id = uuid4()
+        tool_msg = ToolMessage(
+            content="something went wrong",
+            tool_call_id="call_err",
+            name="web_fetch",
+            status="error",
+        )
+        j.on_tool_end(tool_msg, run_id=run_id)
+        await j.flush()
+
+        events = await store.list_events("t1", "r1")
+        tool_end = [e for e in events if e["event_type"] == "tool_end"][0]
+        assert tool_end["metadata"]["status"] == "error"
+
+        messages = await store.list_messages("t1")
+        assert messages[0]["content"]["status"] == "error"
+        assert messages[0]["metadata"]["status"] == "error"
+
+    @pytest.mark.anyio
+    async def test_tool_message_fallback_to_cache(self, journal_setup):
+        """If ToolMessage has empty tool_call_id, fall back to cache from on_tool_start."""
+        from langchain_core.messages import ToolMessage
+
+        j, store = journal_setup
+        run_id = uuid4()
+        j.on_tool_start({"name": "bash"}, "ls", run_id=run_id, tool_call_id="call_cached")
+        tool_msg = ToolMessage(
+            content="file list",
+            tool_call_id="",
+            name="bash",
+        )
+        j.on_tool_end(tool_msg, run_id=run_id)
+        await j.flush()
+
+        messages = await store.list_messages("t1")
+        assert messages[0]["content"]["tool_call_id"] == "call_cached"
+
+    @pytest.mark.anyio
+    async def test_tool_error_produces_tool_result_message(self, journal_setup):
+        j, store = journal_setup
+        j.on_tool_error(TimeoutError("timeout"), run_id=uuid4(), name="web_fetch", tool_call_id="call_1")
+        await j.flush()
+        messages = await store.list_messages("t1")
+        assert len(messages) == 1
+        assert messages[0]["event_type"] == "tool_result"
+        assert messages[0]["content"]["type"] == "tool"
+        assert messages[0]["content"]["tool_call_id"] == "call_1"
+        assert "timeout" in messages[0]["content"]["content"]
+        assert messages[0]["content"]["status"] == "error"
+        assert messages[0]["metadata"]["status"] == "error"
+
+    @pytest.mark.anyio
+    async def test_tool_error_uses_cached_tool_call_id(self, journal_setup):
+        """on_tool_error should fall back to cached tool_call_id from on_tool_start."""
+        j, store = journal_setup
+        run_id = uuid4()
+        j.on_tool_start({"name": "web_fetch"}, "url", run_id=run_id, tool_call_id="call_cached")
+        j.on_tool_error(TimeoutError("timeout"), run_id=run_id, name="web_fetch")
+        await j.flush()
+        messages = await store.list_messages("t1")
+        assert len(messages) == 1
+        assert messages[0]["content"]["tool_call_id"] == "call_cached"
+
+
+def _make_base_messages():
+    """Create mock LangChain BaseMessages for on_chat_model_start."""
+    sys_msg = MagicMock()
+    sys_msg.content = "You are helpful."
+    sys_msg.type = "system"
+    sys_msg.tool_calls = []
+    sys_msg.tool_call_id = None
+
+    user_msg = MagicMock()
+    user_msg.content = "Hello"
+    user_msg.type = "human"
+    user_msg.tool_calls = []
+    user_msg.tool_call_id = None
+
+    return [sys_msg, user_msg]
+
+
+class TestLlmRequestResponse:
+    @pytest.mark.anyio
+    async def test_llm_request_event(self, journal_setup):
+        j, store = journal_setup
+        run_id = uuid4()
+        messages = _make_base_messages()
+        j.on_chat_model_start({"name": "gpt-4o"}, [messages], run_id=run_id, tags=["lead_agent"])
+        await j.flush()
+        events = await store.list_events("t1", "r1")
+        req_events = [e for e in events if e["event_type"] == "llm_request"]
+        assert len(req_events) == 1
+        content = req_events[0]["content"]
+        assert content["model"] == "gpt-4o"
+        assert len(content["messages"]) == 2
+        assert content["messages"][0]["role"] == "system"
+        assert content["messages"][1]["role"] == "user"
+
+    @pytest.mark.anyio
+    async def test_llm_response_event(self, journal_setup):
+        j, store = journal_setup
+        run_id = uuid4()
+        j.on_llm_start({}, [], run_id=run_id, tags=["lead_agent"])
+        j.on_llm_end(
+            _make_llm_response("Answer", usage={"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}),
+            run_id=run_id,
+            tags=["lead_agent"],
+        )
+        await j.flush()
+        events = await store.list_events("t1", "r1")
+        assert not any(e["event_type"] == "llm_end" for e in events)
+        resp_events = [e for e in events if e["event_type"] == "llm_response"]
+        assert len(resp_events) == 1
+        content = resp_events[0]["content"]
+        assert "choices" in content
+        assert content["choices"][0]["message"]["role"] == "assistant"
+        assert content["choices"][0]["message"]["content"] == "Answer"
+        assert content["usage"]["prompt_tokens"] == 10
+
+    @pytest.mark.anyio
+    async def test_llm_request_response_paired(self, journal_setup):
+        j, store = journal_setup
+        run_id = uuid4()
+        messages = _make_base_messages()
+        j.on_chat_model_start({"name": "gpt-4o"}, [messages], run_id=run_id, tags=["lead_agent"])
+        j.on_llm_end(
+            _make_llm_response("Hi", usage={"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}),
+            run_id=run_id,
+            tags=["lead_agent"],
+        )
+        await j.flush()
+        events = await store.list_events("t1", "r1")
+        req = [e for e in events if e["event_type"] == "llm_request"][0]
+        resp = [e for e in events if e["event_type"] == "llm_response"][0]
+        assert req["metadata"]["llm_call_index"] == resp["metadata"]["llm_call_index"]
+
+    @pytest.mark.anyio
+    async def test_no_llm_start_event(self, journal_setup):
+        j, store = journal_setup
+        run_id = uuid4()
+        j.on_llm_start({"name": "test"}, [], run_id=run_id, tags=["lead_agent"])
+        await j.flush()
+        events = await store.list_events("t1", "r1")
+        assert not any(e["event_type"] == "llm_start" for e in events)
+
+
+class TestMiddlewareEvents:
+    @pytest.mark.anyio
+    async def test_record_middleware_uses_middleware_category(self, journal_setup):
+        j, store = journal_setup
+        j.record_middleware(
+            "title",
+            name="TitleMiddleware",
+            hook="after_model",
+            action="generate_title",
+            changes={"title": "Test Title", "thread_id": "t1"},
+        )
+        await j.flush()
+        events = await store.list_events("t1", "r1")
+        mw_events = [e for e in events if e["event_type"] == "middleware:title"]
+        assert len(mw_events) == 1
+        assert mw_events[0]["category"] == "middleware"
+        assert mw_events[0]["content"]["name"] == "TitleMiddleware"
+        assert mw_events[0]["content"]["hook"] == "after_model"
+        assert mw_events[0]["content"]["action"] == "generate_title"
+        assert mw_events[0]["content"]["changes"]["title"] == "Test Title"
+
+    @pytest.mark.anyio
+    async def test_middleware_events_not_in_messages(self, journal_setup):
+        """Middleware events should not appear in list_messages()."""
+        j, store = journal_setup
+        j.record_middleware(
+            "title",
+            name="TitleMiddleware",
+            hook="after_model",
+            action="generate_title",
+            changes={"title": "Test"},
+        )
+        await j.flush()
+        messages = await store.list_messages("t1")
+        assert len(messages) == 0
+
+    @pytest.mark.anyio
+    async def test_middleware_tag_variants(self, journal_setup):
+        """Different middleware tags produce distinct event_types."""
+        j, store = journal_setup
+        j.record_middleware("title", name="TitleMiddleware", hook="after_model", action="generate_title", changes={})
+        j.record_middleware("guardrail", name="GuardrailMiddleware", hook="before_tool", action="deny", changes={})
+        await j.flush()
+        events = await store.list_events("t1", "r1")
+        event_types = {e["event_type"] for e in events}
+        assert "middleware:title" in event_types
+        assert "middleware:guardrail" in event_types
+
+
+class TestFullRunSequence:
+    @pytest.mark.anyio
+    async def test_complete_run_event_sequence(self):
+        """Simulate a full run: user -> LLM -> tool_call -> tool_result -> LLM -> final reply.
+
+        All message events use checkpoint-aligned model_dump format.
+        """
+        from langchain_core.messages import HumanMessage
+
+        store = MemoryRunEventStore()
+        j = RunJournal("r1", "t1", store, flush_threshold=100)
+
+        # 1. Human message (written by worker, using model_dump format)
+        human_msg = HumanMessage(content="Search for quantum computing")
+        await store.put(
+            thread_id="t1",
+            run_id="r1",
+            event_type="human_message",
+            category="message",
+            content=human_msg.model_dump(),
+        )
+        j.set_first_human_message("Search for quantum computing")
+
+        # 2. Run start
+        j.on_chain_start({}, {}, run_id=uuid4(), parent_run_id=None)
+
+        # 3. First LLM call -> tool_calls
+        llm1_id = uuid4()
+        sys_msg = MagicMock(content="You are helpful.", type="system", tool_calls=[], tool_call_id=None)
+        user_msg = MagicMock(content="Search for quantum computing", type="human", tool_calls=[], tool_call_id=None)
+        j.on_chat_model_start({"name": "gpt-4o"}, [[sys_msg, user_msg]], run_id=llm1_id, tags=["lead_agent"])
+        j.on_llm_end(
+            _make_llm_response(
+                "Let me search",
+                tool_calls=[{"id": "call_1", "name": "web_search", "args": {"query": "quantum computing"}}],
+                usage={"input_tokens": 100, "output_tokens": 20, "total_tokens": 120},
+            ),
+            run_id=llm1_id,
+            tags=["lead_agent"],
+        )
+
+        # 4. Tool execution
+        tool_id = uuid4()
+        j.on_tool_start({"name": "web_search"}, '{"query": "quantum computing"}', run_id=tool_id, tool_call_id="call_1")
+        j.on_tool_end("Quantum computing results...", run_id=tool_id, name="web_search", tool_call_id="call_1")
+
+        # 5. Middleware: title generation
+        j.record_middleware("title", name="TitleMiddleware", hook="after_model", action="generate_title", changes={"title": "Quantum Computing"})
+
+        # 6. Second LLM call -> final reply
+        llm2_id = uuid4()
+        j.on_chat_model_start({"name": "gpt-4o"}, [[sys_msg, user_msg]], run_id=llm2_id, tags=["lead_agent"])
+        j.on_llm_end(
+            _make_llm_response(
+                "Here are the results about quantum computing...",
+                usage={"input_tokens": 200, "output_tokens": 100, "total_tokens": 300},
+            ),
+            run_id=llm2_id,
+            tags=["lead_agent"],
+        )
+
+        # 7. Run end
+        j.on_chain_end({}, run_id=uuid4(), parent_run_id=None)
+        await asyncio.sleep(0.05)
+        await j.flush()
+
+        # Verify message sequence
+        messages = await store.list_messages("t1")
+        msg_types = [m["event_type"] for m in messages]
+        assert msg_types == ["human_message", "ai_tool_call", "tool_result", "ai_message"]
+
+        # Verify checkpoint-aligned format: all messages use "type" not "role"
+        assert messages[0]["content"]["type"] == "human"
+        assert messages[0]["content"]["content"] == "Search for quantum computing"
+        assert messages[1]["content"]["type"] == "ai"
+        assert "tool_calls" in messages[1]["content"]
+        assert messages[2]["content"]["type"] == "tool"
+        assert messages[2]["content"]["tool_call_id"] == "call_1"
+        assert messages[3]["content"]["type"] == "ai"
+        assert messages[3]["content"]["content"] == "Here are the results about quantum computing..."
+
+        # Verify trace events
+        events = await store.list_events("t1", "r1")
+        trace_types = [e["event_type"] for e in events if e["category"] == "trace"]
+        assert "llm_request" in trace_types
+        assert "llm_response" in trace_types
+        assert "tool_start" in trace_types
+        assert "tool_end" in trace_types
+        assert "llm_start" not in trace_types
+        assert "llm_end" not in trace_types
+
+        # Verify middleware events are in their own category
+        mw_events = [e for e in events if e["category"] == "middleware"]
+        assert len(mw_events) == 1
+        assert mw_events[0]["event_type"] == "middleware:title"
+
+        # Verify token accumulation
+        data = j.get_completion_data()
+        assert data["total_tokens"] == 420  # 120 + 300
+        assert data["llm_call_count"] == 2
+        assert data["lead_agent_tokens"] == 420
+        assert data["message_count"] == 1  # only final ai_message counts
+        assert data["last_ai_message"] == "Here are the results about quantum computing..."
+
+        # Verify all message contents are checkpoint-aligned dicts with "type" field
+        for m in messages:
+            assert isinstance(m["content"], dict)
+            assert "type" in m["content"]
diff --git a/backend/tests/test_run_repository.py b/backend/tests/test_run_repository.py
new file mode 100644
index 000000000..34ab9b492
--- /dev/null
+++ b/backend/tests/test_run_repository.py
@@ -0,0 +1,196 @@
+"""Tests for RunRepository (SQLAlchemy-backed RunStore).
+
+Uses a temp SQLite DB to test ORM-backed CRUD operations.
+"""
+
+import pytest
+
+from deerflow.persistence.run import RunRepository
+
+
+async def _make_repo(tmp_path):
+    from deerflow.persistence.engine import get_session_factory, init_engine
+
+    url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
+    await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
+    return RunRepository(get_session_factory())
+
+
+async def _cleanup():
+    from deerflow.persistence.engine import close_engine
+
+    await close_engine()
+
+
+class TestRunRepository:
+    @pytest.mark.anyio
+    async def test_put_and_get(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.put("r1", thread_id="t1", status="pending")
+        row = await repo.get("r1")
+        assert row is not None
+        assert row["run_id"] == "r1"
+        assert row["thread_id"] == "t1"
+        assert row["status"] == "pending"
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_get_missing_returns_none(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        assert await repo.get("nope") is None
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_update_status(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.put("r1", thread_id="t1")
+        await repo.update_status("r1", "running")
+        row = await repo.get("r1")
+        assert row["status"] == "running"
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_update_status_with_error(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.put("r1", thread_id="t1")
+        await repo.update_status("r1", "error", error="boom")
+        row = await repo.get("r1")
+        assert row["status"] == "error"
+        assert row["error"] == "boom"
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_list_by_thread(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.put("r1", thread_id="t1")
+        await repo.put("r2", thread_id="t1")
+        await repo.put("r3", thread_id="t2")
+        rows = await repo.list_by_thread("t1")
+        assert len(rows) == 2
+        assert all(r["thread_id"] == "t1" for r in rows)
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_list_by_thread_owner_filter(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.put("r1", thread_id="t1", user_id="alice")
+        await repo.put("r2", thread_id="t1", user_id="bob")
+        rows = await repo.list_by_thread("t1", user_id="alice")
+        assert len(rows) == 1
+        assert rows[0]["user_id"] == "alice"
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_delete(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.put("r1", thread_id="t1")
+        await repo.delete("r1")
+        assert await repo.get("r1") is None
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_delete_nonexistent_is_noop(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.delete("nope")  # should not raise
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_list_pending(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.put("r1", thread_id="t1", status="pending")
+        await repo.put("r2", thread_id="t1", status="running")
+        await repo.put("r3", thread_id="t2", status="pending")
+        pending = await repo.list_pending()
+        assert len(pending) == 2
+        assert all(r["status"] == "pending" for r in pending)
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_update_run_completion(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.put("r1", thread_id="t1", status="running")
+        await repo.update_run_completion(
+            "r1",
+            status="success",
+            total_input_tokens=100,
+            total_output_tokens=50,
+            total_tokens=150,
+            llm_call_count=2,
+            lead_agent_tokens=120,
+            subagent_tokens=20,
+            middleware_tokens=10,
+            message_count=3,
+            last_ai_message="The answer is 42",
+            first_human_message="What is the meaning?",
+        )
+        row = await repo.get("r1")
+        assert row["status"] == "success"
+        assert row["total_tokens"] == 150
+        assert row["llm_call_count"] == 2
+        assert row["lead_agent_tokens"] == 120
+        assert row["message_count"] == 3
+        assert row["last_ai_message"] == "The answer is 42"
+        assert row["first_human_message"] == "What is the meaning?"
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_metadata_preserved(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.put("r1", thread_id="t1", metadata={"key": "value"})
+        row = await repo.get("r1")
+        assert row["metadata"] == {"key": "value"}
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_kwargs_with_non_serializable(self, tmp_path):
+        """kwargs containing non-JSON-serializable objects should be safely handled."""
+        repo = await _make_repo(tmp_path)
+
+        class Dummy:
+            pass
+
+        await repo.put("r1", thread_id="t1", kwargs={"obj": Dummy()})
+        row = await repo.get("r1")
+        assert "obj" in row["kwargs"]
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_update_run_completion_preserves_existing_fields(self, tmp_path):
+        """update_run_completion does not overwrite thread_id or assistant_id."""
+        repo = await _make_repo(tmp_path)
+        await repo.put("r1", thread_id="t1", assistant_id="agent1", status="running")
+        await repo.update_run_completion("r1", status="success", total_tokens=100)
+        row = await repo.get("r1")
+        assert row["thread_id"] == "t1"
+        assert row["assistant_id"] == "agent1"
+        assert row["total_tokens"] == 100
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_list_by_thread_ordered_desc(self, tmp_path):
+        """list_by_thread returns newest first."""
+        repo = await _make_repo(tmp_path)
+        await repo.put("r1", thread_id="t1", created_at="2024-01-01T00:00:00+00:00")
+        await repo.put("r2", thread_id="t1", created_at="2024-01-02T00:00:00+00:00")
+        rows = await repo.list_by_thread("t1")
+        assert rows[0]["run_id"] == "r2"
+        assert rows[1]["run_id"] == "r1"
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_list_by_thread_limit(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        for i in range(5):
+            await repo.put(f"r{i}", thread_id="t1")
+        rows = await repo.list_by_thread("t1", limit=2)
+        assert len(rows) == 2
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_owner_none_returns_all(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.put("r1", thread_id="t1", user_id="alice")
+        await repo.put("r2", thread_id="t1", user_id="bob")
+        rows = await repo.list_by_thread("t1", user_id=None)
+        assert len(rows) == 2
+        await _cleanup()
diff --git a/backend/tests/test_runs_api_endpoints.py b/backend/tests/test_runs_api_endpoints.py
new file mode 100644
index 000000000..e6b73d865
--- /dev/null
+++ b/backend/tests/test_runs_api_endpoints.py
@@ -0,0 +1,243 @@
+"""Tests for GET /api/runs/{run_id}/messages and GET /api/runs/{run_id}/feedback endpoints."""
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+from _router_auth_helpers import make_authed_test_app
+from fastapi.testclient import TestClient
+
+from app.gateway.routers import runs
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_app(run_store=None, event_store=None, feedback_repo=None):
+    """Build a test FastAPI app with stub auth and mocked state."""
+    app = make_authed_test_app()
+    app.include_router(runs.router)
+
+    if run_store is not None:
+        app.state.run_store = run_store
+    if event_store is not None:
+        app.state.run_event_store = event_store
+    if feedback_repo is not None:
+        app.state.feedback_repo = feedback_repo
+
+    return app
+
+
+def _make_run_store(run_record: dict | None):
+    """Return an AsyncMock run store whose get() returns run_record."""
+    store = MagicMock()
+    store.get = AsyncMock(return_value=run_record)
+    return store
+
+
+def _make_event_store(rows: list[dict]):
+    """Return an AsyncMock event store whose list_messages_by_run() returns rows."""
+    store = MagicMock()
+    store.list_messages_by_run = AsyncMock(return_value=rows)
+    return store
+
+
+def _make_message(seq: int) -> dict:
+    return {"seq": seq, "event_type": "on_chat_model_stream", "category": "message", "content": f"msg-{seq}"}
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+def test_run_messages_returns_envelope():
+    """GET /api/runs/{run_id}/messages returns {data: [...], has_more: bool}."""
+    rows = [_make_message(i) for i in range(1, 4)]
+    run_record = {"run_id": "run-1", "thread_id": "thread-1"}
+    app = _make_app(
+        run_store=_make_run_store(run_record),
+        event_store=_make_event_store(rows),
+    )
+    with TestClient(app) as client:
+        response = client.get("/api/runs/run-1/messages")
+    assert response.status_code == 200
+    body = response.json()
+    assert "data" in body
+    assert "has_more" in body
+    assert body["has_more"] is False
+    assert len(body["data"]) == 3
+
+
+def test_run_messages_404_when_run_not_found():
+    """Returns 404 when the run store returns None."""
+    app = _make_app(
+        run_store=_make_run_store(None),
+        event_store=_make_event_store([]),
+    )
+    with TestClient(app) as client:
+        response = client.get("/api/runs/missing-run/messages")
+    assert response.status_code == 404
+    assert "missing-run" in response.json()["detail"]
+
+
+def test_run_messages_has_more_true_when_extra_row_returned():
+    """has_more=True when event store returns limit+1 rows."""
+    # Default limit is 50; provide 51 rows
+    rows = [_make_message(i) for i in range(1, 52)]  # 51 rows
+    run_record = {"run_id": "run-2", "thread_id": "thread-2"}
+    app = _make_app(
+        run_store=_make_run_store(run_record),
+        event_store=_make_event_store(rows),
+    )
+    with TestClient(app) as client:
+        response = client.get("/api/runs/run-2/messages")
+    assert response.status_code == 200
+    body = response.json()
+    assert body["has_more"] is True
+    assert len(body["data"]) == 50  # trimmed to limit
+
+
+def test_run_messages_passes_after_seq_to_event_store():
+    """after_seq query param is forwarded to event_store.list_messages_by_run."""
+    rows = [_make_message(10)]
+    run_record = {"run_id": "run-3", "thread_id": "thread-3"}
+    event_store = _make_event_store(rows)
+    app = _make_app(
+        run_store=_make_run_store(run_record),
+        event_store=event_store,
+    )
+    with TestClient(app) as client:
+        response = client.get("/api/runs/run-3/messages?after_seq=5")
+    assert response.status_code == 200
+    event_store.list_messages_by_run.assert_awaited_once_with(
+        "thread-3", "run-3",
+        limit=51,  # default limit(50) + 1
+        before_seq=None,
+        after_seq=5,
+    )
+
+
+def test_run_messages_respects_custom_limit():
+    """Custom limit is respected and capped at 200."""
+    rows = [_make_message(i) for i in range(1, 6)]
+    run_record = {"run_id": "run-4", "thread_id": "thread-4"}
+    event_store = _make_event_store(rows)
+    app = _make_app(
+        run_store=_make_run_store(run_record),
+        event_store=event_store,
+    )
+    with TestClient(app) as client:
+        response = client.get("/api/runs/run-4/messages?limit=10")
+    assert response.status_code == 200
+    event_store.list_messages_by_run.assert_awaited_once_with(
+        "thread-4", "run-4",
+        limit=11,  # 10 + 1
+        before_seq=None,
+        after_seq=None,
+    )
+
+
+def test_run_messages_passes_before_seq_to_event_store():
+    """before_seq query param is forwarded to event_store.list_messages_by_run."""
+    rows = [_make_message(3)]
+    run_record = {"run_id": "run-5", "thread_id": "thread-5"}
+    event_store = _make_event_store(rows)
+    app = _make_app(
+        run_store=_make_run_store(run_record),
+        event_store=event_store,
+    )
+    with TestClient(app) as client:
+        response = client.get("/api/runs/run-5/messages?before_seq=10")
+    assert response.status_code == 200
+    event_store.list_messages_by_run.assert_awaited_once_with(
+        "thread-5", "run-5",
+        limit=51,
+        before_seq=10,
+        after_seq=None,
+    )
+
+
+def test_run_messages_empty_data():
+    """Returns empty data list when no messages exist."""
+    run_record = {"run_id": "run-6", "thread_id": "thread-6"}
+    app = _make_app(
+        run_store=_make_run_store(run_record),
+        event_store=_make_event_store([]),
+    )
+    with TestClient(app) as client:
+        response = client.get("/api/runs/run-6/messages")
+    assert response.status_code == 200
+    body = response.json()
+    assert body["data"] == []
+    assert body["has_more"] is False
+
+
+def _make_feedback_repo(rows: list[dict]):
+    """Return an AsyncMock feedback repo whose list_by_run() returns rows."""
+    repo = MagicMock()
+    repo.list_by_run = AsyncMock(return_value=rows)
+    return repo
+
+
+def _make_feedback(run_id: str, idx: int) -> dict:
+    return {"id": f"fb-{idx}", "run_id": run_id, "thread_id": "thread-x", "value": "up"}
+
+
+# ---------------------------------------------------------------------------
+# TestRunFeedback
+# ---------------------------------------------------------------------------
+
+
+class TestRunFeedback:
+    def test_returns_list_of_feedback_dicts(self):
+        """GET /api/runs/{run_id}/feedback returns a list of feedback dicts."""
+        run_record = {"run_id": "run-fb-1", "thread_id": "thread-fb-1"}
+        rows = [_make_feedback("run-fb-1", i) for i in range(3)]
+        app = _make_app(
+            run_store=_make_run_store(run_record),
+            feedback_repo=_make_feedback_repo(rows),
+        )
+        with TestClient(app) as client:
+            response = client.get("/api/runs/run-fb-1/feedback")
+        assert response.status_code == 200
+        body = response.json()
+        assert isinstance(body, list)
+        assert len(body) == 3
+
+    def test_404_when_run_not_found(self):
+        """Returns 404 when run store returns None."""
+        app = _make_app(
+            run_store=_make_run_store(None),
+            feedback_repo=_make_feedback_repo([]),
+        )
+        with TestClient(app) as client:
+            response = client.get("/api/runs/missing-run/feedback")
+        assert response.status_code == 404
+        assert "missing-run" in response.json()["detail"]
+
+    def test_empty_list_when_no_feedback(self):
+        """Returns empty list when no feedback exists for the run."""
+        run_record = {"run_id": "run-fb-2", "thread_id": "thread-fb-2"}
+        app = _make_app(
+            run_store=_make_run_store(run_record),
+            feedback_repo=_make_feedback_repo([]),
+        )
+        with TestClient(app) as client:
+            response = client.get("/api/runs/run-fb-2/feedback")
+        assert response.status_code == 200
+        assert response.json() == []
+
+    def test_503_when_feedback_repo_not_configured(self):
+        """Returns 503 when feedback_repo is None (no DB configured)."""
+        run_record = {"run_id": "run-fb-3", "thread_id": "thread-fb-3"}
+        app = _make_app(
+            run_store=_make_run_store(run_record),
+        )
+        # Explicitly set feedback_repo to None to simulate missing DB
+        app.state.feedback_repo = None
+        with TestClient(app) as client:
+            response = client.get("/api/runs/run-fb-3/feedback")
+        assert response.status_code == 503
diff --git a/backend/tests/test_sandbox_search_tools.py b/backend/tests/test_sandbox_search_tools.py
index 88e87a783..9c7ec1990 100644
--- a/backend/tests/test_sandbox_search_tools.py
+++ b/backend/tests/test_sandbox_search_tools.py
@@ -14,6 +14,10 @@ def _make_runtime(tmp_path):
     workspace.mkdir()
     uploads.mkdir()
     outputs.mkdir()
+    from deerflow.config.app_config import AppConfig
+    from deerflow.config.deer_flow_context import DeerFlowContext
+    from deerflow.config.sandbox_config import SandboxConfig
+
     return SimpleNamespace(
         state={
             "sandbox": {"sandbox_id": "local"},
@@ -23,7 +27,10 @@ def _make_runtime(tmp_path):
                 "outputs_path": str(outputs),
             },
         },
-        context={"thread_id": "thread-1"},
+        context=DeerFlowContext(
+            app_config=AppConfig(sandbox=SandboxConfig(use="test")),
+            thread_id="thread-1",
+        ),
     )
 
 
@@ -103,8 +110,6 @@ def test_grep_tool_truncates_results(tmp_path, monkeypatch) -> None:
     (workspace / "main.py").write_text("TODO one\nTODO two\nTODO three\n", encoding="utf-8")
 
     monkeypatch.setattr("deerflow.sandbox.tools.ensure_sandbox_initialized", lambda runtime: LocalSandbox(id="local"))
-    # Prevent config.yaml tool config from overriding the caller-supplied max_results=2.
-    monkeypatch.setattr("deerflow.sandbox.tools.get_app_config", lambda: SimpleNamespace(get_tool_config=lambda name: None))
 
     result = grep_tool.func(
         runtime=runtime,
@@ -324,10 +329,6 @@ def test_glob_tool_honors_smaller_requested_max_results(tmp_path, monkeypatch) -
     (workspace / "c.py").write_text("print('c')\n", encoding="utf-8")
 
     monkeypatch.setattr("deerflow.sandbox.tools.ensure_sandbox_initialized", lambda runtime: LocalSandbox(id="local"))
-    monkeypatch.setattr(
-        "deerflow.sandbox.tools.get_app_config",
-        lambda: SimpleNamespace(get_tool_config=lambda name: SimpleNamespace(model_extra={"max_results": 50})),
-    )
 
     result = glob_tool.func(
         runtime=runtime,
diff --git a/backend/tests/test_sandbox_tools_security.py b/backend/tests/test_sandbox_tools_security.py
index 8c67cd50a..e74d00d3e 100644
--- a/backend/tests/test_sandbox_tools_security.py
+++ b/backend/tests/test_sandbox_tools_security.py
@@ -5,6 +5,7 @@ from unittest.mock import patch
 
 import pytest
 
+from deerflow.config.app_config import AppConfig
 from deerflow.sandbox.tools import (
     VIRTUAL_PATH_PREFIX,
     _apply_cwd_prefix,
@@ -34,6 +35,53 @@ _THREAD_DATA = {
 }
 
 
+def _make_app_config(
+    *,
+    skills_container_path: str = "/mnt/skills",
+    skills_host_path: str | None = None,
+    mounts=None,
+    mcp_servers=None,
+    tool_config_map=None,
+) -> SimpleNamespace:
+    """Build a lightweight AppConfig stand-in used by tests.
+
+    Only the attributes accessed by the helpers under test are populated;
+    everything else is omitted to keep the fake minimal and explicit.
+    """
+    skills_path = Path(skills_host_path) if skills_host_path is not None else None
+    skills_cfg = SimpleNamespace(
+        container_path=skills_container_path,
+        get_skills_path=lambda: skills_path if skills_path is not None else Path("/nonexistent-skills-root-12345"),
+    )
+    sandbox_cfg = SimpleNamespace(mounts=list(mounts) if mounts else [], bash_output_max_chars=20000)
+    extensions_cfg = SimpleNamespace(mcp_servers=dict(mcp_servers) if mcp_servers else {})
+    tool_config_map = dict(tool_config_map or {})
+    return SimpleNamespace(
+        skills=skills_cfg,
+        sandbox=sandbox_cfg,
+        extensions=extensions_cfg,
+        get_tool_config=lambda name: tool_config_map.get(name),
+    )
+
+
+_DEFAULT_APP_CONFIG = _make_app_config()
+
+
+def _make_ctx(thread_id: str = "thread-1", *, app_config=_DEFAULT_APP_CONFIG, sandbox_key: str | None = None):
+    """Build a DeerFlowContext-like object with extra attributes allowed.
+
+    ``resolve_context`` only checks ``isinstance(ctx, DeerFlowContext)``; for
+    tests that need additional attributes (``sandbox_key``) we use a subclass
+    created at runtime.
+    """
+    from deerflow.config.deer_flow_context import DeerFlowContext as _DFC
+
+    ctx = _DFC(app_config=app_config, thread_id=thread_id)
+    if sandbox_key is not None:
+        object.__setattr__(ctx, "sandbox_key", sandbox_key)
+    return ctx
+
+
 # ---------- replace_virtual_path ----------
 
 
@@ -85,7 +133,7 @@ def test_replace_virtual_path_preserves_windows_style_for_nested_subdir_trailing
 def test_replace_virtual_paths_in_command_preserves_trailing_slash() -> None:
     """Trailing slash on a virtual path inside a command must be preserved."""
     cmd = """python -c "output_dir = '/mnt/user-data/workspace/'; print(output_dir + 'some_file.txt')\""""
-    result = replace_virtual_paths_in_command(cmd, _THREAD_DATA)
+    result = replace_virtual_paths_in_command(cmd, _THREAD_DATA, _DEFAULT_APP_CONFIG)
     assert "/tmp/deer-flow/threads/t1/user-data/workspace/" in result, f"Trailing slash lost in: {result!r}"
 
 
@@ -94,7 +142,7 @@ def test_replace_virtual_paths_in_command_preserves_trailing_slash() -> None:
 
 def test_mask_local_paths_in_output_hides_host_paths() -> None:
     output = "Created: /tmp/deer-flow/threads/t1/user-data/workspace/result.txt"
-    masked = mask_local_paths_in_output(output, _THREAD_DATA)
+    masked = mask_local_paths_in_output(output, _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
     assert "/tmp/deer-flow/threads/t1/user-data" not in masked
     assert "/mnt/user-data/workspace/result.txt" in masked
@@ -107,7 +155,7 @@ def test_mask_local_paths_in_output_hides_skills_host_paths() -> None:
         patch("deerflow.sandbox.tools._get_skills_host_path", return_value="/home/user/deer-flow/skills"),
     ):
         output = "Reading: /home/user/deer-flow/skills/public/bootstrap/SKILL.md"
-        masked = mask_local_paths_in_output(output, _THREAD_DATA)
+        masked = mask_local_paths_in_output(output, _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
         assert "/home/user/deer-flow/skills" not in masked
         assert "/mnt/skills/public/bootstrap/SKILL.md" in masked
@@ -143,12 +191,12 @@ def test_reject_path_traversal_allows_normal_paths() -> None:
 
 def test_validate_local_tool_path_rejects_non_virtual_path() -> None:
     with pytest.raises(PermissionError, match="Only paths under"):
-        validate_local_tool_path("/Users/someone/config.yaml", _THREAD_DATA)
+        validate_local_tool_path("/Users/someone/config.yaml", _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_tool_path_rejects_non_virtual_path_mentions_configured_mounts() -> None:
     with pytest.raises(PermissionError, match="configured mount paths"):
-        validate_local_tool_path("/Users/someone/config.yaml", _THREAD_DATA)
+        validate_local_tool_path("/Users/someone/config.yaml", _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_tool_path_prioritizes_user_data_before_custom_mounts() -> None:
@@ -158,42 +206,41 @@ def test_validate_local_tool_path_prioritizes_user_data_before_custom_mounts() -
         VolumeMountConfig(host_path="/tmp/host-user-data", container_path=VIRTUAL_PATH_PREFIX, read_only=False),
     ]
     with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=mounts):
-        validate_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/workspace/file.txt", _THREAD_DATA, read_only=True)
+        validate_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/workspace/file.txt", _THREAD_DATA, _DEFAULT_APP_CONFIG, read_only=True)
 
     with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=mounts):
         with pytest.raises(PermissionError, match="path traversal"):
-            validate_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/workspace/../../etc/passwd", _THREAD_DATA, read_only=True)
+            validate_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/workspace/../../etc/passwd", _THREAD_DATA, _DEFAULT_APP_CONFIG, read_only=True)
 
 
 def test_validate_local_tool_path_rejects_bare_virtual_root() -> None:
     """The bare /mnt/user-data root without trailing slash is not a valid sub-path."""
     with pytest.raises(PermissionError, match="Only paths under"):
-        validate_local_tool_path(VIRTUAL_PATH_PREFIX, _THREAD_DATA)
+        validate_local_tool_path(VIRTUAL_PATH_PREFIX, _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_tool_path_allows_user_data_paths() -> None:
     # Should not raise — user-data paths are always allowed
-    validate_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/workspace/file.txt", _THREAD_DATA)
-    validate_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/uploads/doc.pdf", _THREAD_DATA)
-    validate_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/outputs/result.csv", _THREAD_DATA)
+    validate_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/workspace/file.txt", _THREAD_DATA, _DEFAULT_APP_CONFIG)
+    validate_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/uploads/doc.pdf", _THREAD_DATA, _DEFAULT_APP_CONFIG)
+    validate_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/outputs/result.csv", _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_tool_path_allows_user_data_write() -> None:
     # read_only=False (default) should still work for user-data paths
-    validate_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/workspace/file.txt", _THREAD_DATA, read_only=False)
+    validate_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/workspace/file.txt", _THREAD_DATA, _DEFAULT_APP_CONFIG, read_only=False)
 
 
 def test_validate_local_tool_path_rejects_traversal_in_user_data() -> None:
     """Path traversal via .. in user-data paths must be rejected."""
     with pytest.raises(PermissionError, match="path traversal"):
-        validate_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/workspace/../../etc/passwd", _THREAD_DATA)
+        validate_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/workspace/../../etc/passwd", _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_tool_path_rejects_traversal_in_skills() -> None:
     """Path traversal via .. in skills paths must be rejected."""
-    with patch("deerflow.sandbox.tools._get_skills_container_path", return_value="/mnt/skills"):
-        with pytest.raises(PermissionError, match="path traversal"):
-            validate_local_tool_path("/mnt/skills/../../etc/passwd", _THREAD_DATA, read_only=True)
+    with pytest.raises(PermissionError, match="path traversal"):
+        validate_local_tool_path("/mnt/skills/../../etc/passwd", _THREAD_DATA, _DEFAULT_APP_CONFIG, read_only=True)
 
 
 def test_validate_local_tool_path_rejects_none_thread_data() -> None:
@@ -201,7 +248,7 @@ def test_validate_local_tool_path_rejects_none_thread_data() -> None:
     from deerflow.sandbox.exceptions import SandboxRuntimeError
 
     with pytest.raises(SandboxRuntimeError):
-        validate_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/workspace/file.txt", None)
+        validate_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/workspace/file.txt", None, _DEFAULT_APP_CONFIG)
 
 
 # ---------- _resolve_skills_path ----------
@@ -209,32 +256,26 @@ def test_validate_local_tool_path_rejects_none_thread_data() -> None:
 
 def test_resolve_skills_path_resolves_correctly() -> None:
     """Skills virtual path should resolve to host path."""
-    with (
-        patch("deerflow.sandbox.tools._get_skills_container_path", return_value="/mnt/skills"),
-        patch("deerflow.sandbox.tools._get_skills_host_path", return_value="/home/user/deer-flow/skills"),
-    ):
-        resolved = _resolve_skills_path("/mnt/skills/public/bootstrap/SKILL.md")
-        assert resolved == "/home/user/deer-flow/skills/public/bootstrap/SKILL.md"
+    cfg = _make_app_config(skills_host_path="/home/user/deer-flow/skills")
+    # Force get_skills_path().exists() to be True without touching the FS
+    cfg.skills.get_skills_path = lambda: type("_P", (), {"exists": lambda self: True, "__str__": lambda self: "/home/user/deer-flow/skills"})()
+    resolved = _resolve_skills_path("/mnt/skills/public/bootstrap/SKILL.md", cfg)
+    assert resolved == "/home/user/deer-flow/skills/public/bootstrap/SKILL.md"
 
 
 def test_resolve_skills_path_resolves_root() -> None:
     """Skills container root should resolve to host skills directory."""
-    with (
-        patch("deerflow.sandbox.tools._get_skills_container_path", return_value="/mnt/skills"),
-        patch("deerflow.sandbox.tools._get_skills_host_path", return_value="/home/user/deer-flow/skills"),
-    ):
-        resolved = _resolve_skills_path("/mnt/skills")
-        assert resolved == "/home/user/deer-flow/skills"
+    cfg = _make_app_config(skills_host_path="/home/user/deer-flow/skills")
+    cfg.skills.get_skills_path = lambda: type("_P", (), {"exists": lambda self: True, "__str__": lambda self: "/home/user/deer-flow/skills"})()
+    resolved = _resolve_skills_path("/mnt/skills", cfg)
+    assert resolved == "/home/user/deer-flow/skills"
 
 
 def test_resolve_skills_path_raises_when_not_configured() -> None:
     """Should raise FileNotFoundError when skills directory is not available."""
-    with (
-        patch("deerflow.sandbox.tools._get_skills_container_path", return_value="/mnt/skills"),
-        patch("deerflow.sandbox.tools._get_skills_host_path", return_value=None),
-    ):
-        with pytest.raises(FileNotFoundError, match="Skills directory not available"):
-            _resolve_skills_path("/mnt/skills/public/bootstrap/SKILL.md")
+    # Default app config has no host path configured → _get_skills_host_path returns None
+    with pytest.raises(FileNotFoundError, match="Skills directory not available"):
+        _resolve_skills_path("/mnt/skills/public/bootstrap/SKILL.md", _DEFAULT_APP_CONFIG)
 
 
 # ---------- _resolve_and_validate_user_data_path ----------
@@ -249,7 +290,7 @@ def test_resolve_and_validate_user_data_path_resolves_correctly(tmp_path: Path)
         "uploads_path": str(tmp_path / "uploads"),
         "outputs_path": str(tmp_path / "outputs"),
     }
-    resolved = _resolve_and_validate_user_data_path("/mnt/user-data/workspace/hello.txt", thread_data)
+    resolved = _resolve_and_validate_user_data_path("/mnt/user-data/workspace/hello.txt", thread_data, _DEFAULT_APP_CONFIG)
     assert resolved == str(workspace / "hello.txt")
 
 
@@ -264,7 +305,7 @@ def test_resolve_and_validate_user_data_path_blocks_traversal(tmp_path: Path) ->
     }
     # This path resolves outside the allowed roots
     with pytest.raises(PermissionError):
-        _resolve_and_validate_user_data_path("/mnt/user-data/workspace/../../../etc/passwd", thread_data)
+        _resolve_and_validate_user_data_path("/mnt/user-data/workspace/../../../etc/passwd", thread_data, _DEFAULT_APP_CONFIG)
 
 
 # ---------- replace_virtual_paths_in_command ----------
@@ -277,7 +318,7 @@ def test_replace_virtual_paths_in_command_replaces_skills_paths() -> None:
         patch("deerflow.sandbox.tools._get_skills_host_path", return_value="/home/user/deer-flow/skills"),
     ):
         cmd = "cat /mnt/skills/public/bootstrap/SKILL.md"
-        result = replace_virtual_paths_in_command(cmd, _THREAD_DATA)
+        result = replace_virtual_paths_in_command(cmd, _THREAD_DATA, _DEFAULT_APP_CONFIG)
         assert "/mnt/skills" not in result
         assert "/home/user/deer-flow/skills/public/bootstrap/SKILL.md" in result
 
@@ -289,7 +330,7 @@ def test_replace_virtual_paths_in_command_replaces_both() -> None:
         patch("deerflow.sandbox.tools._get_skills_host_path", return_value="/home/user/skills"),
     ):
         cmd = "cat /mnt/skills/public/SKILL.md > /mnt/user-data/workspace/out.txt"
-        result = replace_virtual_paths_in_command(cmd, _THREAD_DATA)
+        result = replace_virtual_paths_in_command(cmd, _THREAD_DATA, _DEFAULT_APP_CONFIG)
         assert "/mnt/skills" not in result
         assert "/mnt/user-data" not in result
         assert "/home/user/skills/public/SKILL.md" in result
@@ -301,30 +342,27 @@ def test_replace_virtual_paths_in_command_replaces_both() -> None:
 
 def test_validate_local_bash_command_paths_blocks_host_paths() -> None:
     with pytest.raises(PermissionError, match="Unsafe absolute paths"):
-        validate_local_bash_command_paths("cat /etc/passwd", _THREAD_DATA)
+        validate_local_bash_command_paths("cat /etc/passwd", _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_bash_command_paths_allows_https_urls() -> None:
     """URLs like https://github.com/... must not be flagged as unsafe absolute paths."""
     validate_local_bash_command_paths(
         "cd /mnt/user-data/workspace && git clone https://github.com/CherryHQ/cherry-studio.git",
-        _THREAD_DATA,
-    )
+        _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_bash_command_paths_allows_http_urls() -> None:
     """HTTP URLs must not be flagged as unsafe absolute paths."""
     validate_local_bash_command_paths(
         "curl http://example.com/file.tar.gz -o /mnt/user-data/workspace/file.tar.gz",
-        _THREAD_DATA,
-    )
+        _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_bash_command_paths_allows_virtual_and_system_paths() -> None:
     validate_local_bash_command_paths(
         "/bin/echo ok > /mnt/user-data/workspace/out.txt && cat /dev/null",
-        _THREAD_DATA,
-    )
+        _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_bash_command_paths_blocks_traversal_in_user_data() -> None:
@@ -332,8 +370,7 @@ def test_validate_local_bash_command_paths_blocks_traversal_in_user_data() -> No
     with pytest.raises(PermissionError, match="path traversal"):
         validate_local_bash_command_paths(
             "cat /mnt/user-data/workspace/../../etc/passwd",
-            _THREAD_DATA,
-        )
+            _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_bash_command_paths_blocks_traversal_in_skills() -> None:
@@ -342,21 +379,20 @@ def test_validate_local_bash_command_paths_blocks_traversal_in_skills() -> None:
         with pytest.raises(PermissionError, match="path traversal"):
             validate_local_bash_command_paths(
                 "cat /mnt/skills/../../etc/passwd",
-                _THREAD_DATA,
-            )
+                _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_bash_tool_rejects_host_bash_when_local_sandbox_default(monkeypatch) -> None:
     runtime = SimpleNamespace(
         state={"sandbox": {"sandbox_id": "local"}, "thread_data": _THREAD_DATA.copy()},
-        context={"thread_id": "thread-1"},
+        context=_make_ctx("thread-1"),
     )
 
     monkeypatch.setattr(
         "deerflow.sandbox.tools.ensure_sandbox_initialized",
         lambda runtime: SimpleNamespace(execute_command=lambda command: pytest.fail("host bash should not execute")),
     )
-    monkeypatch.setattr("deerflow.sandbox.tools.is_host_bash_allowed", lambda: False)
+    monkeypatch.setattr("deerflow.sandbox.tools.is_host_bash_allowed", lambda *a, **k: False)
 
     result = bash_tool.func(
         runtime=runtime,
@@ -371,33 +407,32 @@ def test_bash_tool_rejects_host_bash_when_local_sandbox_default(monkeypatch) ->
 
 
 def test_is_skills_path_recognises_default_prefix() -> None:
-    with patch("deerflow.sandbox.tools._get_skills_container_path", return_value="/mnt/skills"):
-        assert _is_skills_path("/mnt/skills") is True
-        assert _is_skills_path("/mnt/skills/public/bootstrap/SKILL.md") is True
-        assert _is_skills_path("/mnt/skills-extra/foo") is False
-        assert _is_skills_path("/mnt/user-data/workspace") is False
+    assert _is_skills_path("/mnt/skills", _DEFAULT_APP_CONFIG) is True
+    assert _is_skills_path("/mnt/skills/public/bootstrap/SKILL.md", _DEFAULT_APP_CONFIG) is True
+    assert _is_skills_path("/mnt/skills-extra/foo", _DEFAULT_APP_CONFIG) is False
+    assert _is_skills_path("/mnt/user-data/workspace", _DEFAULT_APP_CONFIG) is False
 
 
 def test_validate_local_tool_path_allows_skills_read_only() -> None:
     """read_file / ls should be able to access /mnt/skills paths."""
-    with patch("deerflow.sandbox.tools._get_skills_container_path", return_value="/mnt/skills"):
-        # Should not raise
-        validate_local_tool_path(
-            "/mnt/skills/public/bootstrap/SKILL.md",
-            _THREAD_DATA,
-            read_only=True,
-        )
+    # Should not raise — default app config uses /mnt/skills as container path
+    validate_local_tool_path(
+        "/mnt/skills/public/bootstrap/SKILL.md",
+        _THREAD_DATA,
+        _DEFAULT_APP_CONFIG,
+        read_only=True,
+    )
 
 
 def test_validate_local_tool_path_blocks_skills_write() -> None:
     """write_file / str_replace must NOT write to skills paths."""
-    with patch("deerflow.sandbox.tools._get_skills_container_path", return_value="/mnt/skills"):
-        with pytest.raises(PermissionError, match="Write access to skills path is not allowed"):
-            validate_local_tool_path(
-                "/mnt/skills/public/bootstrap/SKILL.md",
-                _THREAD_DATA,
-                read_only=False,
-            )
+    with pytest.raises(PermissionError, match="Write access to skills path is not allowed"):
+        validate_local_tool_path(
+            "/mnt/skills/public/bootstrap/SKILL.md",
+            _THREAD_DATA,
+            _DEFAULT_APP_CONFIG,
+            read_only=False,
+        )
 
 
 def test_validate_local_bash_command_paths_allows_skills_path() -> None:
@@ -405,8 +440,7 @@ def test_validate_local_bash_command_paths_allows_skills_path() -> None:
     with patch("deerflow.sandbox.tools._get_skills_container_path", return_value="/mnt/skills"):
         validate_local_bash_command_paths(
             "cat /mnt/skills/public/bootstrap/SKILL.md",
-            _THREAD_DATA,
-        )
+            _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_bash_command_paths_allows_urls() -> None:
@@ -414,40 +448,35 @@ def test_validate_local_bash_command_paths_allows_urls() -> None:
     # HTTPS URLs
     validate_local_bash_command_paths(
         "curl -X POST https://example.com/api/v1/risk/check",
-        _THREAD_DATA,
-    )
+        _THREAD_DATA, _DEFAULT_APP_CONFIG)
     # HTTP URLs
     validate_local_bash_command_paths(
         "curl http://localhost:8080/health",
-        _THREAD_DATA,
-    )
+        _THREAD_DATA, _DEFAULT_APP_CONFIG)
     # URLs with query strings
     validate_local_bash_command_paths(
         "curl https://api.example.com/v2/search?q=test",
-        _THREAD_DATA,
-    )
+        _THREAD_DATA, _DEFAULT_APP_CONFIG)
     # FTP URLs
     validate_local_bash_command_paths(
         "curl ftp://ftp.example.com/pub/file.tar.gz",
-        _THREAD_DATA,
-    )
+        _THREAD_DATA, _DEFAULT_APP_CONFIG)
     # URL mixed with valid virtual path
     validate_local_bash_command_paths(
         "curl https://example.com/data -o /mnt/user-data/workspace/data.json",
-        _THREAD_DATA,
-    )
+        _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_bash_command_paths_blocks_file_urls() -> None:
     """file:// URLs should be treated as unsafe and blocked."""
     with pytest.raises(PermissionError):
-        validate_local_bash_command_paths("curl file:///etc/passwd", _THREAD_DATA)
+        validate_local_bash_command_paths("curl file:///etc/passwd", _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_bash_command_paths_blocks_file_urls_case_insensitive() -> None:
     """file:// URL detection should be case-insensitive."""
     with pytest.raises(PermissionError):
-        validate_local_bash_command_paths("curl FILE:///etc/shadow", _THREAD_DATA)
+        validate_local_bash_command_paths("curl FILE:///etc/shadow", _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_bash_command_paths_blocks_file_urls_mixed_with_valid() -> None:
@@ -455,35 +484,36 @@ def test_validate_local_bash_command_paths_blocks_file_urls_mixed_with_valid() -
     with pytest.raises(PermissionError):
         validate_local_bash_command_paths(
             "curl file:///etc/passwd -o /mnt/user-data/workspace/out.txt",
-            _THREAD_DATA,
-        )
+            _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_bash_command_paths_still_blocks_other_paths() -> None:
     """Paths outside virtual and system prefixes must still be blocked."""
     with patch("deerflow.sandbox.tools._get_skills_container_path", return_value="/mnt/skills"):
         with pytest.raises(PermissionError, match="Unsafe absolute paths"):
-            validate_local_bash_command_paths("cat /etc/shadow", _THREAD_DATA)
+            validate_local_bash_command_paths("cat /etc/shadow", _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_tool_path_skills_custom_container_path() -> None:
     """Skills with a custom container_path in config should also work."""
-    with patch("deerflow.sandbox.tools._get_skills_container_path", return_value="/custom/skills"):
-        # Should not raise
+    custom_config = _make_app_config(skills_container_path="/custom/skills")
+    # Should not raise
+    validate_local_tool_path(
+        "/custom/skills/public/my-skill/SKILL.md",
+        _THREAD_DATA,
+        custom_config,
+        read_only=True,
+    )
+
+    # The default /mnt/skills should not match since container path is /custom/skills
+    with pytest.raises(PermissionError, match="Only paths under"):
         validate_local_tool_path(
-            "/custom/skills/public/my-skill/SKILL.md",
+            "/mnt/skills/public/bootstrap/SKILL.md",
             _THREAD_DATA,
+            custom_config,
             read_only=True,
         )
 
-        # The default /mnt/skills should not match since container path is /custom/skills
-        with pytest.raises(PermissionError, match="Only paths under"):
-            validate_local_tool_path(
-                "/mnt/skills/public/bootstrap/SKILL.md",
-                _THREAD_DATA,
-                read_only=True,
-            )
-
 
 # ---------- ACP workspace path tests ----------
 
@@ -500,6 +530,7 @@ def test_validate_local_tool_path_allows_acp_workspace_read_only() -> None:
     validate_local_tool_path(
         "/mnt/acp-workspace/hello_world.py",
         _THREAD_DATA,
+        _DEFAULT_APP_CONFIG,
         read_only=True,
     )
 
@@ -510,6 +541,7 @@ def test_validate_local_tool_path_blocks_acp_workspace_write() -> None:
         validate_local_tool_path(
             "/mnt/acp-workspace/hello_world.py",
             _THREAD_DATA,
+            _DEFAULT_APP_CONFIG,
             read_only=False,
         )
 
@@ -518,8 +550,7 @@ def test_validate_local_bash_command_paths_allows_acp_workspace() -> None:
     """bash commands referencing /mnt/acp-workspace should be allowed."""
     validate_local_bash_command_paths(
         "cp /mnt/acp-workspace/hello_world.py /mnt/user-data/outputs/hello_world.py",
-        _THREAD_DATA,
-    )
+        _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_bash_command_paths_blocks_traversal_in_acp_workspace() -> None:
@@ -527,8 +558,7 @@ def test_validate_local_bash_command_paths_blocks_traversal_in_acp_workspace() -
     with pytest.raises(PermissionError, match="path traversal"):
         validate_local_bash_command_paths(
             "cat /mnt/acp-workspace/../../etc/passwd",
-            _THREAD_DATA,
-        )
+            _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_resolve_acp_workspace_path_resolves_correctly(tmp_path: Path) -> None:
@@ -570,7 +600,7 @@ def test_replace_virtual_paths_in_command_replaces_acp_workspace() -> None:
     acp_host = "/home/user/.deer-flow/acp-workspace"
     with patch("deerflow.sandbox.tools._get_acp_workspace_host_path", return_value=acp_host):
         cmd = "cp /mnt/acp-workspace/hello.py /mnt/user-data/outputs/hello.py"
-        result = replace_virtual_paths_in_command(cmd, _THREAD_DATA)
+        result = replace_virtual_paths_in_command(cmd, _THREAD_DATA, _DEFAULT_APP_CONFIG)
         assert "/mnt/acp-workspace" not in result
         assert f"{acp_host}/hello.py" in result
         assert "/tmp/deer-flow/threads/t1/user-data/outputs/hello.py" in result
@@ -581,7 +611,7 @@ def test_mask_local_paths_in_output_hides_acp_workspace_host_paths() -> None:
     acp_host = "/home/user/.deer-flow/acp-workspace"
     with patch("deerflow.sandbox.tools._get_acp_workspace_host_path", return_value=acp_host):
         output = f"Copied: {acp_host}/hello.py"
-        masked = mask_local_paths_in_output(output, _THREAD_DATA)
+        masked = mask_local_paths_in_output(output, _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
         assert acp_host not in masked
         assert "/mnt/acp-workspace/hello.py" in masked
@@ -617,39 +647,37 @@ def test_apply_cwd_prefix_quotes_path_with_spaces() -> None:
 
 def test_validate_local_bash_command_paths_allows_mcp_filesystem_paths() -> None:
     """Bash commands referencing MCP filesystem server paths should be allowed."""
+    from deerflow.config.app_config import AppConfig
     from deerflow.config.extensions_config import ExtensionsConfig, McpServerConfig
+    from deerflow.config.sandbox_config import SandboxConfig
 
-    mock_config = ExtensionsConfig(
-        mcp_servers={
-            "filesystem": McpServerConfig(
-                enabled=True,
-                command="npx",
-                args=["-y", "@modelcontextprotocol/server-filesystem", "/mnt/d/workspace"],
-            )
-        }
-    )
-    with patch("deerflow.config.extensions_config.get_extensions_config", return_value=mock_config):
-        # Should not raise - MCP filesystem paths are allowed
-        validate_local_bash_command_paths("ls /mnt/d/workspace", _THREAD_DATA)
-        validate_local_bash_command_paths("cat /mnt/d/workspace/subdir/file.txt", _THREAD_DATA)
-
-        # Path traversal should still be blocked
-        with pytest.raises(PermissionError, match="path traversal"):
-            validate_local_bash_command_paths("cat /mnt/d/workspace/../../etc/passwd", _THREAD_DATA)
-
-        # Disabled servers should not expose paths
-        disabled_config = ExtensionsConfig(
-            mcp_servers={
-                "filesystem": McpServerConfig(
-                    enabled=False,
-                    command="npx",
-                    args=["-y", "@modelcontextprotocol/server-filesystem", "/mnt/d/workspace"],
-                )
-            }
+    def _mcp_app_config(enabled: bool) -> AppConfig:
+        return AppConfig(
+            sandbox=SandboxConfig(use="test"),
+            extensions=ExtensionsConfig(
+                mcp_servers={
+                    "filesystem": McpServerConfig(
+                        enabled=enabled,
+                        command="npx",
+                        args=["-y", "@modelcontextprotocol/server-filesystem", "/mnt/d/workspace"],
+                    )
+                }
+            ),
         )
-        with patch("deerflow.config.extensions_config.get_extensions_config", return_value=disabled_config):
-            with pytest.raises(PermissionError, match="Unsafe absolute paths"):
-                validate_local_bash_command_paths("ls /mnt/d/workspace", _THREAD_DATA)
+
+    enabled_cfg = _mcp_app_config(True)
+    # Should not raise - MCP filesystem paths are allowed
+    validate_local_bash_command_paths("ls /mnt/d/workspace", _THREAD_DATA, enabled_cfg)
+    validate_local_bash_command_paths("cat /mnt/d/workspace/subdir/file.txt", _THREAD_DATA, enabled_cfg)
+
+    # Path traversal should still be blocked
+    with pytest.raises(PermissionError, match="path traversal"):
+        validate_local_bash_command_paths("cat /mnt/d/workspace/../../etc/passwd", _THREAD_DATA, enabled_cfg)
+
+    # Disabled servers should not expose paths
+    disabled_cfg = _mcp_app_config(False)
+    with pytest.raises(PermissionError, match="Unsafe absolute paths"):
+        validate_local_bash_command_paths("ls /mnt/d/workspace", _THREAD_DATA, disabled_cfg)
 
 
 # ---------- Custom mount path tests ----------
@@ -667,12 +695,12 @@ def _mock_custom_mounts():
 
 def test_is_custom_mount_path_recognises_configured_mounts() -> None:
     with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=_mock_custom_mounts()):
-        assert _is_custom_mount_path("/mnt/code-read") is True
-        assert _is_custom_mount_path("/mnt/code-read/src/main.py") is True
-        assert _is_custom_mount_path("/mnt/data") is True
-        assert _is_custom_mount_path("/mnt/data/file.txt") is True
-        assert _is_custom_mount_path("/mnt/code-read-extra/foo") is False
-        assert _is_custom_mount_path("/mnt/other") is False
+        assert _is_custom_mount_path("/mnt/code-read", _DEFAULT_APP_CONFIG) is True
+        assert _is_custom_mount_path("/mnt/code-read/src/main.py", _DEFAULT_APP_CONFIG) is True
+        assert _is_custom_mount_path("/mnt/data", _DEFAULT_APP_CONFIG) is True
+        assert _is_custom_mount_path("/mnt/data/file.txt", _DEFAULT_APP_CONFIG) is True
+        assert _is_custom_mount_path("/mnt/code-read-extra/foo", _DEFAULT_APP_CONFIG) is False
+        assert _is_custom_mount_path("/mnt/other", _DEFAULT_APP_CONFIG) is False
 
 
 def test_get_custom_mount_for_path_returns_longest_prefix() -> None:
@@ -683,7 +711,7 @@ def test_get_custom_mount_for_path_returns_longest_prefix() -> None:
         VolumeMountConfig(host_path="/home/user/code", container_path="/mnt/code", read_only=True),
     ]
     with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=mounts):
-        mount = _get_custom_mount_for_path("/mnt/code/file.py")
+        mount = _get_custom_mount_for_path("/mnt/code/file.py", _DEFAULT_APP_CONFIG)
         assert mount is not None
         assert mount.container_path == "/mnt/code"
 
@@ -691,90 +719,72 @@ def test_get_custom_mount_for_path_returns_longest_prefix() -> None:
 def test_validate_local_tool_path_allows_custom_mount_read() -> None:
     """read_file / ls should be able to access custom mount paths."""
     with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=_mock_custom_mounts()):
-        validate_local_tool_path("/mnt/code-read/src/main.py", _THREAD_DATA, read_only=True)
-        validate_local_tool_path("/mnt/data/file.txt", _THREAD_DATA, read_only=True)
+        validate_local_tool_path("/mnt/code-read/src/main.py", _THREAD_DATA, _DEFAULT_APP_CONFIG, read_only=True)
+        validate_local_tool_path("/mnt/data/file.txt", _THREAD_DATA, _DEFAULT_APP_CONFIG, read_only=True)
 
 
 def test_validate_local_tool_path_blocks_read_only_mount_write() -> None:
     """write_file / str_replace must NOT write to read-only custom mounts."""
     with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=_mock_custom_mounts()):
         with pytest.raises(PermissionError, match="Write access to read-only mount is not allowed"):
-            validate_local_tool_path("/mnt/code-read/src/main.py", _THREAD_DATA, read_only=False)
+            validate_local_tool_path("/mnt/code-read/src/main.py", _THREAD_DATA, _DEFAULT_APP_CONFIG, read_only=False)
 
 
 def test_validate_local_tool_path_allows_writable_mount_write() -> None:
     """write_file / str_replace should succeed on writable custom mounts."""
     with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=_mock_custom_mounts()):
-        validate_local_tool_path("/mnt/data/file.txt", _THREAD_DATA, read_only=False)
+        validate_local_tool_path("/mnt/data/file.txt", _THREAD_DATA, _DEFAULT_APP_CONFIG, read_only=False)
 
 
 def test_validate_local_tool_path_blocks_traversal_in_custom_mount() -> None:
     """Path traversal via .. in custom mount paths must be rejected."""
     with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=_mock_custom_mounts()):
         with pytest.raises(PermissionError, match="path traversal"):
-            validate_local_tool_path("/mnt/code-read/../../etc/passwd", _THREAD_DATA, read_only=True)
+            validate_local_tool_path("/mnt/code-read/../../etc/passwd", _THREAD_DATA, _DEFAULT_APP_CONFIG, read_only=True)
 
 
 def test_validate_local_bash_command_paths_allows_custom_mount() -> None:
     """bash commands referencing custom mount paths should be allowed."""
     with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=_mock_custom_mounts()):
-        validate_local_bash_command_paths("cat /mnt/code-read/src/main.py", _THREAD_DATA)
-        validate_local_bash_command_paths("ls /mnt/data", _THREAD_DATA)
+        validate_local_bash_command_paths("cat /mnt/code-read/src/main.py", _THREAD_DATA, _DEFAULT_APP_CONFIG)
+        validate_local_bash_command_paths("ls /mnt/data", _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_bash_command_paths_blocks_traversal_in_custom_mount() -> None:
     """Bash commands with traversal in custom mount paths should be blocked."""
     with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=_mock_custom_mounts()):
         with pytest.raises(PermissionError, match="path traversal"):
-            validate_local_bash_command_paths("cat /mnt/code-read/../../etc/passwd", _THREAD_DATA)
+            validate_local_bash_command_paths("cat /mnt/code-read/../../etc/passwd", _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
 def test_validate_local_bash_command_paths_still_blocks_non_mount_paths() -> None:
     """Paths not matching any custom mount should still be blocked."""
     with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=_mock_custom_mounts()):
         with pytest.raises(PermissionError, match="Unsafe absolute paths"):
-            validate_local_bash_command_paths("cat /etc/shadow", _THREAD_DATA)
+            validate_local_bash_command_paths("cat /etc/shadow", _THREAD_DATA, _DEFAULT_APP_CONFIG)
 
 
-def test_get_custom_mounts_caching(monkeypatch, tmp_path) -> None:
-    """_get_custom_mounts should cache after first successful load."""
-    # Clear any existing cache
-    if hasattr(_get_custom_mounts, "_cached"):
-        monkeypatch.delattr(_get_custom_mounts, "_cached")
-
-    # Use real directories so host_path.exists() filtering passes
+def test_get_custom_mounts_reads_from_app_config(tmp_path) -> None:
+    """_get_custom_mounts should read directly from the supplied AppConfig."""
     dir_a = tmp_path / "code-read"
     dir_a.mkdir()
     dir_b = tmp_path / "data"
     dir_b.mkdir()
 
-    from deerflow.config.sandbox_config import SandboxConfig, VolumeMountConfig
+    from deerflow.config.sandbox_config import VolumeMountConfig
 
     mounts = [
         VolumeMountConfig(host_path=str(dir_a), container_path="/mnt/code-read", read_only=True),
         VolumeMountConfig(host_path=str(dir_b), container_path="/mnt/data", read_only=False),
     ]
-    mock_sandbox = SandboxConfig(use="deerflow.sandbox.local:LocalSandboxProvider", mounts=mounts)
-    mock_config = SimpleNamespace(sandbox=mock_sandbox)
-
-    with patch("deerflow.config.get_app_config", return_value=mock_config):
-        result = _get_custom_mounts()
-        assert len(result) == 2
-
-    # After caching, should return cached value even without mock
-    assert hasattr(_get_custom_mounts, "_cached")
-    assert len(_get_custom_mounts()) == 2
-
-    # Cleanup
-    monkeypatch.delattr(_get_custom_mounts, "_cached")
+    cfg = _make_app_config(mounts=mounts)
+    result = _get_custom_mounts(cfg)
+    assert len(result) == 2
 
 
-def test_get_custom_mounts_filters_nonexistent_host_path(monkeypatch, tmp_path) -> None:
+def test_get_custom_mounts_filters_nonexistent_host_path(tmp_path) -> None:
     """_get_custom_mounts should only return mounts whose host_path exists."""
-    if hasattr(_get_custom_mounts, "_cached"):
-        monkeypatch.delattr(_get_custom_mounts, "_cached")
-
-    from deerflow.config.sandbox_config import SandboxConfig, VolumeMountConfig
+    from deerflow.config.sandbox_config import VolumeMountConfig
 
     existing_dir = tmp_path / "existing"
     existing_dir.mkdir()
@@ -783,22 +793,16 @@ def test_get_custom_mounts_filters_nonexistent_host_path(monkeypatch, tmp_path)
         VolumeMountConfig(host_path=str(existing_dir), container_path="/mnt/existing", read_only=True),
         VolumeMountConfig(host_path="/nonexistent/path/12345", container_path="/mnt/ghost", read_only=False),
     ]
-    mock_sandbox = SandboxConfig(use="deerflow.sandbox.local:LocalSandboxProvider", mounts=mounts)
-    mock_config = SimpleNamespace(sandbox=mock_sandbox)
-
-    with patch("deerflow.config.get_app_config", return_value=mock_config):
-        result = _get_custom_mounts()
-        assert len(result) == 1
-        assert result[0].container_path == "/mnt/existing"
-
-    # Cleanup
-    monkeypatch.delattr(_get_custom_mounts, "_cached")
+    cfg = _make_app_config(mounts=mounts)
+    result = _get_custom_mounts(cfg)
+    assert len(result) == 1
+    assert result[0].container_path == "/mnt/existing"
 
 
 def test_get_custom_mount_for_path_boundary_no_false_prefix_match() -> None:
     """_get_custom_mount_for_path must not match /mnt/code-read-extra for /mnt/code-read."""
     with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=_mock_custom_mounts()):
-        mount = _get_custom_mount_for_path("/mnt/code-read-extra/foo")
+        mount = _get_custom_mount_for_path("/mnt/code-read-extra/foo", _DEFAULT_APP_CONFIG)
         assert mount is None
 
 
@@ -829,8 +833,8 @@ def test_str_replace_parallel_updates_should_preserve_both_edits(monkeypatch) ->
 
     sandbox = SharedSandbox()
     runtimes = [
-        SimpleNamespace(state={}, context={"thread_id": "thread-1"}, config={}),
-        SimpleNamespace(state={}, context={"thread_id": "thread-1"}, config={}),
+        SimpleNamespace(state={}, context=_make_ctx("thread-1"), config={}),
+        SimpleNamespace(state={}, context=_make_ctx("thread-1"), config={}),
     ]
     failures: list[BaseException] = []
 
@@ -905,14 +909,14 @@ def test_str_replace_parallel_updates_in_isolated_sandboxes_should_not_share_pat
         "sandbox-b": IsolatedSandbox("sandbox-b", shared_state),
     }
     runtimes = [
-        SimpleNamespace(state={}, context={"thread_id": "thread-1", "sandbox_key": "sandbox-a"}, config={}),
-        SimpleNamespace(state={}, context={"thread_id": "thread-2", "sandbox_key": "sandbox-b"}, config={}),
+        SimpleNamespace(state={}, context=_make_ctx("thread-1", sandbox_key="sandbox-a"), config={}),
+        SimpleNamespace(state={}, context=_make_ctx("thread-2", sandbox_key="sandbox-b"), config={}),
     ]
     failures: list[BaseException] = []
 
     monkeypatch.setattr(
         "deerflow.sandbox.tools.ensure_sandbox_initialized",
-        lambda runtime: sandboxes[runtime.context["sandbox_key"]],
+        lambda runtime: sandboxes[runtime.context.sandbox_key],
     )
     monkeypatch.setattr("deerflow.sandbox.tools.ensure_thread_directories_exist", lambda runtime: None)
     monkeypatch.setattr("deerflow.sandbox.tools.is_local_sandbox", lambda runtime: False)
@@ -972,8 +976,8 @@ def test_str_replace_and_append_on_same_path_should_preserve_both_updates(monkey
 
     sandbox = SharedSandbox()
     runtimes = [
-        SimpleNamespace(state={}, context={"thread_id": "thread-1"}, config={}),
-        SimpleNamespace(state={}, context={"thread_id": "thread-1"}, config={}),
+        SimpleNamespace(state={}, context=_make_ctx("thread-1"), config={}),
+        SimpleNamespace(state={}, context=_make_ctx("thread-1"), config={}),
     ]
     failures: list[BaseException] = []
 
diff --git a/backend/tests/test_security_scanner.py b/backend/tests/test_security_scanner.py
index 088cb2c11..c7e060c7e 100644
--- a/backend/tests/test_security_scanner.py
+++ b/backend/tests/test_security_scanner.py
@@ -29,10 +29,9 @@ async def test_scan_skill_content_passes_run_name_to_model(monkeypatch):
 @pytest.mark.anyio
 async def test_scan_skill_content_blocks_when_model_unavailable(monkeypatch):
     config = SimpleNamespace(skill_evolution=SimpleNamespace(moderation_model_name=None))
-    monkeypatch.setattr("deerflow.skills.security_scanner.get_app_config", lambda: config)
     monkeypatch.setattr("deerflow.skills.security_scanner.create_chat_model", lambda **kwargs: (_ for _ in ()).throw(RuntimeError("boom")))
 
-    result = await scan_skill_content("---\nname: demo-skill\ndescription: demo\n---\n", executable=False)
+    result = await scan_skill_content(config, "---\nname: demo-skill\ndescription: demo\n---\n", executable=False)
 
     assert result.decision == "block"
     assert "manual review required" in result.reason
diff --git a/backend/tests/test_skill_manage_tool.py b/backend/tests/test_skill_manage_tool.py
index 1b16fb48f..81bf49766 100644
--- a/backend/tests/test_skill_manage_tool.py
+++ b/backend/tests/test_skill_manage_tool.py
@@ -4,9 +4,20 @@ from types import SimpleNamespace
 import anyio
 import pytest
 
+from deerflow.config.app_config import AppConfig
+from deerflow.config.deer_flow_context import DeerFlowContext
+from deerflow.config.sandbox_config import SandboxConfig
+
 skill_manage_module = importlib.import_module("deerflow.tools.skill_manage_tool")
 
 
+def _make_context(thread_id: str, app_config: object | None = None) -> DeerFlowContext:
+    return DeerFlowContext(
+        app_config=app_config if app_config is not None else AppConfig(sandbox=SandboxConfig(use="test")),
+        thread_id=thread_id,
+    )
+
+
 def _skill_content(name: str, description: str = "Demo skill") -> str:
     return f"---\nname: {name}\ndescription: {description}\n---\n\n# {name}\n"
 
@@ -23,18 +34,15 @@ def test_skill_manage_create_and_patch(monkeypatch, tmp_path):
         skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"),
         skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None),
     )
-    monkeypatch.setattr("deerflow.config.get_app_config", lambda: config)
-    monkeypatch.setattr("deerflow.skills.manager.get_app_config", lambda: config)
-    monkeypatch.setattr("deerflow.skills.security_scanner.get_app_config", lambda: config)
     refresh_calls = []
 
-    async def _refresh():
+    async def _refresh(*a, **k):
         refresh_calls.append("refresh")
 
     monkeypatch.setattr(skill_manage_module, "refresh_skills_system_prompt_cache_async", _refresh)
     monkeypatch.setattr(skill_manage_module, "scan_skill_content", lambda *args, **kwargs: _async_result("allow", "ok"))
 
-    runtime = SimpleNamespace(context={"thread_id": "thread-1"}, config={"configurable": {"thread_id": "thread-1"}})
+    runtime = SimpleNamespace(context=_make_context("thread-1", config), config={"configurable": {"thread_id": "thread-1"}})
 
     result = anyio.run(
         skill_manage_module.skill_manage_tool.coroutine,
@@ -67,17 +75,14 @@ def test_skill_manage_patch_replaces_single_occurrence_by_default(monkeypatch, t
         skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"),
         skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None),
     )
-    monkeypatch.setattr("deerflow.config.get_app_config", lambda: config)
-    monkeypatch.setattr("deerflow.skills.manager.get_app_config", lambda: config)
-    monkeypatch.setattr("deerflow.skills.security_scanner.get_app_config", lambda: config)
 
-    async def _refresh():
+    async def _refresh(*a, **k):
         return None
 
     monkeypatch.setattr(skill_manage_module, "refresh_skills_system_prompt_cache_async", _refresh)
     monkeypatch.setattr(skill_manage_module, "scan_skill_content", lambda *args, **kwargs: _async_result("allow", "ok"))
 
-    runtime = SimpleNamespace(context={"thread_id": "thread-1"}, config={"configurable": {"thread_id": "thread-1"}})
+    runtime = SimpleNamespace(context=_make_context("thread-1", config), config={"configurable": {"thread_id": "thread-1"}})
     content = _skill_content("demo-skill", "Demo skill") + "\nRepeated: Demo skill\n"
 
     anyio.run(skill_manage_module.skill_manage_tool.coroutine, runtime, "create", "demo-skill", content)
@@ -107,10 +112,8 @@ def test_skill_manage_rejects_public_skill_patch(monkeypatch, tmp_path):
         skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"),
         skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None),
     )
-    monkeypatch.setattr("deerflow.config.get_app_config", lambda: config)
-    monkeypatch.setattr("deerflow.skills.manager.get_app_config", lambda: config)
 
-    runtime = SimpleNamespace(context={}, config={"configurable": {}})
+    runtime = SimpleNamespace(context=_make_context("", config), config={"configurable": {}})
 
     with pytest.raises(ValueError, match="built-in skill"):
         anyio.run(
@@ -131,17 +134,15 @@ def test_skill_manage_sync_wrapper_supported(monkeypatch, tmp_path):
         skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"),
         skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None),
     )
-    monkeypatch.setattr("deerflow.config.get_app_config", lambda: config)
-    monkeypatch.setattr("deerflow.skills.manager.get_app_config", lambda: config)
     refresh_calls = []
 
-    async def _refresh():
+    async def _refresh(*a, **k):
         refresh_calls.append("refresh")
 
     monkeypatch.setattr(skill_manage_module, "refresh_skills_system_prompt_cache_async", _refresh)
     monkeypatch.setattr(skill_manage_module, "scan_skill_content", lambda *args, **kwargs: _async_result("allow", "ok"))
 
-    runtime = SimpleNamespace(context={"thread_id": "thread-sync"}, config={"configurable": {"thread_id": "thread-sync"}})
+    runtime = SimpleNamespace(context=_make_context("thread-sync", config), config={"configurable": {"thread_id": "thread-sync"}})
     result = skill_manage_module.skill_manage_tool.func(
         runtime=runtime,
         action="create",
@@ -159,17 +160,14 @@ def test_skill_manage_rejects_support_path_traversal(monkeypatch, tmp_path):
         skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"),
         skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None),
     )
-    monkeypatch.setattr("deerflow.config.get_app_config", lambda: config)
-    monkeypatch.setattr("deerflow.skills.manager.get_app_config", lambda: config)
-    monkeypatch.setattr("deerflow.skills.security_scanner.get_app_config", lambda: config)
 
-    async def _refresh():
+    async def _refresh(*a, **k):
         return None
 
     monkeypatch.setattr(skill_manage_module, "refresh_skills_system_prompt_cache_async", _refresh)
     monkeypatch.setattr(skill_manage_module, "scan_skill_content", lambda *args, **kwargs: _async_result("allow", "ok"))
 
-    runtime = SimpleNamespace(context={"thread_id": "thread-1"}, config={"configurable": {"thread_id": "thread-1"}})
+    runtime = SimpleNamespace(context=_make_context("thread-1", config), config={"configurable": {"thread_id": "thread-1"}})
     anyio.run(skill_manage_module.skill_manage_tool.coroutine, runtime, "create", "demo-skill", _skill_content("demo-skill"))
 
     with pytest.raises(ValueError, match="parent-directory traversal|selected support directory"):
diff --git a/backend/tests/test_skills_custom_router.py b/backend/tests/test_skills_custom_router.py
index e78eb54d7..10d8214bd 100644
--- a/backend/tests/test_skills_custom_router.py
+++ b/backend/tests/test_skills_custom_router.py
@@ -7,6 +7,9 @@ from fastapi import FastAPI
 from fastapi.testclient import TestClient
 
 from app.gateway.routers import skills as skills_router
+from deerflow.config.app_config import AppConfig
+from deerflow.config.extensions_config import ExtensionsConfig
+from deerflow.config.sandbox_config import SandboxConfig
 from deerflow.skills.manager import get_skill_history_file
 from deerflow.skills.types import Skill
 
@@ -44,17 +47,16 @@ def test_custom_skills_router_lifecycle(monkeypatch, tmp_path):
         skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"),
         skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None),
     )
-    monkeypatch.setattr("deerflow.config.get_app_config", lambda: config)
-    monkeypatch.setattr("deerflow.skills.manager.get_app_config", lambda: config)
     monkeypatch.setattr("app.gateway.routers.skills.scan_skill_content", lambda *args, **kwargs: _async_scan("allow", "ok"))
     refresh_calls = []
 
-    async def _refresh():
+    async def _refresh(*a, **k):
         refresh_calls.append("refresh")
 
     monkeypatch.setattr("app.gateway.routers.skills.refresh_skills_system_prompt_cache_async", _refresh)
 
     app = FastAPI()
+    app.state.config = config
     app.include_router(skills_router.router)
 
     with TestClient(app) as client:
@@ -94,14 +96,12 @@ def test_custom_skill_rollback_blocked_by_scanner(monkeypatch, tmp_path):
         skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"),
         skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None),
     )
-    monkeypatch.setattr("deerflow.config.get_app_config", lambda: config)
-    monkeypatch.setattr("deerflow.skills.manager.get_app_config", lambda: config)
-    get_skill_history_file("demo-skill").write_text(
+    get_skill_history_file("demo-skill", config).write_text(
         '{"action":"human_edit","prev_content":' + json.dumps(original_content) + ',"new_content":' + json.dumps(edited_content) + "}\n",
         encoding="utf-8",
     )
 
-    async def _refresh():
+    async def _refresh(*a, **k):
         return None
 
     monkeypatch.setattr("app.gateway.routers.skills.refresh_skills_system_prompt_cache_async", _refresh)
@@ -114,6 +114,7 @@ def test_custom_skill_rollback_blocked_by_scanner(monkeypatch, tmp_path):
     monkeypatch.setattr("app.gateway.routers.skills.scan_skill_content", _scan)
 
     app = FastAPI()
+    app.state.config = config
     app.include_router(skills_router.router)
 
     with TestClient(app) as client:
@@ -136,17 +137,16 @@ def test_custom_skill_delete_preserves_history_and_allows_restore(monkeypatch, t
         skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"),
         skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None),
     )
-    monkeypatch.setattr("deerflow.config.get_app_config", lambda: config)
-    monkeypatch.setattr("deerflow.skills.manager.get_app_config", lambda: config)
     monkeypatch.setattr("app.gateway.routers.skills.scan_skill_content", lambda *args, **kwargs: _async_scan("allow", "ok"))
     refresh_calls = []
 
-    async def _refresh():
+    async def _refresh(*a, **k):
         refresh_calls.append("refresh")
 
     monkeypatch.setattr("app.gateway.routers.skills.refresh_skills_system_prompt_cache_async", _refresh)
 
     app = FastAPI()
+    app.state.config = config
     app.include_router(skills_router.router)
 
     with TestClient(app) as client:
@@ -238,23 +238,25 @@ def test_update_skill_refreshes_prompt_cache_before_return(monkeypatch, tmp_path
     enabled_state = {"value": True}
     refresh_calls = []
 
-    def _load_skills(*, enabled_only: bool):
+    def _load_skills(*a, enabled_only: bool = False, **k):
         skill = _make_skill("demo-skill", enabled=enabled_state["value"])
         if enabled_only and not skill.enabled:
             return []
         return [skill]
 
-    async def _refresh():
+    async def _refresh(*a, **k):
         refresh_calls.append("refresh")
         enabled_state["value"] = False
 
+    _app_cfg = AppConfig(sandbox=SandboxConfig(use="test"), extensions=ExtensionsConfig(mcp_servers={}, skills={}))
+
     monkeypatch.setattr("app.gateway.routers.skills.load_skills", _load_skills)
-    monkeypatch.setattr("app.gateway.routers.skills.get_extensions_config", lambda: SimpleNamespace(mcp_servers={}, skills={}))
-    monkeypatch.setattr("app.gateway.routers.skills.reload_extensions_config", lambda: None)
+    monkeypatch.setattr(AppConfig, "from_file", staticmethod(lambda: _app_cfg))
     monkeypatch.setattr(skills_router.ExtensionsConfig, "resolve_config_path", staticmethod(lambda: config_path))
     monkeypatch.setattr("app.gateway.routers.skills.refresh_skills_system_prompt_cache_async", _refresh)
 
     app = FastAPI()
+    app.state.config = _app_cfg
     app.include_router(skills_router.router)
 
     with TestClient(app) as client:
diff --git a/backend/tests/test_skills_loader.py b/backend/tests/test_skills_loader.py
index 7d885444d..efc614c7b 100644
--- a/backend/tests/test_skills_loader.py
+++ b/backend/tests/test_skills_loader.py
@@ -27,7 +27,7 @@ def test_load_skills_discovers_nested_skills_and_sets_container_paths(tmp_path:
     _write_skill(skills_root / "public" / "parent" / "child-skill", "child-skill", "Child skill")
     _write_skill(skills_root / "custom" / "team" / "helper", "team-helper", "Team helper")
 
-    skills = load_skills(skills_path=skills_root, use_config=False, enabled_only=False)
+    skills = load_skills(skills_path=skills_root, enabled_only=False)
     by_name = {skill.name: skill for skill in skills}
 
     assert {"root-skill", "child-skill", "team-helper"} <= set(by_name)
@@ -57,7 +57,7 @@ def test_load_skills_skips_hidden_directories(tmp_path: Path):
         "Hidden skill",
     )
 
-    skills = load_skills(skills_path=skills_root, use_config=False, enabled_only=False)
+    skills = load_skills(skills_path=skills_root, enabled_only=False)
     names = {skill.name for skill in skills}
 
     assert "ok-skill" in names
@@ -69,7 +69,7 @@ def test_load_skills_prefers_custom_over_public_with_same_name(tmp_path: Path):
     _write_skill(skills_root / "public" / "shared-skill", "shared-skill", "Public version")
     _write_skill(skills_root / "custom" / "shared-skill", "shared-skill", "Custom version")
 
-    skills = load_skills(skills_path=skills_root, use_config=False, enabled_only=False)
+    skills = load_skills(skills_path=skills_root, enabled_only=False)
     shared = next(skill for skill in skills if skill.name == "shared-skill")
 
     assert shared.category == "custom"
diff --git a/backend/tests/test_stream_bridge.py b/backend/tests/test_stream_bridge.py
index efd5e7923..55b463812 100644
--- a/backend/tests/test_stream_bridge.py
+++ b/backend/tests/test_stream_bridge.py
@@ -6,6 +6,7 @@ import re
 import anyio
 import pytest
 
+from deerflow.config.app_config import AppConfig
 from deerflow.runtime import END_SENTINEL, HEARTBEAT_SENTINEL, MemoryStreamBridge, make_stream_bridge
 
 # ---------------------------------------------------------------------------
@@ -331,6 +332,9 @@ async def test_concurrent_tasks_end_sentinel():
 
 @pytest.mark.anyio
 async def test_make_stream_bridge_defaults():
-    """make_stream_bridge() with no config yields a MemoryStreamBridge."""
-    async with make_stream_bridge() as bridge:
+    """make_stream_bridge with a config lacking stream_bridge yields a MemoryStreamBridge."""
+    from deerflow.config.sandbox_config import SandboxConfig
+
+    config = AppConfig(sandbox=SandboxConfig(use="test"))
+    async with make_stream_bridge(config) as bridge:
         assert isinstance(bridge, MemoryStreamBridge)
diff --git a/backend/tests/test_subagent_executor.py b/backend/tests/test_subagent_executor.py
index a6a62c2b6..43ffb0663 100644
--- a/backend/tests/test_subagent_executor.py
+++ b/backend/tests/test_subagent_executor.py
@@ -21,6 +21,8 @@ from unittest.mock import MagicMock, patch
 
 import pytest
 
+_TEST_APP_CONFIG = MagicMock(name="TestAppConfig")
+
 # Module names that need to be mocked to break circular imports
 _MOCKED_MODULE_NAMES = [
     "deerflow.agents",
@@ -203,7 +205,7 @@ class TestAsyncExecutionPath:
             config=base_config,
             tools=[],
             thread_id="test-thread",
-            trace_id="test-trace",
+            trace_id="test-trace", app_config=_TEST_APP_CONFIG,
         )
 
         with patch.object(executor, "_create_agent", return_value=mock_agent):
@@ -232,7 +234,7 @@ class TestAsyncExecutionPath:
         executor = SubagentExecutor(
             config=base_config,
             tools=[],
-            thread_id="test-thread",
+            thread_id="test-thread", app_config=_TEST_APP_CONFIG,
         )
 
         with patch.object(executor, "_create_agent", return_value=mock_agent):
@@ -259,7 +261,7 @@ class TestAsyncExecutionPath:
         executor = SubagentExecutor(
             config=base_config,
             tools=[],
-            thread_id="test-thread",
+            thread_id="test-thread", app_config=_TEST_APP_CONFIG,
         )
 
         with patch.object(executor, "_create_agent", return_value=mock_agent):
@@ -285,7 +287,7 @@ class TestAsyncExecutionPath:
         executor = SubagentExecutor(
             config=base_config,
             tools=[],
-            thread_id="test-thread",
+            thread_id="test-thread", app_config=_TEST_APP_CONFIG,
         )
 
         with patch.object(executor, "_create_agent", return_value=mock_agent):
@@ -306,7 +308,7 @@ class TestAsyncExecutionPath:
         executor = SubagentExecutor(
             config=base_config,
             tools=[],
-            thread_id="test-thread",
+            thread_id="test-thread", app_config=_TEST_APP_CONFIG,
         )
 
         with patch.object(executor, "_create_agent", return_value=mock_agent):
@@ -327,7 +329,7 @@ class TestAsyncExecutionPath:
         executor = SubagentExecutor(
             config=base_config,
             tools=[],
-            thread_id="test-thread",
+            thread_id="test-thread", app_config=_TEST_APP_CONFIG,
         )
 
         with patch.object(executor, "_create_agent", return_value=mock_agent):
@@ -348,7 +350,7 @@ class TestAsyncExecutionPath:
         executor = SubagentExecutor(
             config=base_config,
             tools=[],
-            thread_id="test-thread",
+            thread_id="test-thread", app_config=_TEST_APP_CONFIG,
         )
 
         with patch.object(executor, "_create_agent", return_value=mock_agent):
@@ -384,7 +386,7 @@ class TestSyncExecutionPath:
         executor = SubagentExecutor(
             config=base_config,
             tools=[],
-            thread_id="test-thread",
+            thread_id="test-thread", app_config=_TEST_APP_CONFIG,
         )
 
         with patch.object(executor, "_create_agent", return_value=mock_agent):
@@ -419,7 +421,7 @@ class TestSyncExecutionPath:
             executor = SubagentExecutor(
                 config=base_config,
                 tools=[],
-                thread_id="test-thread",
+                thread_id="test-thread", app_config=_TEST_APP_CONFIG,
             )
 
             with patch.object(executor, "_create_agent", return_value=mock_agent):
@@ -456,7 +458,7 @@ class TestSyncExecutionPath:
         executor = SubagentExecutor(
             config=base_config,
             tools=[],
-            thread_id="test-thread",
+            thread_id="test-thread", app_config=_TEST_APP_CONFIG,
         )
 
         with patch.object(executor, "_create_agent", return_value=mock_agent):
@@ -477,7 +479,7 @@ class TestSyncExecutionPath:
         executor = SubagentExecutor(
             config=base_config,
             tools=[],
-            thread_id="test-thread",
+            thread_id="test-thread", app_config=_TEST_APP_CONFIG,
         )
 
         with patch.object(executor, "_aexecute") as mock_aexecute:
@@ -511,7 +513,7 @@ class TestSyncExecutionPath:
         executor = SubagentExecutor(
             config=base_config,
             tools=[],
-            thread_id="test-thread",
+            thread_id="test-thread", app_config=_TEST_APP_CONFIG,
         )
 
         with patch.object(executor, "_create_agent", return_value=mock_agent):
@@ -565,7 +567,7 @@ class TestAsyncToolSupport:
         executor = SubagentExecutor(
             config=base_config,
             tools=[],
-            thread_id="test-thread",
+            thread_id="test-thread", app_config=_TEST_APP_CONFIG,
         )
 
         with patch.object(executor, "_create_agent", return_value=mock_agent):
@@ -602,7 +604,7 @@ class TestAsyncToolSupport:
         executor = SubagentExecutor(
             config=base_config,
             tools=[],
-            thread_id="test-thread",
+            thread_id="test-thread", app_config=_TEST_APP_CONFIG,
         )
 
         with patch.object(executor, "_create_agent", return_value=mock_agent):
@@ -648,7 +650,7 @@ class TestThreadSafety:
             executor = SubagentExecutor(
                 config=base_config,
                 tools=[],
-                thread_id=f"thread-{task_id}",
+                thread_id=f"thread-{task_id}", app_config=_TEST_APP_CONFIG,
             )
 
             with patch.object(executor, "_create_agent", return_value=mock_agent):
@@ -858,7 +860,7 @@ class TestCooperativeCancellation:
         executor = SubagentExecutor(
             config=base_config,
             tools=[],
-            thread_id="test-thread",
+            thread_id="test-thread", app_config=_TEST_APP_CONFIG,
         )
 
         with patch.object(executor, "_create_agent", return_value=mock_agent):
@@ -898,7 +900,7 @@ class TestCooperativeCancellation:
         executor = SubagentExecutor(
             config=base_config,
             tools=[],
-            thread_id="test-thread",
+            thread_id="test-thread", app_config=_TEST_APP_CONFIG,
         )
 
         with patch.object(executor, "_create_agent", return_value=mock_agent):
@@ -977,7 +979,7 @@ class TestCooperativeCancellation:
             config=short_config,
             tools=[],
             thread_id="test-thread",
-            trace_id="test-trace",
+            trace_id="test-trace", app_config=_TEST_APP_CONFIG,
         )
 
         # Wrap _scheduler_pool.submit so we know when run_task finishes
diff --git a/backend/tests/test_subagent_prompt_security.py b/backend/tests/test_subagent_prompt_security.py
index 015206877..d4a920f93 100644
--- a/backend/tests/test_subagent_prompt_security.py
+++ b/backend/tests/test_subagent_prompt_security.py
@@ -1,29 +1,35 @@
 """Tests for subagent availability and prompt exposure under local bash hardening."""
 
 from deerflow.agents.lead_agent import prompt as prompt_module
+from deerflow.config.app_config import AppConfig
+from deerflow.config.sandbox_config import SandboxConfig
 from deerflow.subagents import registry as registry_module
 
 
-def test_get_available_subagent_names_hides_bash_when_host_bash_disabled(monkeypatch) -> None:
-    monkeypatch.setattr(registry_module, "is_host_bash_allowed", lambda: False)
+def _config() -> AppConfig:
+    return AppConfig(sandbox=SandboxConfig(use="test"))
 
-    names = registry_module.get_available_subagent_names()
+
+def test_get_available_subagent_names_hides_bash_when_host_bash_disabled(monkeypatch) -> None:
+    monkeypatch.setattr(registry_module, "is_host_bash_allowed", lambda *a, **k: False)
+
+    names = registry_module.get_available_subagent_names(_config())
 
     assert names == ["general-purpose"]
 
 
 def test_get_available_subagent_names_keeps_bash_when_allowed(monkeypatch) -> None:
-    monkeypatch.setattr(registry_module, "is_host_bash_allowed", lambda: True)
+    monkeypatch.setattr(registry_module, "is_host_bash_allowed", lambda *a, **k: True)
 
-    names = registry_module.get_available_subagent_names()
+    names = registry_module.get_available_subagent_names(_config())
 
     assert names == ["general-purpose", "bash"]
 
 
 def test_build_subagent_section_hides_bash_examples_when_unavailable(monkeypatch) -> None:
-    monkeypatch.setattr(prompt_module, "get_available_subagent_names", lambda: ["general-purpose"])
+    monkeypatch.setattr(prompt_module, "get_available_subagent_names", lambda *a, **k: ["general-purpose"])
 
-    section = prompt_module._build_subagent_section(3)
+    section = prompt_module._build_subagent_section(3, _config())
 
     # When bash is not available, it should not appear at all (aligned with Codex:
     # unavailable roles are omitted, not listed as disabled)
@@ -34,9 +40,9 @@ def test_build_subagent_section_hides_bash_examples_when_unavailable(monkeypatch
 
 
 def test_build_subagent_section_includes_bash_when_available(monkeypatch) -> None:
-    monkeypatch.setattr(prompt_module, "get_available_subagent_names", lambda: ["general-purpose", "bash"])
+    monkeypatch.setattr(prompt_module, "get_available_subagent_names", lambda *a, **k: ["general-purpose", "bash"])
 
-    section = prompt_module._build_subagent_section(3)
+    section = prompt_module._build_subagent_section(3, _config())
 
     assert "For command execution (git, build, test, deploy operations)" in section
     assert 'bash("npm test")' in section
diff --git a/backend/tests/test_subagent_skills_config.py b/backend/tests/test_subagent_skills_config.py
deleted file mode 100644
index f121ccf25..000000000
--- a/backend/tests/test_subagent_skills_config.py
+++ /dev/null
@@ -1,596 +0,0 @@
-"""Tests for subagent per-agent skill configuration and custom subagent types.
-
-Covers:
-- SubagentConfig.skills field
-- SubagentOverrideConfig.skills field
-- CustomSubagentConfig model validation
-- SubagentsAppConfig.custom_agents and get_skills_for()
-- Registry: custom agent lookup, skills override, merged available names
-- Skills filter passthrough in task_tool config assembly
-"""
-
-import pytest
-
-from deerflow.config.subagents_config import (
-    CustomSubagentConfig,
-    SubagentOverrideConfig,
-    SubagentsAppConfig,
-    get_subagents_app_config,
-    load_subagents_config_from_dict,
-)
-from deerflow.subagents.config import SubagentConfig
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
-
-
-def _reset_subagents_config(**kwargs) -> None:
-    """Reset global subagents config to a known state."""
-    load_subagents_config_from_dict(kwargs)
-
-
-# ---------------------------------------------------------------------------
-# SubagentConfig.skills field
-# ---------------------------------------------------------------------------
-
-
-class TestSubagentConfigSkills:
-    def test_default_skills_is_none(self):
-        config = SubagentConfig(name="test", description="test", system_prompt="test")
-        assert config.skills is None
-
-    def test_skills_whitelist(self):
-        config = SubagentConfig(
-            name="test",
-            description="test",
-            system_prompt="test",
-            skills=["data-analysis", "visualization"],
-        )
-        assert config.skills == ["data-analysis", "visualization"]
-
-    def test_skills_empty_list_means_no_skills(self):
-        config = SubagentConfig(
-            name="test",
-            description="test",
-            system_prompt="test",
-            skills=[],
-        )
-        assert config.skills == []
-
-
-# ---------------------------------------------------------------------------
-# SubagentOverrideConfig.skills field
-# ---------------------------------------------------------------------------
-
-
-class TestSubagentOverrideConfigSkills:
-    def test_default_skills_is_none(self):
-        override = SubagentOverrideConfig()
-        assert override.skills is None
-
-    def test_skills_whitelist(self):
-        override = SubagentOverrideConfig(skills=["web-search", "data-analysis"])
-        assert override.skills == ["web-search", "data-analysis"]
-
-    def test_skills_empty_list(self):
-        override = SubagentOverrideConfig(skills=[])
-        assert override.skills == []
-
-    def test_skills_coexists_with_other_fields(self):
-        override = SubagentOverrideConfig(
-            timeout_seconds=300,
-            model="gpt-5",
-            skills=["my-skill"],
-        )
-        assert override.timeout_seconds == 300
-        assert override.model == "gpt-5"
-        assert override.skills == ["my-skill"]
-
-
-# ---------------------------------------------------------------------------
-# CustomSubagentConfig model
-# ---------------------------------------------------------------------------
-
-
-class TestCustomSubagentConfig:
-    def test_minimal_valid(self):
-        config = CustomSubagentConfig(
-            description="A test agent",
-            system_prompt="You are a test agent.",
-        )
-        assert config.description == "A test agent"
-        assert config.system_prompt == "You are a test agent."
-        assert config.tools is None
-        assert config.disallowed_tools == ["task", "ask_clarification", "present_files"]
-        assert config.skills is None
-        assert config.model == "inherit"
-        assert config.max_turns == 50
-        assert config.timeout_seconds == 900
-
-    def test_full_configuration(self):
-        config = CustomSubagentConfig(
-            description="Data analysis specialist",
-            system_prompt="You are a data analysis subagent.",
-            tools=["bash", "read_file", "write_file"],
-            disallowed_tools=["task"],
-            skills=["data-analysis", "visualization"],
-            model="qwen3:32b",
-            max_turns=80,
-            timeout_seconds=600,
-        )
-        assert config.tools == ["bash", "read_file", "write_file"]
-        assert config.skills == ["data-analysis", "visualization"]
-        assert config.model == "qwen3:32b"
-        assert config.max_turns == 80
-        assert config.timeout_seconds == 600
-
-    def test_skills_empty_list_no_skills(self):
-        config = CustomSubagentConfig(
-            description="test",
-            system_prompt="test",
-            skills=[],
-        )
-        assert config.skills == []
-
-    def test_rejects_zero_max_turns(self):
-        with pytest.raises(ValueError):
-            CustomSubagentConfig(
-                description="test",
-                system_prompt="test",
-                max_turns=0,
-            )
-
-    def test_rejects_zero_timeout(self):
-        with pytest.raises(ValueError):
-            CustomSubagentConfig(
-                description="test",
-                system_prompt="test",
-                timeout_seconds=0,
-            )
-
-
-# ---------------------------------------------------------------------------
-# SubagentsAppConfig.custom_agents and get_skills_for()
-# ---------------------------------------------------------------------------
-
-
-class TestSubagentsAppConfigCustomAgents:
-    def test_default_custom_agents_empty(self):
-        config = SubagentsAppConfig()
-        assert config.custom_agents == {}
-
-    def test_custom_agents_loaded(self):
-        config = SubagentsAppConfig(
-            custom_agents={
-                "analysis": CustomSubagentConfig(
-                    description="Analysis agent",
-                    system_prompt="You analyze data.",
-                    skills=["data-analysis"],
-                ),
-            }
-        )
-        assert "analysis" in config.custom_agents
-        assert config.custom_agents["analysis"].skills == ["data-analysis"]
-
-    def test_multiple_custom_agents(self):
-        config = SubagentsAppConfig(
-            custom_agents={
-                "analysis": CustomSubagentConfig(
-                    description="Analysis",
-                    system_prompt="analyze",
-                    skills=["data-analysis"],
-                ),
-                "researcher": CustomSubagentConfig(
-                    description="Research",
-                    system_prompt="research",
-                    skills=["web-search"],
-                ),
-            }
-        )
-        assert len(config.custom_agents) == 2
-
-
-class TestGetSkillsFor:
-    def test_returns_none_when_no_override(self):
-        config = SubagentsAppConfig()
-        assert config.get_skills_for("general-purpose") is None
-        assert config.get_skills_for("unknown") is None
-
-    def test_returns_skills_whitelist(self):
-        config = SubagentsAppConfig(
-            agents={
-                "general-purpose": SubagentOverrideConfig(skills=["web-search", "coding"]),
-            }
-        )
-        assert config.get_skills_for("general-purpose") == ["web-search", "coding"]
-
-    def test_returns_empty_list_for_no_skills(self):
-        config = SubagentsAppConfig(
-            agents={
-                "bash": SubagentOverrideConfig(skills=[]),
-            }
-        )
-        assert config.get_skills_for("bash") == []
-
-    def test_returns_none_for_unrelated_agent(self):
-        config = SubagentsAppConfig(
-            agents={
-                "bash": SubagentOverrideConfig(skills=["web-search"]),
-            }
-        )
-        assert config.get_skills_for("general-purpose") is None
-
-    def test_returns_none_when_skills_not_set(self):
-        config = SubagentsAppConfig(
-            agents={
-                "bash": SubagentOverrideConfig(timeout_seconds=300),
-            }
-        )
-        assert config.get_skills_for("bash") is None
-
-
-# ---------------------------------------------------------------------------
-# load_subagents_config_from_dict with skills and custom_agents
-# ---------------------------------------------------------------------------
-
-
-class TestLoadSubagentsConfigWithSkills:
-    def teardown_method(self):
-        _reset_subagents_config()
-
-    def test_load_with_skills_override(self):
-        load_subagents_config_from_dict(
-            {
-                "timeout_seconds": 900,
-                "agents": {
-                    "general-purpose": {"skills": ["web-search", "data-analysis"]},
-                },
-            }
-        )
-        cfg = get_subagents_app_config()
-        assert cfg.get_skills_for("general-purpose") == ["web-search", "data-analysis"]
-
-    def test_load_with_empty_skills(self):
-        load_subagents_config_from_dict(
-            {
-                "timeout_seconds": 900,
-                "agents": {
-                    "bash": {"skills": []},
-                },
-            }
-        )
-        cfg = get_subagents_app_config()
-        assert cfg.get_skills_for("bash") == []
-
-    def test_load_with_custom_agents(self):
-        load_subagents_config_from_dict(
-            {
-                "timeout_seconds": 900,
-                "custom_agents": {
-                    "analysis": {
-                        "description": "Data analysis specialist",
-                        "system_prompt": "You are a data analysis subagent.",
-                        "skills": ["data-analysis", "visualization"],
-                        "tools": ["bash", "read_file"],
-                        "max_turns": 80,
-                        "timeout_seconds": 600,
-                    },
-                },
-            }
-        )
-        cfg = get_subagents_app_config()
-        assert "analysis" in cfg.custom_agents
-        custom = cfg.custom_agents["analysis"]
-        assert custom.skills == ["data-analysis", "visualization"]
-        assert custom.tools == ["bash", "read_file"]
-        assert custom.max_turns == 80
-        assert custom.timeout_seconds == 600
-
-    def test_load_with_both_overrides_and_custom(self):
-        load_subagents_config_from_dict(
-            {
-                "timeout_seconds": 900,
-                "agents": {
-                    "general-purpose": {"skills": ["web-search"]},
-                },
-                "custom_agents": {
-                    "analysis": {
-                        "description": "Analysis",
-                        "system_prompt": "Analyze.",
-                        "skills": ["data-analysis"],
-                    },
-                },
-            }
-        )
-        cfg = get_subagents_app_config()
-        assert cfg.get_skills_for("general-purpose") == ["web-search"]
-        assert cfg.custom_agents["analysis"].skills == ["data-analysis"]
-
-
-# ---------------------------------------------------------------------------
-# Registry: custom agent lookup
-# ---------------------------------------------------------------------------
-
-
-class TestRegistryCustomAgentLookup:
-    def teardown_method(self):
-        _reset_subagents_config()
-
-    def test_custom_agent_found(self):
-        from deerflow.subagents.registry import get_subagent_config
-
-        load_subagents_config_from_dict(
-            {
-                "custom_agents": {
-                    "analysis": {
-                        "description": "Data analysis specialist",
-                        "system_prompt": "You are a data analysis subagent.",
-                        "skills": ["data-analysis"],
-                        "tools": ["bash", "read_file"],
-                        "max_turns": 80,
-                        "timeout_seconds": 600,
-                    },
-                },
-            }
-        )
-        config = get_subagent_config("analysis")
-        assert config is not None
-        assert config.name == "analysis"
-        assert config.skills == ["data-analysis"]
-        assert config.tools == ["bash", "read_file"]
-        assert config.max_turns == 80
-        assert config.timeout_seconds == 600
-        assert config.model == "inherit"
-
-    def test_custom_agent_not_found(self):
-        from deerflow.subagents.registry import get_subagent_config
-
-        _reset_subagents_config()
-        assert get_subagent_config("nonexistent") is None
-
-    def test_builtin_takes_priority_over_custom(self):
-        """If a custom agent has the same name as a builtin, builtin wins."""
-        from deerflow.subagents.builtins import BUILTIN_SUBAGENTS
-        from deerflow.subagents.registry import get_subagent_config
-
-        load_subagents_config_from_dict(
-            {
-                "custom_agents": {
-                    "general-purpose": {
-                        "description": "Custom override attempt",
-                        "system_prompt": "Should not be used",
-                    },
-                },
-            }
-        )
-        config = get_subagent_config("general-purpose")
-        # Should get the builtin description, not the custom one
-        assert config.description == BUILTIN_SUBAGENTS["general-purpose"].description
-
-    def test_custom_agent_with_override(self):
-        """Per-agent overrides also apply to custom agents."""
-        from deerflow.subagents.registry import get_subagent_config
-
-        load_subagents_config_from_dict(
-            {
-                "custom_agents": {
-                    "analysis": {
-                        "description": "Analysis",
-                        "system_prompt": "Analyze.",
-                        "timeout_seconds": 600,
-                    },
-                },
-                "agents": {
-                    "analysis": {"timeout_seconds": 300, "skills": ["overridden-skill"]},
-                },
-            }
-        )
-        config = get_subagent_config("analysis")
-        assert config is not None
-        assert config.timeout_seconds == 300  # Override applied
-        assert config.skills == ["overridden-skill"]  # Override applied
-
-
-# ---------------------------------------------------------------------------
-# Registry: skills override on builtin agents
-# ---------------------------------------------------------------------------
-
-
-class TestRegistrySkillsOverride:
-    def teardown_method(self):
-        _reset_subagents_config()
-
-    def test_skills_override_applied_to_builtin(self):
-        from deerflow.subagents.registry import get_subagent_config
-
-        load_subagents_config_from_dict(
-            {
-                "agents": {
-                    "general-purpose": {"skills": ["web-search", "data-analysis"]},
-                },
-            }
-        )
-        config = get_subagent_config("general-purpose")
-        assert config.skills == ["web-search", "data-analysis"]
-
-    def test_empty_skills_override(self):
-        from deerflow.subagents.registry import get_subagent_config
-
-        load_subagents_config_from_dict(
-            {
-                "agents": {
-                    "bash": {"skills": []},
-                },
-            }
-        )
-        config = get_subagent_config("bash")
-        assert config.skills == []
-
-    def test_no_skills_override_keeps_default(self):
-        from deerflow.subagents.registry import get_subagent_config
-
-        _reset_subagents_config()
-        config = get_subagent_config("general-purpose")
-        assert config.skills is None  # Default: inherit all
-
-    def test_skills_override_does_not_mutate_builtin(self):
-        from deerflow.subagents.builtins import BUILTIN_SUBAGENTS
-        from deerflow.subagents.registry import get_subagent_config
-
-        load_subagents_config_from_dict(
-            {
-                "agents": {
-                    "general-purpose": {"skills": ["web-search"]},
-                },
-            }
-        )
-        _ = get_subagent_config("general-purpose")
-        assert BUILTIN_SUBAGENTS["general-purpose"].skills is None
-
-
-# ---------------------------------------------------------------------------
-# Registry: get_available_subagent_names merges custom types
-# ---------------------------------------------------------------------------
-
-
-class TestRegistryAvailableNames:
-    def teardown_method(self):
-        _reset_subagents_config()
-
-    def test_includes_builtin_names(self):
-        from deerflow.subagents.registry import get_subagent_names
-
-        _reset_subagents_config()
-        names = get_subagent_names()
-        assert "general-purpose" in names
-        assert "bash" in names
-
-    def test_includes_custom_names(self):
-        from deerflow.subagents.registry import get_subagent_names
-
-        load_subagents_config_from_dict(
-            {
-                "custom_agents": {
-                    "analysis": {
-                        "description": "Analysis",
-                        "system_prompt": "Analyze.",
-                    },
-                    "researcher": {
-                        "description": "Research",
-                        "system_prompt": "Research.",
-                    },
-                },
-            }
-        )
-        names = get_subagent_names()
-        assert "general-purpose" in names
-        assert "bash" in names
-        assert "analysis" in names
-        assert "researcher" in names
-
-    def test_no_duplicates_when_custom_name_matches_builtin(self):
-        from deerflow.subagents.registry import get_subagent_names
-
-        load_subagents_config_from_dict(
-            {
-                "custom_agents": {
-                    "general-purpose": {
-                        "description": "Duplicate name",
-                        "system_prompt": "test",
-                    },
-                },
-            }
-        )
-        names = get_subagent_names()
-        assert names.count("general-purpose") == 1
-
-
-# ---------------------------------------------------------------------------
-# Registry: list_subagents includes custom agents
-# ---------------------------------------------------------------------------
-
-
-class TestRegistryListSubagentsWithCustom:
-    def teardown_method(self):
-        _reset_subagents_config()
-
-    def test_list_includes_custom_agents(self):
-        from deerflow.subagents.registry import list_subagents
-
-        load_subagents_config_from_dict(
-            {
-                "custom_agents": {
-                    "analysis": {
-                        "description": "Analysis",
-                        "system_prompt": "Analyze.",
-                        "skills": ["data-analysis"],
-                    },
-                },
-            }
-        )
-        configs = list_subagents()
-        names = {c.name for c in configs}
-        assert "general-purpose" in names
-        assert "bash" in names
-        assert "analysis" in names
-
-    def test_list_custom_agent_has_correct_skills(self):
-        from deerflow.subagents.registry import list_subagents
-
-        load_subagents_config_from_dict(
-            {
-                "custom_agents": {
-                    "analysis": {
-                        "description": "Analysis",
-                        "system_prompt": "Analyze.",
-                        "skills": ["data-analysis", "visualization"],
-                    },
-                },
-            }
-        )
-        by_name = {c.name: c for c in list_subagents()}
-        assert by_name["analysis"].skills == ["data-analysis", "visualization"]
-
-
-# ---------------------------------------------------------------------------
-# Skills filter passthrough: verify config.skills is used in task_tool assembly
-# ---------------------------------------------------------------------------
-
-
-class TestSkillsFilterPassthrough:
-    """Test that SubagentConfig.skills is correctly passed to get_skills_prompt_section."""
-
-    def test_none_skills_passes_none_to_prompt(self):
-        """When config.skills is None, available_skills=None should be passed (inherit all)."""
-        config = SubagentConfig(
-            name="test",
-            description="test",
-            system_prompt="test",
-            skills=None,
-        )
-        # Verify: set(None) would raise, so the code must check for None first
-        available = set(config.skills) if config.skills is not None else None
-        assert available is None
-
-    def test_empty_skills_passes_empty_set(self):
-        """When config.skills is [], available_skills=set() should be passed (no skills)."""
-        config = SubagentConfig(
-            name="test",
-            description="test",
-            system_prompt="test",
-            skills=[],
-        )
-        available = set(config.skills) if config.skills is not None else None
-        assert available == set()
-
-    def test_skills_whitelist_passes_correct_set(self):
-        """When config.skills has values, those should be passed as available_skills."""
-        config = SubagentConfig(
-            name="test",
-            description="test",
-            system_prompt="test",
-            skills=["data-analysis", "web-search"],
-        )
-        available = set(config.skills) if config.skills is not None else None
-        assert available == {"data-analysis", "web-search"}
diff --git a/backend/tests/test_subagent_timeout_config.py b/backend/tests/test_subagent_timeout_config.py
index b20bbe7a9..d3f695534 100644
--- a/backend/tests/test_subagent_timeout_config.py
+++ b/backend/tests/test_subagent_timeout_config.py
@@ -3,7 +3,7 @@
 Covers:
 - SubagentsAppConfig / SubagentOverrideConfig model validation and defaults
 - get_timeout_for() / get_max_turns_for() resolution logic
-- load_subagents_config_from_dict() and get_subagents_app_config() singleton
+- AppConfig.subagents field access
 - registry.get_subagent_config() applies config overrides
 - registry.list_subagents() applies overrides for all agents
 - Polling timeout calculation in task_tool is consistent with config
@@ -11,32 +11,28 @@ Covers:
 
 import pytest
 
+from deerflow.config.app_config import AppConfig
+from deerflow.config.sandbox_config import SandboxConfig
 from deerflow.config.subagents_config import (
     SubagentOverrideConfig,
     SubagentsAppConfig,
-    get_subagents_app_config,
-    load_subagents_config_from_dict,
 )
-from deerflow.subagents.config import SubagentConfig
-
-# ---------------------------------------------------------------------------
-# Helpers
-# ---------------------------------------------------------------------------
 
 
-def _reset_subagents_config(
+def _make_config(
     timeout_seconds: int = 900,
     *,
     max_turns: int | None = None,
     agents: dict | None = None,
-) -> None:
-    """Reset global subagents config to a known state."""
-    load_subagents_config_from_dict(
-        {
-            "timeout_seconds": timeout_seconds,
-            "max_turns": max_turns,
-            "agents": agents or {},
-        }
+) -> AppConfig:
+    """Build an AppConfig with the given subagents settings."""
+    return AppConfig(
+        sandbox=SandboxConfig(use="test"),
+        subagents=SubagentsAppConfig(
+            timeout_seconds=timeout_seconds,
+            max_turns=max_turns,
+            agents={k: SubagentOverrideConfig(**v) for k, v in (agents or {}).items()},
+        ),
     )
 
 
@@ -50,523 +46,131 @@ class TestSubagentOverrideConfig:
         override = SubagentOverrideConfig()
         assert override.timeout_seconds is None
         assert override.max_turns is None
-        assert override.model is None
 
-    def test_explicit_value(self):
-        override = SubagentOverrideConfig(timeout_seconds=300, max_turns=42, model="gpt-5.4")
-        assert override.timeout_seconds == 300
-        assert override.max_turns == 42
-        assert override.model == "gpt-5.4"
-
-    def test_model_accepts_any_non_empty_string(self):
-        """Model name is a free-form non-empty string; cross-reference validation
-        against the `models:` section happens at registry lookup time."""
-        override = SubagentOverrideConfig(model="any-arbitrary-model-name")
-        assert override.model == "any-arbitrary-model-name"
-
-    def test_rejects_zero(self):
-        with pytest.raises(ValueError):
-            SubagentOverrideConfig(timeout_seconds=0)
-        with pytest.raises(ValueError):
-            SubagentOverrideConfig(max_turns=0)
-
-    def test_rejects_negative(self):
-        with pytest.raises(ValueError):
-            SubagentOverrideConfig(timeout_seconds=-1)
-        with pytest.raises(ValueError):
-            SubagentOverrideConfig(max_turns=-1)
-
-    def test_rejects_empty_model(self):
-        """Empty-string model would silently bypass the `is not None` check and
-        reach `create_chat_model(name="")` as a runtime error. Reject at load time
-        instead, symmetric with the `ge=1` guard on timeout_seconds / max_turns."""
-        with pytest.raises(ValueError):
-            SubagentOverrideConfig(model="")
-
-    def test_minimum_valid_value(self):
-        override = SubagentOverrideConfig(timeout_seconds=1, max_turns=1)
-        assert override.timeout_seconds == 1
-        assert override.max_turns == 1
-
-
-# ---------------------------------------------------------------------------
-# SubagentsAppConfig – defaults and validation
-# ---------------------------------------------------------------------------
-
-
-class TestSubagentsAppConfigDefaults:
-    def test_default_timeout(self):
-        config = SubagentsAppConfig()
-        assert config.timeout_seconds == 900
-
-    def test_default_max_turns_override_is_none(self):
-        config = SubagentsAppConfig()
-        assert config.max_turns is None
-
-    def test_default_agents_empty(self):
-        config = SubagentsAppConfig()
-        assert config.agents == {}
-
-    def test_custom_global_runtime_overrides(self):
-        config = SubagentsAppConfig(timeout_seconds=1800, max_turns=120)
-        assert config.timeout_seconds == 1800
-        assert config.max_turns == 120
-
-    def test_rejects_zero_timeout(self):
-        with pytest.raises(ValueError):
-            SubagentsAppConfig(timeout_seconds=0)
-        with pytest.raises(ValueError):
-            SubagentsAppConfig(max_turns=0)
+    def test_explicit_values(self):
+        override = SubagentOverrideConfig(timeout_seconds=120, max_turns=50)
+        assert override.timeout_seconds == 120
+        assert override.max_turns == 50
 
     def test_rejects_negative_timeout(self):
-        with pytest.raises(ValueError):
-            SubagentsAppConfig(timeout_seconds=-60)
-        with pytest.raises(ValueError):
-            SubagentsAppConfig(max_turns=-60)
+        with pytest.raises(Exception):
+            SubagentOverrideConfig(timeout_seconds=-1)
+
+    def test_rejects_zero_timeout(self):
+        with pytest.raises(Exception):
+            SubagentOverrideConfig(timeout_seconds=0)
 
 
 # ---------------------------------------------------------------------------
-# SubagentsAppConfig resolution helpers
+# SubagentsAppConfig model
 # ---------------------------------------------------------------------------
 
 
-class TestRuntimeResolution:
-    def test_returns_global_default_when_no_override(self):
+class TestSubagentsAppConfig:
+    def test_default_timeout_is_900(self):
+        config = SubagentsAppConfig()
+        assert config.timeout_seconds == 900
+        assert config.max_turns is None
+        assert config.agents == {}
+
+    def test_custom_defaults(self):
+        config = SubagentsAppConfig(timeout_seconds=300, max_turns=50)
+        assert config.timeout_seconds == 300
+        assert config.max_turns == 50
+
+
+# ---------------------------------------------------------------------------
+# get_timeout_for / get_max_turns_for
+# ---------------------------------------------------------------------------
+
+
+class TestTimeoutResolution:
+    def test_global_timeout_for_unknown_agent(self):
         config = SubagentsAppConfig(timeout_seconds=600)
+        assert config.get_timeout_for("unknown") == 600
+
+    def test_per_agent_timeout_overrides_global(self):
+        config = SubagentsAppConfig(
+            timeout_seconds=600,
+            agents={"bash": SubagentOverrideConfig(timeout_seconds=120)},
+        )
+        assert config.get_timeout_for("bash") == 120
         assert config.get_timeout_for("general-purpose") == 600
+
+    def test_per_agent_override_none_falls_back_to_global(self):
+        config = SubagentsAppConfig(
+            timeout_seconds=600,
+            agents={"bash": SubagentOverrideConfig(timeout_seconds=None)},
+        )
         assert config.get_timeout_for("bash") == 600
-        assert config.get_timeout_for("unknown-agent") == 600
-        assert config.get_max_turns_for("general-purpose", 100) == 100
+
+
+class TestMaxTurnsResolution:
+    def test_builtin_default_when_no_override(self):
+        config = SubagentsAppConfig()
         assert config.get_max_turns_for("bash", 60) == 60
 
-    def test_returns_per_agent_override_when_set(self):
-        config = SubagentsAppConfig(
-            timeout_seconds=900,
-            max_turns=120,
-            agents={"bash": SubagentOverrideConfig(timeout_seconds=300, max_turns=80)},
-        )
-        assert config.get_timeout_for("bash") == 300
-        assert config.get_max_turns_for("bash", 60) == 80
+    def test_global_max_turns_overrides_builtin(self):
+        config = SubagentsAppConfig(max_turns=100)
+        assert config.get_max_turns_for("bash", 60) == 100
 
-    def test_other_agents_still_use_global_default(self):
+    def test_per_agent_max_turns_overrides_global(self):
         config = SubagentsAppConfig(
-            timeout_seconds=900,
-            max_turns=140,
-            agents={"bash": SubagentOverrideConfig(timeout_seconds=300, max_turns=80)},
+            max_turns=100,
+            agents={"bash": SubagentOverrideConfig(max_turns=30)},
         )
-        assert config.get_timeout_for("general-purpose") == 900
-        assert config.get_max_turns_for("general-purpose", 100) == 140
+        assert config.get_max_turns_for("bash", 60) == 30
+        assert config.get_max_turns_for("general-purpose", 60) == 100
 
-    def test_agent_with_none_override_falls_back_to_global(self):
+    def test_per_agent_override_none_falls_back(self):
         config = SubagentsAppConfig(
-            timeout_seconds=900,
-            max_turns=150,
-            agents={"general-purpose": SubagentOverrideConfig(timeout_seconds=None, max_turns=None)},
+            max_turns=100,
+            agents={"bash": SubagentOverrideConfig(max_turns=None)},
         )
-        assert config.get_timeout_for("general-purpose") == 900
-        assert config.get_max_turns_for("general-purpose", 100) == 150
-
-    def test_multiple_per_agent_overrides(self):
-        config = SubagentsAppConfig(
-            timeout_seconds=900,
-            max_turns=120,
-            agents={
-                "general-purpose": SubagentOverrideConfig(timeout_seconds=1800, max_turns=200),
-                "bash": SubagentOverrideConfig(timeout_seconds=120, max_turns=80),
-            },
-        )
-        assert config.get_timeout_for("general-purpose") == 1800
-        assert config.get_timeout_for("bash") == 120
-        assert config.get_max_turns_for("general-purpose", 100) == 200
-        assert config.get_max_turns_for("bash", 60) == 80
-
-    def test_get_model_for_returns_none_when_no_override(self):
-        """No per-agent model override -> returns None so callers fall back to builtin/parent."""
-        config = SubagentsAppConfig(timeout_seconds=900)
-        assert config.get_model_for("general-purpose") is None
-        assert config.get_model_for("bash") is None
-        assert config.get_model_for("unknown-agent") is None
-
-    def test_get_model_for_returns_override_when_set(self):
-        config = SubagentsAppConfig(
-            timeout_seconds=900,
-            agents={
-                "general-purpose": SubagentOverrideConfig(model="qwen3.5-35b-a3b"),
-                "bash": SubagentOverrideConfig(model="gpt-5.4"),
-            },
-        )
-        assert config.get_model_for("general-purpose") == "qwen3.5-35b-a3b"
-        assert config.get_model_for("bash") == "gpt-5.4"
-
-    def test_get_model_for_returns_none_for_omitted_agent(self):
-        """An agent not listed in overrides returns None even when other agents have model overrides."""
-        config = SubagentsAppConfig(
-            timeout_seconds=900,
-            agents={"bash": SubagentOverrideConfig(model="gpt-5.4")},
-        )
-        assert config.get_model_for("general-purpose") is None
-
-    def test_get_model_for_handles_explicit_none(self):
-        """Explicit model=None in the override is equivalent to no override."""
-        config = SubagentsAppConfig(
-            timeout_seconds=900,
-            agents={"bash": SubagentOverrideConfig(timeout_seconds=300, model=None)},
-        )
-        assert config.get_model_for("bash") is None
-        # Timeout override is still applied even when model is None.
-        assert config.get_timeout_for("bash") == 300
+        assert config.get_max_turns_for("bash", 60) == 100
 
 
 # ---------------------------------------------------------------------------
-# load_subagents_config_from_dict / get_subagents_app_config singleton
+# AppConfig.subagents
 # ---------------------------------------------------------------------------
 
 
-class TestLoadSubagentsConfig:
-    def teardown_method(self):
-        """Restore defaults after each test."""
-        _reset_subagents_config()
-
+class TestAppConfigSubagents:
     def test_load_global_timeout(self):
-        load_subagents_config_from_dict({"timeout_seconds": 300, "max_turns": 120})
-        assert get_subagents_app_config().timeout_seconds == 300
-        assert get_subagents_app_config().max_turns == 120
+        cfg = _make_config(timeout_seconds=300, max_turns=120)
+        sub = cfg.subagents
+        assert sub.timeout_seconds == 300
+        assert sub.max_turns == 120
 
     def test_load_with_per_agent_overrides(self):
-        load_subagents_config_from_dict(
-            {
-                "timeout_seconds": 900,
-                "max_turns": 120,
-                "agents": {
-                    "general-purpose": {"timeout_seconds": 1800, "max_turns": 200},
-                    "bash": {"timeout_seconds": 60, "max_turns": 80},
-                },
-            }
+        cfg = _make_config(
+            timeout_seconds=900,
+            max_turns=120,
+            agents={
+                "general-purpose": {"timeout_seconds": 1800, "max_turns": 200},
+                "bash": {"timeout_seconds": 60, "max_turns": 80},
+            },
         )
-        cfg = get_subagents_app_config()
-        assert cfg.get_timeout_for("general-purpose") == 1800
-        assert cfg.get_timeout_for("bash") == 60
-        assert cfg.get_max_turns_for("general-purpose", 100) == 200
-        assert cfg.get_max_turns_for("bash", 60) == 80
+        sub = cfg.subagents
+        assert sub.get_timeout_for("general-purpose") == 1800
+        assert sub.get_timeout_for("bash") == 60
+        assert sub.get_max_turns_for("general-purpose", 100) == 200
+        assert sub.get_max_turns_for("bash", 60) == 80
 
     def test_load_partial_override(self):
-        load_subagents_config_from_dict(
-            {
-                "timeout_seconds": 600,
-                "agents": {"bash": {"timeout_seconds": 120, "max_turns": 70}},
-            }
+        cfg = _make_config(
+            timeout_seconds=600,
+            agents={"bash": {"timeout_seconds": 120, "max_turns": 70}},
         )
-        cfg = get_subagents_app_config()
-        assert cfg.get_timeout_for("general-purpose") == 600
-        assert cfg.get_timeout_for("bash") == 120
-        assert cfg.get_max_turns_for("general-purpose", 100) == 100
-        assert cfg.get_max_turns_for("bash", 60) == 70
+        sub = cfg.subagents
+        assert sub.get_timeout_for("general-purpose") == 600
+        assert sub.get_timeout_for("bash") == 120
+        assert sub.get_max_turns_for("general-purpose", 100) == 100
+        assert sub.get_max_turns_for("bash", 60) == 70
 
-    def test_load_with_model_overrides(self):
-        load_subagents_config_from_dict(
-            {
-                "timeout_seconds": 900,
-                "agents": {
-                    "general-purpose": {"model": "qwen3.5-35b-a3b"},
-                    "bash": {"model": "gpt-5.4", "timeout_seconds": 300},
-                },
-            }
-        )
-        cfg = get_subagents_app_config()
-        assert cfg.get_model_for("general-purpose") == "qwen3.5-35b-a3b"
-        assert cfg.get_model_for("bash") == "gpt-5.4"
-        # Other override fields on the same agent must still load correctly.
-        assert cfg.get_timeout_for("bash") == 300
-
-    def test_load_empty_dict_uses_defaults(self):
-        load_subagents_config_from_dict({})
-        cfg = get_subagents_app_config()
-        assert cfg.timeout_seconds == 900
-        assert cfg.max_turns is None
-        assert cfg.agents == {}
-
-    def test_load_replaces_previous_config(self):
-        load_subagents_config_from_dict({"timeout_seconds": 100, "max_turns": 90})
-        assert get_subagents_app_config().timeout_seconds == 100
-        assert get_subagents_app_config().max_turns == 90
-
-        load_subagents_config_from_dict({"timeout_seconds": 200, "max_turns": 110})
-        assert get_subagents_app_config().timeout_seconds == 200
-        assert get_subagents_app_config().max_turns == 110
-
-    def test_singleton_returns_same_instance_between_calls(self):
-        load_subagents_config_from_dict({"timeout_seconds": 777, "max_turns": 123})
-        assert get_subagents_app_config() is get_subagents_app_config()
-
-
-# ---------------------------------------------------------------------------
-# registry.get_subagent_config – runtime overrides applied
-# ---------------------------------------------------------------------------
-
-
-class TestRegistryGetSubagentConfig:
-    def teardown_method(self):
-        _reset_subagents_config()
-
-    def test_returns_none_for_unknown_agent(self):
-        from deerflow.subagents.registry import get_subagent_config
-
-        assert get_subagent_config("nonexistent") is None
-
-    def test_returns_config_for_builtin_agents(self):
-        from deerflow.subagents.registry import get_subagent_config
-
-        assert get_subagent_config("general-purpose") is not None
-        assert get_subagent_config("bash") is not None
-
-    def test_default_timeout_preserved_when_no_config(self):
-        from deerflow.subagents.registry import get_subagent_config
-
-        _reset_subagents_config(timeout_seconds=900)
-        config = get_subagent_config("general-purpose")
-        assert config.timeout_seconds == 900
-        assert config.max_turns == 100
-
-    def test_global_timeout_override_applied(self):
-        from deerflow.subagents.registry import get_subagent_config
-
-        _reset_subagents_config(timeout_seconds=1800, max_turns=140)
-        config = get_subagent_config("general-purpose")
-        assert config.timeout_seconds == 1800
-        assert config.max_turns == 140
-
-    def test_per_agent_runtime_override_applied(self):
-        from deerflow.subagents.registry import get_subagent_config
-
-        load_subagents_config_from_dict(
-            {
-                "timeout_seconds": 900,
-                "max_turns": 120,
-                "agents": {"bash": {"timeout_seconds": 120, "max_turns": 80}},
-            }
-        )
-        bash_config = get_subagent_config("bash")
-        assert bash_config.timeout_seconds == 120
-        assert bash_config.max_turns == 80
-
-    def test_per_agent_override_does_not_affect_other_agents(self):
-        from deerflow.subagents.registry import get_subagent_config
-
-        load_subagents_config_from_dict(
-            {
-                "timeout_seconds": 900,
-                "max_turns": 120,
-                "agents": {"bash": {"timeout_seconds": 120, "max_turns": 80}},
-            }
-        )
-        gp_config = get_subagent_config("general-purpose")
-        assert gp_config.timeout_seconds == 900
-        assert gp_config.max_turns == 120
-
-    def test_per_agent_model_override_applied(self):
-        from deerflow.subagents.registry import get_subagent_config
-
-        load_subagents_config_from_dict(
-            {
-                "timeout_seconds": 900,
-                "agents": {"bash": {"model": "gpt-5.4-mini"}},
-            }
-        )
-        bash_config = get_subagent_config("bash")
-        assert bash_config.model == "gpt-5.4-mini"
-
-    def test_omitted_model_keeps_builtin_value(self):
-        """When config.yaml has no `model` field for an agent, the builtin default must be preserved."""
-        from deerflow.subagents.builtins import BUILTIN_SUBAGENTS
-        from deerflow.subagents.registry import get_subagent_config
-
-        builtin_bash_model = BUILTIN_SUBAGENTS["bash"].model
-        load_subagents_config_from_dict(
-            {
-                "timeout_seconds": 900,
-                "agents": {"bash": {"timeout_seconds": 300}},
-            }
-        )
-        bash_config = get_subagent_config("bash")
-        assert bash_config.model == builtin_bash_model
-
-    def test_explicit_null_model_keeps_builtin_value(self):
-        """An explicit `model: null` in config.yaml is equivalent to omission — builtin wins."""
-        from deerflow.subagents.builtins import BUILTIN_SUBAGENTS
-        from deerflow.subagents.registry import get_subagent_config
-
-        builtin_bash_model = BUILTIN_SUBAGENTS["bash"].model
-        load_subagents_config_from_dict(
-            {
-                "timeout_seconds": 900,
-                "agents": {"bash": {"model": None}},
-            }
-        )
-        bash_config = get_subagent_config("bash")
-        assert bash_config.model == builtin_bash_model
-
-    def test_model_override_does_not_affect_other_agents(self):
-        from deerflow.subagents.builtins import BUILTIN_SUBAGENTS
-        from deerflow.subagents.registry import get_subagent_config
-
-        builtin_gp_model = BUILTIN_SUBAGENTS["general-purpose"].model
-        load_subagents_config_from_dict(
-            {
-                "timeout_seconds": 900,
-                "agents": {"bash": {"model": "gpt-5.4"}},
-            }
-        )
-        gp_config = get_subagent_config("general-purpose")
-        assert gp_config.model == builtin_gp_model
-
-    def test_model_override_preserves_other_fields(self):
-        """Applying a model override must leave timeout_seconds / max_turns / name intact."""
-        from deerflow.subagents.builtins import BUILTIN_SUBAGENTS
-        from deerflow.subagents.registry import get_subagent_config
-
-        original = BUILTIN_SUBAGENTS["bash"]
-        load_subagents_config_from_dict(
-            {
-                "timeout_seconds": 900,
-                "agents": {"bash": {"model": "gpt-5.4-mini"}},
-            }
-        )
-        overridden = get_subagent_config("bash")
-        assert overridden.model == "gpt-5.4-mini"
-        assert overridden.name == original.name
-        assert overridden.description == original.description
-        # No timeout / max_turns override was set, so they use global default / builtin.
-        assert overridden.timeout_seconds == 900
-        assert overridden.max_turns == original.max_turns
-
-    def test_model_override_does_not_mutate_builtin(self):
-        """Registry must return a new object, leaving the builtin default intact."""
-        from deerflow.subagents.builtins import BUILTIN_SUBAGENTS
-        from deerflow.subagents.registry import get_subagent_config
-
-        original_bash_model = BUILTIN_SUBAGENTS["bash"].model
-        load_subagents_config_from_dict(
-            {
-                "timeout_seconds": 900,
-                "agents": {"bash": {"model": "gpt-5.4-mini"}},
-            }
-        )
-        _ = get_subagent_config("bash")
-        assert BUILTIN_SUBAGENTS["bash"].model == original_bash_model
-
-    def test_builtin_config_object_is_not_mutated(self):
-        """Registry must return a new object, leaving the builtin default intact."""
-        from deerflow.subagents.builtins import BUILTIN_SUBAGENTS
-        from deerflow.subagents.registry import get_subagent_config
-
-        original_timeout = BUILTIN_SUBAGENTS["bash"].timeout_seconds
-        original_max_turns = BUILTIN_SUBAGENTS["bash"].max_turns
-        load_subagents_config_from_dict({"timeout_seconds": 42, "max_turns": 88})
-
-        returned = get_subagent_config("bash")
-        assert returned.timeout_seconds == 42
-        assert returned.max_turns == 88
-        assert BUILTIN_SUBAGENTS["bash"].timeout_seconds == original_timeout
-        assert BUILTIN_SUBAGENTS["bash"].max_turns == original_max_turns
-
-    def test_config_preserves_other_fields(self):
-        """Applying runtime overrides must not change other SubagentConfig fields."""
-        from deerflow.subagents.builtins import BUILTIN_SUBAGENTS
-        from deerflow.subagents.registry import get_subagent_config
-
-        _reset_subagents_config(timeout_seconds=300, max_turns=140)
-        original = BUILTIN_SUBAGENTS["general-purpose"]
-        overridden = get_subagent_config("general-purpose")
-
-        assert overridden.name == original.name
-        assert overridden.description == original.description
-        assert overridden.max_turns == 140
-        assert overridden.model == original.model
-        assert overridden.tools == original.tools
-        assert overridden.disallowed_tools == original.disallowed_tools
-
-
-# ---------------------------------------------------------------------------
-# registry.list_subagents – all agents get overrides
-# ---------------------------------------------------------------------------
-
-
-class TestRegistryListSubagents:
-    def teardown_method(self):
-        _reset_subagents_config()
-
-    def test_lists_both_builtin_agents(self):
-        from deerflow.subagents.registry import list_subagents
-
-        names = {cfg.name for cfg in list_subagents()}
-        assert "general-purpose" in names
-        assert "bash" in names
-
-    def test_all_returned_configs_get_global_override(self):
-        from deerflow.subagents.registry import list_subagents
-
-        _reset_subagents_config(timeout_seconds=123, max_turns=77)
-        for cfg in list_subagents():
-            assert cfg.timeout_seconds == 123, f"{cfg.name} has wrong timeout"
-            assert cfg.max_turns == 77, f"{cfg.name} has wrong max_turns"
-
-    def test_per_agent_overrides_reflected_in_list(self):
-        from deerflow.subagents.registry import list_subagents
-
-        load_subagents_config_from_dict(
-            {
-                "timeout_seconds": 900,
-                "max_turns": 120,
-                "agents": {
-                    "general-purpose": {"timeout_seconds": 1800, "max_turns": 200},
-                    "bash": {"timeout_seconds": 60, "max_turns": 80},
-                },
-            }
-        )
-        by_name = {cfg.name: cfg for cfg in list_subagents()}
-        assert by_name["general-purpose"].timeout_seconds == 1800
-        assert by_name["bash"].timeout_seconds == 60
-        assert by_name["general-purpose"].max_turns == 200
-        assert by_name["bash"].max_turns == 80
-
-
-# ---------------------------------------------------------------------------
-# Polling timeout calculation (logic extracted from task_tool)
-# ---------------------------------------------------------------------------
-
-
-class TestPollingTimeoutCalculation:
-    """Verify the formula (timeout_seconds + 60) // 5 is correct for various inputs."""
-
-    @pytest.mark.parametrize(
-        "timeout_seconds, expected_max_polls",
-        [
-            (900, 192),  # default 15 min → (900+60)//5 = 192
-            (300, 72),  # 5 min → (300+60)//5 = 72
-            (1800, 372),  # 30 min → (1800+60)//5 = 372
-            (60, 24),  # 1 min → (60+60)//5 = 24
-            (1, 12),  # minimum → (1+60)//5 = 12
-        ],
-    )
-    def test_polling_timeout_formula(self, timeout_seconds: int, expected_max_polls: int):
-        dummy_config = SubagentConfig(
-            name="test",
-            description="test",
-            system_prompt="test",
-            timeout_seconds=timeout_seconds,
-        )
-        max_poll_count = (dummy_config.timeout_seconds + 60) // 5
-        assert max_poll_count == expected_max_polls
-
-    def test_polling_timeout_exceeds_execution_timeout(self):
-        """Safety-net polling window must always be longer than the execution timeout."""
-        for timeout_seconds in [60, 300, 900, 1800]:
-            dummy_config = SubagentConfig(
-                name="test",
-                description="test",
-                system_prompt="test",
-                timeout_seconds=timeout_seconds,
-            )
-            max_poll_count = (dummy_config.timeout_seconds + 60) // 5
-            polling_window_seconds = max_poll_count * 5
-            assert polling_window_seconds > timeout_seconds
+    def test_load_empty_uses_defaults(self):
+        cfg = _make_config()
+        sub = cfg.subagents
+        assert sub.timeout_seconds == 900
+        assert sub.max_turns is None
+        assert sub.agents == {}
diff --git a/backend/tests/test_suggestions_router.py b/backend/tests/test_suggestions_router.py
index 0e70b45d6..d662c3751 100644
--- a/backend/tests/test_suggestions_router.py
+++ b/backend/tests/test_suggestions_router.py
@@ -46,7 +46,9 @@ def test_generate_suggestions_parses_and_limits(monkeypatch):
     fake_model.ainvoke = AsyncMock(return_value=MagicMock(content='```json\n["Q1", "Q2", "Q3", "Q4"]\n```'))
     monkeypatch.setattr(suggestions, "create_chat_model", lambda **kwargs: fake_model)
 
-    result = asyncio.run(suggestions.generate_suggestions("t1", req))
+    # Bypass the require_permission decorator (which needs request +
+    # thread_store) — these tests cover the parsing logic.
+    result = asyncio.run(suggestions.generate_suggestions.__wrapped__("t1", req, request=None))
 
     assert result.suggestions == ["Q1", "Q2", "Q3"]
     fake_model.ainvoke.assert_awaited_once()
@@ -66,7 +68,9 @@ def test_generate_suggestions_parses_list_block_content(monkeypatch):
     fake_model.ainvoke = AsyncMock(return_value=MagicMock(content=[{"type": "text", "text": '```json\n["Q1", "Q2"]\n```'}]))
     monkeypatch.setattr(suggestions, "create_chat_model", lambda **kwargs: fake_model)
 
-    result = asyncio.run(suggestions.generate_suggestions("t1", req))
+    # Bypass the require_permission decorator (which needs request +
+    # thread_store) — these tests cover the parsing logic.
+    result = asyncio.run(suggestions.generate_suggestions.__wrapped__("t1", req, request=None))
 
     assert result.suggestions == ["Q1", "Q2"]
     fake_model.ainvoke.assert_awaited_once()
@@ -86,7 +90,9 @@ def test_generate_suggestions_parses_output_text_block_content(monkeypatch):
     fake_model.ainvoke = AsyncMock(return_value=MagicMock(content=[{"type": "output_text", "text": '```json\n["Q1", "Q2"]\n```'}]))
     monkeypatch.setattr(suggestions, "create_chat_model", lambda **kwargs: fake_model)
 
-    result = asyncio.run(suggestions.generate_suggestions("t1", req))
+    # Bypass the require_permission decorator (which needs request +
+    # thread_store) — these tests cover the parsing logic.
+    result = asyncio.run(suggestions.generate_suggestions.__wrapped__("t1", req, request=None))
 
     assert result.suggestions == ["Q1", "Q2"]
     fake_model.ainvoke.assert_awaited_once()
@@ -103,6 +109,8 @@ def test_generate_suggestions_returns_empty_on_model_error(monkeypatch):
     fake_model.ainvoke = AsyncMock(side_effect=RuntimeError("boom"))
     monkeypatch.setattr(suggestions, "create_chat_model", lambda **kwargs: fake_model)
 
-    result = asyncio.run(suggestions.generate_suggestions("t1", req))
+    # Bypass the require_permission decorator (which needs request +
+    # thread_store) — these tests cover the parsing logic.
+    result = asyncio.run(suggestions.generate_suggestions.__wrapped__("t1", req, request=None))
 
     assert result.suggestions == []
diff --git a/backend/tests/test_task_tool_core_logic.py b/backend/tests/test_task_tool_core_logic.py
index 1ae008df2..2c1d0d9d0 100644
--- a/backend/tests/test_task_tool_core_logic.py
+++ b/backend/tests/test_task_tool_core_logic.py
@@ -8,6 +8,9 @@ from unittest.mock import MagicMock
 
 import pytest
 
+from deerflow.config.app_config import AppConfig
+from deerflow.config.deer_flow_context import DeerFlowContext
+from deerflow.config.sandbox_config import SandboxConfig
 from deerflow.subagents.config import SubagentConfig
 
 # Use module import so tests can patch the exact symbols referenced inside task_tool().
@@ -24,6 +27,13 @@ class FakeSubagentStatus(Enum):
     TIMED_OUT = "timed_out"
 
 
+def _make_context(thread_id: str) -> DeerFlowContext:
+    return DeerFlowContext(
+        app_config=AppConfig(sandbox=SandboxConfig(use="test")),
+        thread_id=thread_id,
+    )
+
+
 def _make_runtime() -> SimpleNamespace:
     # Minimal ToolRuntime-like object; task_tool only reads these three attributes.
     return SimpleNamespace(
@@ -35,7 +45,7 @@ def _make_runtime() -> SimpleNamespace:
                 "outputs_path": "/tmp/outputs",
             },
         },
-        context={"thread_id": "thread-1"},
+        context=_make_context("thread-1"),
         config={"metadata": {"model_name": "ark-model", "trace_id": "trace-1"}},
     )
 
@@ -83,11 +93,11 @@ class _DummyScheduledTask:
 
 
 def test_task_tool_returns_error_for_unknown_subagent(monkeypatch):
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: None)
-    monkeypatch.setattr(task_tool_module, "get_available_subagent_names", lambda: ["general-purpose"])
+    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda *a, **k: None)
+    monkeypatch.setattr(task_tool_module, "get_available_subagent_names", lambda *a, **k: ["general-purpose"])
 
     result = _run_task_tool(
-        runtime=None,
+        runtime=_make_runtime(),
         description="执行任务",
         prompt="do work",
         subagent_type="general-purpose",
@@ -98,8 +108,8 @@ def test_task_tool_returns_error_for_unknown_subagent(monkeypatch):
 
 
 def test_task_tool_rejects_bash_subagent_when_host_bash_disabled(monkeypatch):
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: _make_subagent_config())
-    monkeypatch.setattr(task_tool_module, "is_host_bash_allowed", lambda: False)
+    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda *a, **k: _make_subagent_config())
+    monkeypatch.setattr(task_tool_module, "is_host_bash_allowed", lambda *a, **k: False)
 
     result = _run_task_tool(
         runtime=_make_runtime(),
@@ -142,9 +152,9 @@ def test_task_tool_emits_running_and_completed_events(monkeypatch):
 
     monkeypatch.setattr(task_tool_module, "SubagentStatus", FakeSubagentStatus)
     monkeypatch.setattr(task_tool_module, "SubagentExecutor", DummyExecutor)
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: config)
-
-    monkeypatch.setattr(task_tool_module, "get_background_task_result", lambda _: next(responses))
+    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda *a, **k: config)
+    monkeypatch.setattr(task_tool_module, "get_skills_prompt_section", lambda _cfg: "Skills Appendix")
+    monkeypatch.setattr(task_tool_module, "get_background_task_result", lambda *a, **k: next(responses))
     monkeypatch.setattr(task_tool_module, "get_stream_writer", lambda: events.append)
     monkeypatch.setattr(task_tool_module.asyncio, "sleep", _no_sleep)
     # task_tool lazily imports from deerflow.tools at call time, so patch that module-level function.
@@ -165,225 +175,18 @@ def test_task_tool_emits_running_and_completed_events(monkeypatch):
     assert captured["executor_kwargs"]["thread_id"] == "thread-1"
     assert captured["executor_kwargs"]["parent_model"] == "ark-model"
     assert captured["executor_kwargs"]["config"].max_turns == 7
-    # Skills are no longer appended to system_prompt; they are loaded per-session
-    # by SubagentExecutor and injected as conversation items (Codex pattern).
-    assert captured["executor_kwargs"]["config"].system_prompt == "Base system prompt"
+    assert "Skills Appendix" in captured["executor_kwargs"]["config"].system_prompt
 
-    get_available_tools.assert_called_once_with(model_name="ark-model", groups=None, subagent_enabled=False)
+    from unittest.mock import ANY
+
+    get_available_tools.assert_called_once_with(model_name="ark-model", subagent_enabled=False, app_config=ANY)
 
     event_types = [e["type"] for e in events]
     assert event_types == ["task_started", "task_running", "task_running", "task_completed"]
     assert events[-1]["result"] == "all done"
 
 
-def test_task_tool_propagates_tool_groups_to_subagent(monkeypatch):
-    """Verify tool_groups from parent metadata are passed to get_available_tools(groups=...)."""
-    config = _make_subagent_config()
-    parent_tool_groups = ["file:read", "file:write", "bash"]
-    runtime = SimpleNamespace(
-        state={
-            "sandbox": {"sandbox_id": "local"},
-            "thread_data": {"workspace_path": "/tmp/workspace"},
-        },
-        context={"thread_id": "thread-1"},
-        config={"metadata": {"model_name": "ark-model", "trace_id": "trace-1", "tool_groups": parent_tool_groups}},
-    )
-    events = []
-    get_available_tools = MagicMock(return_value=["tool-a"])
-
-    class DummyExecutor:
-        def __init__(self, **kwargs):
-            pass
-
-        def execute_async(self, prompt, task_id=None):
-            return task_id or "generated-task-id"
-
-    monkeypatch.setattr(task_tool_module, "SubagentStatus", FakeSubagentStatus)
-    monkeypatch.setattr(task_tool_module, "SubagentExecutor", DummyExecutor)
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: config)
-    monkeypatch.setattr(
-        task_tool_module,
-        "get_background_task_result",
-        lambda _: _make_result(FakeSubagentStatus.COMPLETED, result="done"),
-    )
-    monkeypatch.setattr(task_tool_module, "get_stream_writer", lambda: events.append)
-    monkeypatch.setattr(task_tool_module.asyncio, "sleep", _no_sleep)
-    monkeypatch.setattr("deerflow.tools.get_available_tools", get_available_tools)
-
-    output = _run_task_tool(
-        runtime=runtime,
-        description="执行任务",
-        prompt="file work only",
-        subagent_type="general-purpose",
-        tool_call_id="tc-groups",
-    )
-
-    assert output == "Task Succeeded. Result: done"
-    # The key assertion: groups should be propagated from parent metadata
-    get_available_tools.assert_called_once_with(model_name="ark-model", groups=parent_tool_groups, subagent_enabled=False)
-
-
-def test_task_tool_inherits_parent_skill_allowlist_for_default_subagent(monkeypatch):
-    config = _make_subagent_config()
-    runtime = _make_runtime()
-    runtime.config["metadata"]["available_skills"] = ["safe-skill"]
-    events = []
-    captured = {}
-
-    class DummyExecutor:
-        def __init__(self, **kwargs):
-            captured["config"] = kwargs["config"]
-
-        def execute_async(self, prompt, task_id=None):
-            return task_id or "generated-task-id"
-
-    monkeypatch.setattr(task_tool_module, "SubagentStatus", FakeSubagentStatus)
-    monkeypatch.setattr(task_tool_module, "SubagentExecutor", DummyExecutor)
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: config)
-    monkeypatch.setattr(
-        task_tool_module,
-        "get_background_task_result",
-        lambda _: _make_result(FakeSubagentStatus.COMPLETED, result="done"),
-    )
-    monkeypatch.setattr(task_tool_module, "get_stream_writer", lambda: events.append)
-    monkeypatch.setattr(task_tool_module.asyncio, "sleep", _no_sleep)
-    monkeypatch.setattr("deerflow.tools.get_available_tools", MagicMock(return_value=[]))
-
-    output = _run_task_tool(
-        runtime=runtime,
-        description="执行任务",
-        prompt="use skills",
-        subagent_type="general-purpose",
-        tool_call_id="tc-skills",
-    )
-
-    assert output == "Task Succeeded. Result: done"
-    assert captured["config"].skills == ["safe-skill"]
-
-
-def test_task_tool_intersects_parent_and_subagent_skill_allowlists(monkeypatch):
-    config = _make_subagent_config()
-    config = SubagentConfig(
-        name=config.name,
-        description=config.description,
-        system_prompt=config.system_prompt,
-        max_turns=config.max_turns,
-        timeout_seconds=config.timeout_seconds,
-        skills=["safe-skill", "other-skill"],
-    )
-    runtime = _make_runtime()
-    runtime.config["metadata"]["available_skills"] = ["safe-skill"]
-    events = []
-    captured = {}
-
-    class DummyExecutor:
-        def __init__(self, **kwargs):
-            captured["config"] = kwargs["config"]
-
-        def execute_async(self, prompt, task_id=None):
-            return task_id or "generated-task-id"
-
-    monkeypatch.setattr(task_tool_module, "SubagentStatus", FakeSubagentStatus)
-    monkeypatch.setattr(task_tool_module, "SubagentExecutor", DummyExecutor)
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: config)
-    monkeypatch.setattr(
-        task_tool_module,
-        "get_background_task_result",
-        lambda _: _make_result(FakeSubagentStatus.COMPLETED, result="done"),
-    )
-    monkeypatch.setattr(task_tool_module, "get_stream_writer", lambda: events.append)
-    monkeypatch.setattr(task_tool_module.asyncio, "sleep", _no_sleep)
-    monkeypatch.setattr("deerflow.tools.get_available_tools", MagicMock(return_value=[]))
-
-    output = _run_task_tool(
-        runtime=runtime,
-        description="执行任务",
-        prompt="use skills",
-        subagent_type="general-purpose",
-        tool_call_id="tc-skills-intersection",
-    )
-
-    assert output == "Task Succeeded. Result: done"
-    assert captured["config"].skills == ["safe-skill"]
-
-
-def test_task_tool_no_tool_groups_passes_none(monkeypatch):
-    """Verify that when metadata has no tool_groups, groups=None is passed (backward compat)."""
-    config = _make_subagent_config()
-    # Default _make_runtime() has no tool_groups in metadata
-    runtime = _make_runtime()
-    events = []
-    get_available_tools = MagicMock(return_value=[])
-
-    class DummyExecutor:
-        def __init__(self, **kwargs):
-            pass
-
-        def execute_async(self, prompt, task_id=None):
-            return task_id or "generated-task-id"
-
-    monkeypatch.setattr(task_tool_module, "SubagentStatus", FakeSubagentStatus)
-    monkeypatch.setattr(task_tool_module, "SubagentExecutor", DummyExecutor)
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: config)
-    monkeypatch.setattr(
-        task_tool_module,
-        "get_background_task_result",
-        lambda _: _make_result(FakeSubagentStatus.COMPLETED, result="ok"),
-    )
-    monkeypatch.setattr(task_tool_module, "get_stream_writer", lambda: events.append)
-    monkeypatch.setattr(task_tool_module.asyncio, "sleep", _no_sleep)
-    monkeypatch.setattr("deerflow.tools.get_available_tools", get_available_tools)
-
-    output = _run_task_tool(
-        runtime=runtime,
-        description="执行任务",
-        prompt="normal work",
-        subagent_type="general-purpose",
-        tool_call_id="tc-no-groups",
-    )
-
-    assert output == "Task Succeeded. Result: ok"
-    # No tool_groups in metadata → groups=None (default behavior preserved)
-    get_available_tools.assert_called_once_with(model_name="ark-model", groups=None, subagent_enabled=False)
-
-
-def test_task_tool_runtime_none_passes_groups_none(monkeypatch):
-    """Verify that when runtime is None, groups=None is passed (e.g., unknown subagent path exits early, but tools still load correctly)."""
-    config = _make_subagent_config()
-    events = []
-    get_available_tools = MagicMock(return_value=[])
-
-    class DummyExecutor:
-        def __init__(self, **kwargs):
-            pass
-
-        def execute_async(self, prompt, task_id=None):
-            return task_id or "generated-task-id"
-
-    monkeypatch.setattr(task_tool_module, "SubagentStatus", FakeSubagentStatus)
-    monkeypatch.setattr(task_tool_module, "SubagentExecutor", DummyExecutor)
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: config)
-    monkeypatch.setattr(
-        task_tool_module,
-        "get_background_task_result",
-        lambda _: _make_result(FakeSubagentStatus.COMPLETED, result="ok"),
-    )
-    monkeypatch.setattr(task_tool_module, "get_stream_writer", lambda: events.append)
-    monkeypatch.setattr(task_tool_module.asyncio, "sleep", _no_sleep)
-    monkeypatch.setattr("deerflow.tools.get_available_tools", get_available_tools)
-
-    output = _run_task_tool(
-        runtime=None,
-        description="执行任务",
-        prompt="no runtime",
-        subagent_type="general-purpose",
-        tool_call_id="tc-no-runtime",
-    )
-
-    assert output == "Task Succeeded. Result: ok"
-    # runtime is None → metadata is empty dict → groups=None
-    get_available_tools.assert_called_once_with(model_name=None, groups=None, subagent_enabled=False)
-
+def test_task_tool_returns_failed_message(monkeypatch):
     config = _make_subagent_config()
     events = []
 
@@ -393,12 +196,12 @@ def test_task_tool_runtime_none_passes_groups_none(monkeypatch):
         "SubagentExecutor",
         type("DummyExecutor", (), {"__init__": lambda self, **kwargs: None, "execute_async": lambda self, prompt, task_id=None: task_id}),
     )
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: config)
-
+    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda *a, **k: config)
+    monkeypatch.setattr(task_tool_module, "get_skills_prompt_section", lambda _cfg: "")
     monkeypatch.setattr(
         task_tool_module,
         "get_background_task_result",
-        lambda _: _make_result(FakeSubagentStatus.FAILED, error="subagent crashed"),
+        lambda *a, **k: _make_result(FakeSubagentStatus.FAILED, error="subagent crashed"),
     )
     monkeypatch.setattr(task_tool_module, "get_stream_writer", lambda: events.append)
     monkeypatch.setattr(task_tool_module.asyncio, "sleep", _no_sleep)
@@ -427,12 +230,12 @@ def test_task_tool_returns_timed_out_message(monkeypatch):
         "SubagentExecutor",
         type("DummyExecutor", (), {"__init__": lambda self, **kwargs: None, "execute_async": lambda self, prompt, task_id=None: task_id}),
     )
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: config)
-
+    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda *a, **k: config)
+    monkeypatch.setattr(task_tool_module, "get_skills_prompt_section", lambda _cfg: "")
     monkeypatch.setattr(
         task_tool_module,
         "get_background_task_result",
-        lambda _: _make_result(FakeSubagentStatus.TIMED_OUT, error="timeout"),
+        lambda *a, **k: _make_result(FakeSubagentStatus.TIMED_OUT, error="timeout"),
     )
     monkeypatch.setattr(task_tool_module, "get_stream_writer", lambda: events.append)
     monkeypatch.setattr(task_tool_module.asyncio, "sleep", _no_sleep)
@@ -463,12 +266,12 @@ def test_task_tool_polling_safety_timeout(monkeypatch):
         "SubagentExecutor",
         type("DummyExecutor", (), {"__init__": lambda self, **kwargs: None, "execute_async": lambda self, prompt, task_id=None: task_id}),
     )
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: config)
-
+    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda *a, **k: config)
+    monkeypatch.setattr(task_tool_module, "get_skills_prompt_section", lambda _cfg: "")
     monkeypatch.setattr(
         task_tool_module,
         "get_background_task_result",
-        lambda _: _make_result(FakeSubagentStatus.RUNNING, ai_messages=[]),
+        lambda *a, **k: _make_result(FakeSubagentStatus.RUNNING, ai_messages=[]),
     )
     monkeypatch.setattr(task_tool_module, "get_stream_writer", lambda: events.append)
     monkeypatch.setattr(task_tool_module.asyncio, "sleep", _no_sleep)
@@ -499,12 +302,12 @@ def test_cleanup_called_on_completed(monkeypatch):
         "SubagentExecutor",
         type("DummyExecutor", (), {"__init__": lambda self, **kwargs: None, "execute_async": lambda self, prompt, task_id=None: task_id}),
     )
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: config)
-
+    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda *a, **k: config)
+    monkeypatch.setattr(task_tool_module, "get_skills_prompt_section", lambda _cfg: "")
     monkeypatch.setattr(
         task_tool_module,
         "get_background_task_result",
-        lambda _: _make_result(FakeSubagentStatus.COMPLETED, result="done"),
+        lambda *a, **k: _make_result(FakeSubagentStatus.COMPLETED, result="done"),
     )
     monkeypatch.setattr(task_tool_module, "get_stream_writer", lambda: events.append)
     monkeypatch.setattr(task_tool_module.asyncio, "sleep", _no_sleep)
@@ -539,12 +342,12 @@ def test_cleanup_called_on_failed(monkeypatch):
         "SubagentExecutor",
         type("DummyExecutor", (), {"__init__": lambda self, **kwargs: None, "execute_async": lambda self, prompt, task_id=None: task_id}),
     )
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: config)
-
+    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda *a, **k: config)
+    monkeypatch.setattr(task_tool_module, "get_skills_prompt_section", lambda _cfg: "")
     monkeypatch.setattr(
         task_tool_module,
         "get_background_task_result",
-        lambda _: _make_result(FakeSubagentStatus.FAILED, error="error"),
+        lambda *a, **k: _make_result(FakeSubagentStatus.FAILED, error="error"),
     )
     monkeypatch.setattr(task_tool_module, "get_stream_writer", lambda: events.append)
     monkeypatch.setattr(task_tool_module.asyncio, "sleep", _no_sleep)
@@ -579,12 +382,12 @@ def test_cleanup_called_on_timed_out(monkeypatch):
         "SubagentExecutor",
         type("DummyExecutor", (), {"__init__": lambda self, **kwargs: None, "execute_async": lambda self, prompt, task_id=None: task_id}),
     )
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: config)
-
+    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda *a, **k: config)
+    monkeypatch.setattr(task_tool_module, "get_skills_prompt_section", lambda _cfg: "")
     monkeypatch.setattr(
         task_tool_module,
         "get_background_task_result",
-        lambda _: _make_result(FakeSubagentStatus.TIMED_OUT, error="timeout"),
+        lambda *a, **k: _make_result(FakeSubagentStatus.TIMED_OUT, error="timeout"),
     )
     monkeypatch.setattr(task_tool_module, "get_stream_writer", lambda: events.append)
     monkeypatch.setattr(task_tool_module.asyncio, "sleep", _no_sleep)
@@ -626,12 +429,12 @@ def test_cleanup_not_called_on_polling_safety_timeout(monkeypatch):
         "SubagentExecutor",
         type("DummyExecutor", (), {"__init__": lambda self, **kwargs: None, "execute_async": lambda self, prompt, task_id=None: task_id}),
     )
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: config)
-
+    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda *a, **k: config)
+    monkeypatch.setattr(task_tool_module, "get_skills_prompt_section", lambda _cfg: "")
     monkeypatch.setattr(
         task_tool_module,
         "get_background_task_result",
-        lambda _: _make_result(FakeSubagentStatus.RUNNING, ai_messages=[]),
+        lambda *a, **k: _make_result(FakeSubagentStatus.RUNNING, ai_messages=[]),
     )
     monkeypatch.setattr(task_tool_module, "get_stream_writer", lambda: events.append)
     monkeypatch.setattr(task_tool_module.asyncio, "sleep", _no_sleep)
@@ -679,8 +482,8 @@ def test_cleanup_scheduled_on_cancellation(monkeypatch):
         "SubagentExecutor",
         type("DummyExecutor", (), {"__init__": lambda self, **kwargs: None, "execute_async": lambda self, prompt, task_id=None: task_id}),
     )
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: config)
-
+    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda *a, **k: config)
+    monkeypatch.setattr(task_tool_module, "get_skills_prompt_section", lambda _cfg: "")
     monkeypatch.setattr(task_tool_module, "get_background_task_result", get_result)
     monkeypatch.setattr(task_tool_module, "get_stream_writer", lambda: events.append)
     monkeypatch.setattr(task_tool_module.asyncio, "sleep", cancel_on_first_sleep)
@@ -730,12 +533,12 @@ def test_cancelled_cleanup_stops_after_timeout(monkeypatch):
         "SubagentExecutor",
         type("DummyExecutor", (), {"__init__": lambda self, **kwargs: None, "execute_async": lambda self, prompt, task_id=None: task_id}),
     )
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: config)
-
+    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda *a, **k: config)
+    monkeypatch.setattr(task_tool_module, "get_skills_prompt_section", lambda _cfg: "")
     monkeypatch.setattr(
         task_tool_module,
         "get_background_task_result",
-        lambda _: _make_result(FakeSubagentStatus.RUNNING, ai_messages=[]),
+        lambda *a, **k: _make_result(FakeSubagentStatus.RUNNING, ai_messages=[]),
     )
     monkeypatch.setattr(task_tool_module, "get_stream_writer", lambda: events.append)
     monkeypatch.setattr(task_tool_module.asyncio, "sleep", cancel_on_first_sleep)
@@ -785,12 +588,12 @@ def test_cancellation_calls_request_cancel(monkeypatch):
         "SubagentExecutor",
         type("DummyExecutor", (), {"__init__": lambda self, **kwargs: None, "execute_async": lambda self, prompt, task_id=None: task_id}),
     )
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: config)
-
+    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda *a, **k: config)
+    monkeypatch.setattr(task_tool_module, "get_skills_prompt_section", lambda _cfg: "")
     monkeypatch.setattr(
         task_tool_module,
         "get_background_task_result",
-        lambda _: _make_result(FakeSubagentStatus.RUNNING, ai_messages=[]),
+        lambda *a, **k: _make_result(FakeSubagentStatus.RUNNING, ai_messages=[]),
     )
     monkeypatch.setattr(task_tool_module, "get_stream_writer", lambda: events.append)
     monkeypatch.setattr(task_tool_module.asyncio, "sleep", cancel_on_first_sleep)
@@ -843,9 +646,9 @@ def test_task_tool_returns_cancelled_message(monkeypatch):
         "SubagentExecutor",
         type("DummyExecutor", (), {"__init__": lambda self, **kwargs: None, "execute_async": lambda self, prompt, task_id=None: task_id}),
     )
-    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda _: config)
-
-    monkeypatch.setattr(task_tool_module, "get_background_task_result", lambda _: next(responses))
+    monkeypatch.setattr(task_tool_module, "get_subagent_config", lambda *a, **k: config)
+    monkeypatch.setattr(task_tool_module, "get_skills_prompt_section", lambda _cfg: "")
+    monkeypatch.setattr(task_tool_module, "get_background_task_result", lambda *a, **k: next(responses))
     monkeypatch.setattr(task_tool_module, "get_stream_writer", lambda: events.append)
     monkeypatch.setattr(task_tool_module.asyncio, "sleep", _no_sleep)
     monkeypatch.setattr("deerflow.tools.get_available_tools", lambda **kwargs: [])
diff --git a/backend/tests/test_thread_data_middleware.py b/backend/tests/test_thread_data_middleware.py
index ef3e440f7..4cc289b2d 100644
--- a/backend/tests/test_thread_data_middleware.py
+++ b/backend/tests/test_thread_data_middleware.py
@@ -1,58 +1,55 @@
 import pytest
-from langgraph.runtime import Runtime
 
 from deerflow.agents.middlewares.thread_data_middleware import ThreadDataMiddleware
+from deerflow.config.app_config import AppConfig
+from deerflow.config.deer_flow_context import DeerFlowContext
+from deerflow.config.sandbox_config import SandboxConfig
 
 
 def _as_posix(path: str) -> str:
     return path.replace("\\", "/")
 
 
+def _make_context(thread_id: str) -> DeerFlowContext:
+    return DeerFlowContext(
+        app_config=AppConfig(sandbox=SandboxConfig(use="test")),
+        thread_id=thread_id,
+    )
+
+
 class TestThreadDataMiddleware:
     def test_before_agent_returns_paths_when_thread_id_present_in_context(self, tmp_path):
         middleware = ThreadDataMiddleware(base_dir=str(tmp_path), lazy_init=True)
+        from langgraph.runtime import Runtime
 
-        result = middleware.before_agent(state={}, runtime=Runtime(context={"thread_id": "thread-123"}))
+        result = middleware.before_agent(state={}, runtime=Runtime(context=_make_context("thread-123")))
 
         assert result is not None
         assert _as_posix(result["thread_data"]["workspace_path"]).endswith("threads/thread-123/user-data/workspace")
         assert _as_posix(result["thread_data"]["uploads_path"]).endswith("threads/thread-123/user-data/uploads")
         assert _as_posix(result["thread_data"]["outputs_path"]).endswith("threads/thread-123/user-data/outputs")
 
-    def test_before_agent_uses_thread_id_from_configurable_when_context_is_none(self, tmp_path, monkeypatch):
+    def test_before_agent_uses_thread_id_from_context(self, tmp_path):
         middleware = ThreadDataMiddleware(base_dir=str(tmp_path), lazy_init=True)
-        runtime = Runtime(context=None)
-        monkeypatch.setattr(
-            "deerflow.agents.middlewares.thread_data_middleware.get_config",
-            lambda: {"configurable": {"thread_id": "thread-from-config"}},
-        )
+        from langgraph.runtime import Runtime
 
-        result = middleware.before_agent(state={}, runtime=runtime)
+        result = middleware.before_agent(state={}, runtime=Runtime(context=_make_context("thread-from-config")))
 
         assert result is not None
         assert _as_posix(result["thread_data"]["workspace_path"]).endswith("threads/thread-from-config/user-data/workspace")
-        assert runtime.context is None
 
-    def test_before_agent_uses_thread_id_from_configurable_when_context_missing_thread_id(self, tmp_path, monkeypatch):
+    def test_before_agent_uses_thread_id_from_typed_context(self, tmp_path):
         middleware = ThreadDataMiddleware(base_dir=str(tmp_path), lazy_init=True)
-        runtime = Runtime(context={})
-        monkeypatch.setattr(
-            "deerflow.agents.middlewares.thread_data_middleware.get_config",
-            lambda: {"configurable": {"thread_id": "thread-from-config"}},
-        )
+        from langgraph.runtime import Runtime
 
-        result = middleware.before_agent(state={}, runtime=runtime)
+        result = middleware.before_agent(state={}, runtime=Runtime(context=_make_context("thread-from-dict")))
 
         assert result is not None
-        assert _as_posix(result["thread_data"]["uploads_path"]).endswith("threads/thread-from-config/user-data/uploads")
-        assert runtime.context == {}
+        assert _as_posix(result["thread_data"]["uploads_path"]).endswith("threads/thread-from-dict/user-data/uploads")
 
-    def test_before_agent_raises_clear_error_when_thread_id_missing_everywhere(self, tmp_path, monkeypatch):
+    def test_before_agent_raises_clear_error_when_thread_id_missing(self, tmp_path):
         middleware = ThreadDataMiddleware(base_dir=str(tmp_path), lazy_init=True)
-        monkeypatch.setattr(
-            "deerflow.agents.middlewares.thread_data_middleware.get_config",
-            lambda: {"configurable": {}},
-        )
+        from langgraph.runtime import Runtime
 
-        with pytest.raises(ValueError, match="Thread ID is required in runtime context or config.configurable"):
-            middleware.before_agent(state={}, runtime=Runtime(context=None))
+        with pytest.raises(ValueError, match="Thread ID is required"):
+            middleware.before_agent(state={}, runtime=Runtime(context=_make_context("")))
diff --git a/backend/tests/test_thread_meta_repo.py b/backend/tests/test_thread_meta_repo.py
new file mode 100644
index 000000000..3a6532567
--- /dev/null
+++ b/backend/tests/test_thread_meta_repo.py
@@ -0,0 +1,178 @@
+"""Tests for ThreadMetaRepository (SQLAlchemy-backed)."""
+
+import pytest
+
+from deerflow.persistence.thread_meta import ThreadMetaRepository
+
+
+async def _make_repo(tmp_path):
+    from deerflow.persistence.engine import get_session_factory, init_engine
+
+    url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
+    await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
+    return ThreadMetaRepository(get_session_factory())
+
+
+async def _cleanup():
+    from deerflow.persistence.engine import close_engine
+
+    await close_engine()
+
+
+class TestThreadMetaRepository:
+    @pytest.mark.anyio
+    async def test_create_and_get(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        record = await repo.create("t1")
+        assert record["thread_id"] == "t1"
+        assert record["status"] == "idle"
+        assert "created_at" in record
+
+        fetched = await repo.get("t1")
+        assert fetched is not None
+        assert fetched["thread_id"] == "t1"
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_create_with_assistant_id(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        record = await repo.create("t1", assistant_id="agent1")
+        assert record["assistant_id"] == "agent1"
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_create_with_owner_and_display_name(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        record = await repo.create("t1", user_id="user1", display_name="My Thread")
+        assert record["user_id"] == "user1"
+        assert record["display_name"] == "My Thread"
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_create_with_metadata(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        record = await repo.create("t1", metadata={"key": "value"})
+        assert record["metadata"] == {"key": "value"}
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_get_nonexistent(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        assert await repo.get("nonexistent") is None
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_check_access_no_record_allows(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        assert await repo.check_access("unknown", "user1") is True
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_check_access_owner_matches(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.create("t1", user_id="user1")
+        assert await repo.check_access("t1", "user1") is True
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_check_access_owner_mismatch(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.create("t1", user_id="user1")
+        assert await repo.check_access("t1", "user2") is False
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_check_access_no_owner_allows_all(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        # Explicit user_id=None to bypass the new AUTO default that
+        # would otherwise pick up the test user from the autouse fixture.
+        await repo.create("t1", user_id=None)
+        assert await repo.check_access("t1", "anyone") is True
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_check_access_strict_missing_row_denied(self, tmp_path):
+        """require_existing=True flips the missing-row case to *denied*.
+
+        Closes the delete-idempotence cross-user gap: after a thread is
+        deleted, the row is gone, and the permissive default would let any
+        caller "claim" it as untracked. The strict mode demands a row.
+        """
+        repo = await _make_repo(tmp_path)
+        assert await repo.check_access("never-existed", "user1", require_existing=True) is False
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_check_access_strict_owner_match_allowed(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.create("t1", user_id="user1")
+        assert await repo.check_access("t1", "user1", require_existing=True) is True
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_check_access_strict_owner_mismatch_denied(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.create("t1", user_id="user1")
+        assert await repo.check_access("t1", "user2", require_existing=True) is False
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_check_access_strict_null_owner_still_allowed(self, tmp_path):
+        """Even in strict mode, a row with NULL user_id stays shared.
+
+        The strict flag tightens the *missing row* case, not the *shared
+        row* case — legacy pre-auth rows that survived a clean migration
+        without an owner are still everyone's.
+        """
+        repo = await _make_repo(tmp_path)
+        await repo.create("t1", user_id=None)
+        assert await repo.check_access("t1", "anyone", require_existing=True) is True
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_update_status(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.create("t1")
+        await repo.update_status("t1", "busy")
+        record = await repo.get("t1")
+        assert record["status"] == "busy"
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_delete(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.create("t1")
+        await repo.delete("t1")
+        assert await repo.get("t1") is None
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_delete_nonexistent_is_noop(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.delete("nonexistent")  # should not raise
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_update_metadata_merges(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.create("t1", metadata={"a": 1, "b": 2})
+        await repo.update_metadata("t1", {"b": 99, "c": 3})
+        record = await repo.get("t1")
+        # Existing key preserved, overlapping key overwritten, new key added
+        assert record["metadata"] == {"a": 1, "b": 99, "c": 3}
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_update_metadata_on_empty(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.create("t1")
+        await repo.update_metadata("t1", {"k": "v"})
+        record = await repo.get("t1")
+        assert record["metadata"] == {"k": "v"}
+        await _cleanup()
+
+    @pytest.mark.anyio
+    async def test_update_metadata_nonexistent_is_noop(self, tmp_path):
+        repo = await _make_repo(tmp_path)
+        await repo.update_metadata("nonexistent", {"k": "v"})  # should not raise
+        await _cleanup()
diff --git a/backend/tests/test_thread_run_messages_pagination.py b/backend/tests/test_thread_run_messages_pagination.py
new file mode 100644
index 000000000..f00100cad
--- /dev/null
+++ b/backend/tests/test_thread_run_messages_pagination.py
@@ -0,0 +1,128 @@
+"""Tests for paginated GET /api/threads/{thread_id}/runs/{run_id}/messages endpoint."""
+from __future__ import annotations
+
+from unittest.mock import AsyncMock, MagicMock
+
+import pytest
+from _router_auth_helpers import make_authed_test_app
+from fastapi.testclient import TestClient
+
+from app.gateway.routers import thread_runs
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def _make_app(event_store=None):
+    """Build a test FastAPI app with stub auth and mocked state."""
+    app = make_authed_test_app()
+    app.include_router(thread_runs.router)
+
+    if event_store is not None:
+        app.state.run_event_store = event_store
+
+    return app
+
+
+def _make_event_store(rows: list[dict]):
+    """Return an AsyncMock event store whose list_messages_by_run() returns rows."""
+    store = MagicMock()
+    store.list_messages_by_run = AsyncMock(return_value=rows)
+    return store
+
+
+def _make_message(seq: int) -> dict:
+    return {"seq": seq, "event_type": "ai_message", "category": "message", "content": f"msg-{seq}"}
+
+
+# ---------------------------------------------------------------------------
+# Tests
+# ---------------------------------------------------------------------------
+
+
+def test_returns_paginated_envelope():
+    """GET /api/threads/{tid}/runs/{rid}/messages returns {data: [...], has_more: bool}."""
+    rows = [_make_message(i) for i in range(1, 4)]
+    app = _make_app(event_store=_make_event_store(rows))
+    with TestClient(app) as client:
+        response = client.get("/api/threads/thread-1/runs/run-1/messages")
+    assert response.status_code == 200
+    body = response.json()
+    assert "data" in body
+    assert "has_more" in body
+    assert body["has_more"] is False
+    assert len(body["data"]) == 3
+
+
+def test_has_more_true_when_extra_row_returned():
+    """has_more=True when event store returns limit+1 rows."""
+    # Default limit is 50; provide 51 rows
+    rows = [_make_message(i) for i in range(1, 52)]  # 51 rows
+    app = _make_app(event_store=_make_event_store(rows))
+    with TestClient(app) as client:
+        response = client.get("/api/threads/thread-2/runs/run-2/messages")
+    assert response.status_code == 200
+    body = response.json()
+    assert body["has_more"] is True
+    assert len(body["data"]) == 50  # trimmed to limit
+
+
+def test_after_seq_forwarded_to_event_store():
+    """after_seq query param is forwarded to event_store.list_messages_by_run."""
+    rows = [_make_message(10)]
+    event_store = _make_event_store(rows)
+    app = _make_app(event_store=event_store)
+    with TestClient(app) as client:
+        response = client.get("/api/threads/thread-3/runs/run-3/messages?after_seq=5")
+    assert response.status_code == 200
+    event_store.list_messages_by_run.assert_awaited_once_with(
+        "thread-3", "run-3",
+        limit=51,  # default limit(50) + 1
+        before_seq=None,
+        after_seq=5,
+    )
+
+
+def test_before_seq_forwarded_to_event_store():
+    """before_seq query param is forwarded to event_store.list_messages_by_run."""
+    rows = [_make_message(3)]
+    event_store = _make_event_store(rows)
+    app = _make_app(event_store=event_store)
+    with TestClient(app) as client:
+        response = client.get("/api/threads/thread-4/runs/run-4/messages?before_seq=10")
+    assert response.status_code == 200
+    event_store.list_messages_by_run.assert_awaited_once_with(
+        "thread-4", "run-4",
+        limit=51,
+        before_seq=10,
+        after_seq=None,
+    )
+
+
+def test_custom_limit_forwarded_to_event_store():
+    """Custom limit is forwarded as limit+1 to the event store."""
+    rows = [_make_message(i) for i in range(1, 6)]
+    event_store = _make_event_store(rows)
+    app = _make_app(event_store=event_store)
+    with TestClient(app) as client:
+        response = client.get("/api/threads/thread-5/runs/run-5/messages?limit=10")
+    assert response.status_code == 200
+    event_store.list_messages_by_run.assert_awaited_once_with(
+        "thread-5", "run-5",
+        limit=11,  # 10 + 1
+        before_seq=None,
+        after_seq=None,
+    )
+
+
+def test_empty_data_when_no_messages():
+    """Returns empty data list with has_more=False when no messages exist."""
+    app = _make_app(event_store=_make_event_store([]))
+    with TestClient(app) as client:
+        response = client.get("/api/threads/thread-6/runs/run-6/messages")
+    assert response.status_code == 200
+    body = response.json()
+    assert body["data"] == []
+    assert body["has_more"] is False
diff --git a/backend/tests/test_threads_router.py b/backend/tests/test_threads_router.py
index ad3abe4e9..4ffa28a8c 100644
--- a/backend/tests/test_threads_router.py
+++ b/backend/tests/test_threads_router.py
@@ -1,7 +1,8 @@
 from unittest.mock import patch
 
 import pytest
-from fastapi import FastAPI, HTTPException
+from _router_auth_helpers import make_authed_test_app
+from fastapi import HTTPException
 from fastapi.testclient import TestClient
 
 from app.gateway.routers import threads
@@ -49,12 +50,15 @@ def test_delete_thread_data_rejects_invalid_thread_id(tmp_path):
 
 
 def test_delete_thread_route_cleans_thread_directory(tmp_path):
-    paths = Paths(tmp_path)
-    thread_dir = paths.thread_dir("thread-route")
-    paths.sandbox_work_dir("thread-route").mkdir(parents=True, exist_ok=True)
-    (paths.sandbox_work_dir("thread-route") / "notes.txt").write_text("hello", encoding="utf-8")
+    from deerflow.runtime.user_context import get_effective_user_id
 
-    app = FastAPI()
+    paths = Paths(tmp_path)
+    user_id = get_effective_user_id()
+    thread_dir = paths.thread_dir("thread-route", user_id=user_id)
+    paths.sandbox_work_dir("thread-route", user_id=user_id).mkdir(parents=True, exist_ok=True)
+    (paths.sandbox_work_dir("thread-route", user_id=user_id) / "notes.txt").write_text("hello", encoding="utf-8")
+
+    app = make_authed_test_app()
     app.include_router(threads.router)
 
     with patch("app.gateway.routers.threads.get_paths", return_value=paths):
@@ -69,7 +73,7 @@ def test_delete_thread_route_cleans_thread_directory(tmp_path):
 def test_delete_thread_route_rejects_invalid_thread_id(tmp_path):
     paths = Paths(tmp_path)
 
-    app = FastAPI()
+    app = make_authed_test_app()
     app.include_router(threads.router)
 
     with patch("app.gateway.routers.threads.get_paths", return_value=paths):
@@ -82,7 +86,7 @@ def test_delete_thread_route_rejects_invalid_thread_id(tmp_path):
 def test_delete_thread_route_returns_422_for_route_safe_invalid_id(tmp_path):
     paths = Paths(tmp_path)
 
-    app = FastAPI()
+    app = make_authed_test_app()
     app.include_router(threads.router)
 
     with patch("app.gateway.routers.threads.get_paths", return_value=paths):
@@ -107,3 +111,28 @@ def test_delete_thread_data_returns_generic_500_error(tmp_path):
     assert exc_info.value.detail == "Failed to delete local thread data."
     assert "/secret/path" not in exc_info.value.detail
     log_exception.assert_called_once_with("Failed to delete thread data for %s", "thread-cleanup")
+
+
+# ── Server-reserved metadata key stripping ──────────────────────────────────
+
+
+def test_strip_reserved_metadata_removes_user_id():
+    """Client-supplied user_id is dropped to prevent reflection attacks."""
+    out = threads._strip_reserved_metadata({"user_id": "victim-id", "title": "ok"})
+    assert out == {"title": "ok"}
+
+
+def test_strip_reserved_metadata_passes_through_safe_keys():
+    """Non-reserved keys are preserved verbatim."""
+    md = {"title": "ok", "tags": ["a", "b"], "custom": {"x": 1}}
+    assert threads._strip_reserved_metadata(md) == md
+
+
+def test_strip_reserved_metadata_empty_input():
+    """Empty / None metadata returns same object — no crash."""
+    assert threads._strip_reserved_metadata({}) == {}
+
+
+def test_strip_reserved_metadata_strips_all_reserved_keys():
+    out = threads._strip_reserved_metadata({"user_id": "x", "keep": "me"})
+    assert out == {"keep": "me"}
diff --git a/backend/tests/test_title_generation.py b/backend/tests/test_title_generation.py
index 53b0a5010..ba5b8e856 100644
--- a/backend/tests/test_title_generation.py
+++ b/backend/tests/test_title_generation.py
@@ -3,7 +3,7 @@
 import pytest
 
 from deerflow.agents.middlewares.title_middleware import TitleMiddleware
-from deerflow.config.title_config import TitleConfig, get_title_config, set_title_config
+from deerflow.config.title_config import TitleConfig
 
 
 class TestTitleConfig:
@@ -44,21 +44,6 @@ class TestTitleConfig:
         with pytest.raises(ValueError):
             TitleConfig(max_chars=201)
 
-    def test_get_set_config(self):
-        """Test global config getter and setter."""
-        original_config = get_title_config()
-
-        # Set new config
-        new_config = TitleConfig(enabled=False, max_words=10)
-        set_title_config(new_config)
-
-        # Verify it was set
-        assert get_title_config().enabled is False
-        assert get_title_config().max_words == 10
-
-        # Restore original config
-        set_title_config(original_config)
-
 
 class TestTitleMiddleware:
     """Tests for TitleMiddleware."""
@@ -68,23 +53,3 @@ class TestTitleMiddleware:
         middleware = TitleMiddleware()
         assert middleware is not None
         assert middleware.state_schema is not None
-
-    # TODO: Add integration tests with mock Runtime
-    # def test_should_generate_title(self):
-    #     """Test title generation trigger logic."""
-    #     pass
-
-    # def test_generate_title(self):
-    #     """Test title generation."""
-    #     pass
-
-    # def test_after_agent_hook(self):
-    #     """Test after_agent hook."""
-    #     pass
-
-
-# TODO: Add integration tests
-# - Test with real LangGraph runtime
-# - Test title persistence with checkpointer
-# - Test fallback behavior when LLM fails
-# - Test concurrent title generation
diff --git a/backend/tests/test_title_middleware_core_logic.py b/backend/tests/test_title_middleware_core_logic.py
index 684de2345..ce813a89c 100644
--- a/backend/tests/test_title_middleware_core_logic.py
+++ b/backend/tests/test_title_middleware_core_logic.py
@@ -1,38 +1,32 @@
 """Core behavior tests for TitleMiddleware."""
 
 import asyncio
+from types import SimpleNamespace
 from unittest.mock import AsyncMock, MagicMock
 
 from langchain_core.messages import AIMessage, HumanMessage
 
 from deerflow.agents.middlewares import title_middleware as title_middleware_module
 from deerflow.agents.middlewares.title_middleware import TitleMiddleware
-from deerflow.config.title_config import TitleConfig, get_title_config, set_title_config
+from deerflow.config.app_config import AppConfig
+from deerflow.config.deer_flow_context import DeerFlowContext
+from deerflow.config.sandbox_config import SandboxConfig
+from deerflow.config.title_config import TitleConfig
 
 
-def _clone_title_config(config: TitleConfig) -> TitleConfig:
-    # Avoid mutating shared global config objects across tests.
-    return TitleConfig(**config.model_dump())
+def _make_title_config(**overrides) -> TitleConfig:
+    return TitleConfig(**overrides)
 
 
-def _set_test_title_config(**overrides) -> TitleConfig:
-    config = _clone_title_config(get_title_config())
-    for key, value in overrides.items():
-        setattr(config, key, value)
-    set_title_config(config)
-    return config
+def _make_runtime(**title_overrides) -> SimpleNamespace:
+    """Build a runtime whose context carries a DeerFlowContext with the given TitleConfig."""
+    app_config = AppConfig(sandbox=SandboxConfig(use="test"), title=TitleConfig(**title_overrides))
+    ctx = DeerFlowContext(app_config=app_config, thread_id="t1")
+    return SimpleNamespace(context=ctx)
 
 
 class TestTitleMiddlewareCoreLogic:
-    def setup_method(self):
-        # Title config is a global singleton; snapshot and restore for test isolation.
-        self._original = _clone_title_config(get_title_config())
-
-    def teardown_method(self):
-        set_title_config(self._original)
-
     def test_should_generate_title_for_first_complete_exchange(self):
-        _set_test_title_config(enabled=True)
         middleware = TitleMiddleware()
         state = {
             "messages": [
@@ -41,27 +35,24 @@ class TestTitleMiddlewareCoreLogic:
             ]
         }
 
-        assert middleware._should_generate_title(state) is True
+        assert middleware._should_generate_title(state, _make_title_config(enabled=True)) is True
 
     def test_should_not_generate_title_when_disabled_or_already_set(self):
         middleware = TitleMiddleware()
 
-        _set_test_title_config(enabled=False)
         disabled_state = {
             "messages": [HumanMessage(content="Q"), AIMessage(content="A")],
             "title": None,
         }
-        assert middleware._should_generate_title(disabled_state) is False
+        assert middleware._should_generate_title(disabled_state, _make_title_config(enabled=False)) is False
 
-        _set_test_title_config(enabled=True)
         titled_state = {
             "messages": [HumanMessage(content="Q"), AIMessage(content="A")],
             "title": "Existing Title",
         }
-        assert middleware._should_generate_title(titled_state) is False
+        assert middleware._should_generate_title(titled_state, _make_title_config(enabled=True)) is False
 
     def test_should_not_generate_title_after_second_user_turn(self):
-        _set_test_title_config(enabled=True)
         middleware = TitleMiddleware()
         state = {
             "messages": [
@@ -72,10 +63,9 @@ class TestTitleMiddlewareCoreLogic:
             ]
         }
 
-        assert middleware._should_generate_title(state) is False
+        assert middleware._should_generate_title(state, _make_title_config(enabled=True)) is False
 
     def test_generate_title_uses_async_model_and_respects_max_chars(self, monkeypatch):
-        _set_test_title_config(max_chars=12)
         middleware = TitleMiddleware()
         model = MagicMock()
         model.ainvoke = AsyncMock(return_value=AIMessage(content="短标题"))
@@ -87,16 +77,17 @@ class TestTitleMiddlewareCoreLogic:
                 AIMessage(content="好的，先确认需求"),
             ]
         }
-        result = asyncio.run(middleware._agenerate_title_result(state))
+        result = asyncio.run(middleware._agenerate_title_result(state, AppConfig(sandbox=SandboxConfig(use="test"), title=_make_title_config(max_chars=12))))
         title = result["title"]
 
         assert title == "短标题"
-        title_middleware_module.create_chat_model.assert_called_once_with(thinking_enabled=False)
+        from unittest.mock import ANY
+
+        title_middleware_module.create_chat_model.assert_called_once_with(thinking_enabled=False, app_config=ANY)
         model.ainvoke.assert_awaited_once()
         assert model.ainvoke.await_args.kwargs["config"] == {"run_name": "title_agent"}
 
     def test_generate_title_normalizes_structured_message_content(self, monkeypatch):
-        _set_test_title_config(max_chars=20)
         middleware = TitleMiddleware()
         model = MagicMock()
         model.ainvoke = AsyncMock(return_value=AIMessage(content="请帮我总结这段代码"))
@@ -109,13 +100,12 @@ class TestTitleMiddlewareCoreLogic:
             ]
         }
 
-        result = asyncio.run(middleware._agenerate_title_result(state))
+        result = asyncio.run(middleware._agenerate_title_result(state, AppConfig(sandbox=SandboxConfig(use="test"), title=_make_title_config(max_chars=20))))
         title = result["title"]
 
         assert title == "请帮我总结这段代码"
 
     def test_generate_title_fallback_for_long_message(self, monkeypatch):
-        _set_test_title_config(max_chars=20)
         middleware = TitleMiddleware()
         model = MagicMock()
         model.ainvoke = AsyncMock(side_effect=RuntimeError("model unavailable"))
@@ -127,7 +117,7 @@ class TestTitleMiddlewareCoreLogic:
                 AIMessage(content="收到"),
             ]
         }
-        result = asyncio.run(middleware._agenerate_title_result(state))
+        result = asyncio.run(middleware._agenerate_title_result(state, AppConfig(sandbox=SandboxConfig(use="test"), title=_make_title_config(max_chars=20))))
         title = result["title"]
 
         # Assert behavior (truncated fallback + ellipsis) without overfitting exact text.
@@ -138,25 +128,24 @@ class TestTitleMiddlewareCoreLogic:
         middleware = TitleMiddleware()
 
         monkeypatch.setattr(middleware, "_agenerate_title_result", AsyncMock(return_value={"title": "异步标题"}))
-        result = asyncio.run(middleware.aafter_model({"messages": []}, runtime=MagicMock()))
+        result = asyncio.run(middleware.aafter_model({"messages": []}, runtime=_make_runtime()))
         assert result == {"title": "异步标题"}
 
         monkeypatch.setattr(middleware, "_agenerate_title_result", AsyncMock(return_value=None))
-        assert asyncio.run(middleware.aafter_model({"messages": []}, runtime=MagicMock())) is None
+        assert asyncio.run(middleware.aafter_model({"messages": []}, runtime=_make_runtime())) is None
 
     def test_after_model_sync_delegates_to_sync_helper(self, monkeypatch):
         middleware = TitleMiddleware()
 
         monkeypatch.setattr(middleware, "_generate_title_result", MagicMock(return_value={"title": "同步标题"}))
-        result = middleware.after_model({"messages": []}, runtime=MagicMock())
+        result = middleware.after_model({"messages": []}, runtime=_make_runtime())
         assert result == {"title": "同步标题"}
 
         monkeypatch.setattr(middleware, "_generate_title_result", MagicMock(return_value=None))
-        assert middleware.after_model({"messages": []}, runtime=MagicMock()) is None
+        assert middleware.after_model({"messages": []}, runtime=_make_runtime()) is None
 
     def test_sync_generate_title_uses_fallback_without_model(self):
         """Sync path avoids LLM calls and derives a local fallback title."""
-        _set_test_title_config(max_chars=20)
         middleware = TitleMiddleware()
 
         state = {
@@ -165,12 +154,11 @@ class TestTitleMiddlewareCoreLogic:
                 AIMessage(content="好的"),
             ]
         }
-        result = middleware._generate_title_result(state)
+        result = middleware._generate_title_result(state, _make_title_config(max_chars=20))
         assert result == {"title": "请帮我写测试"}
 
     def test_sync_generate_title_respects_fallback_truncation(self):
         """Sync fallback path still respects max_chars truncation rules."""
-        _set_test_title_config(max_chars=50)
         middleware = TitleMiddleware()
 
         state = {
@@ -179,7 +167,7 @@ class TestTitleMiddlewareCoreLogic:
                 AIMessage(content="回复"),
             ]
         }
-        result = middleware._generate_title_result(state)
+        result = middleware._generate_title_result(state, _make_title_config(max_chars=50))
         assert result["title"].endswith("...")
         assert result["title"].startswith("这是一个非常长的问题描述")
 
diff --git a/backend/tests/test_token_usage.py b/backend/tests/test_token_usage.py
index bec9e9ac3..977756157 100644
--- a/backend/tests/test_token_usage.py
+++ b/backend/tests/test_token_usage.py
@@ -154,8 +154,7 @@ class TestStreamUsageIntegration:
     """Test that stream() emits usage_metadata in messages-tuple and end events."""
 
     def _make_client(self):
-        with patch("deerflow.client.get_app_config", return_value=_mock_app_config()):
-            return DeerFlowClient()
+        return DeerFlowClient()
 
     def test_stream_emits_usage_in_messages_tuple(self):
         """messages-tuple AI event should include usage_metadata when present."""
diff --git a/backend/tests/test_tool_search.py b/backend/tests/test_tool_search.py
index 428bfec3d..705a35339 100644
--- a/backend/tests/test_tool_search.py
+++ b/backend/tests/test_tool_search.py
@@ -8,7 +8,7 @@ import pytest
 from langchain_core.messages import ToolMessage
 from langchain_core.tools import tool as langchain_tool
 
-from deerflow.config.tool_search_config import ToolSearchConfig, load_tool_search_config_from_dict
+from deerflow.config.tool_search_config import ToolSearchConfig
 from deerflow.tools.builtins.tool_search import (
     DeferredToolRegistry,
     get_deferred_registry,
@@ -64,12 +64,12 @@ class TestToolSearchConfig:
         config = ToolSearchConfig(enabled=True)
         assert config.enabled is True
 
-    def test_load_from_dict(self):
-        config = load_tool_search_config_from_dict({"enabled": True})
+    def test_validate_from_dict(self):
+        config = ToolSearchConfig.model_validate({"enabled": True})
         assert config.enabled is True
 
-    def test_load_from_empty_dict(self):
-        config = load_tool_search_config_from_dict({})
+    def test_validate_from_empty_dict(self):
+        config = ToolSearchConfig.model_validate({})
         assert config.enabled is False
 
 
@@ -266,48 +266,42 @@ class TestToolSearchTool:
 
 
 class TestDeferredToolsPromptSection:
-    @pytest.fixture(autouse=True)
-    def _mock_app_config(self, monkeypatch):
+    @pytest.fixture
+    def mock_config(self):
         """Provide a minimal AppConfig mock so tests don't need config.yaml."""
         from unittest.mock import MagicMock
 
         from deerflow.config.tool_search_config import ToolSearchConfig
 
-        mock_config = MagicMock()
-        mock_config.tool_search = ToolSearchConfig()  # disabled by default
-        monkeypatch.setattr("deerflow.config.get_app_config", lambda: mock_config)
+        config = MagicMock()
+        config.tool_search = ToolSearchConfig()  # disabled by default
+        return config
 
-    def test_empty_when_disabled(self):
+    def test_empty_when_disabled(self, mock_config):
         from deerflow.agents.lead_agent.prompt import get_deferred_tools_prompt_section
 
         # tool_search.enabled defaults to False
-        section = get_deferred_tools_prompt_section()
+        section = get_deferred_tools_prompt_section(mock_config)
         assert section == ""
 
-    def test_empty_when_enabled_but_no_registry(self, monkeypatch):
+    def test_empty_when_enabled_but_no_registry(self, mock_config):
         from deerflow.agents.lead_agent.prompt import get_deferred_tools_prompt_section
-        from deerflow.config import get_app_config
-
-        monkeypatch.setattr(get_app_config().tool_search, "enabled", True)
-        section = get_deferred_tools_prompt_section()
+        mock_config.tool_search = ToolSearchConfig(enabled=True)
+        section = get_deferred_tools_prompt_section(mock_config)
         assert section == ""
 
-    def test_empty_when_enabled_but_empty_registry(self, monkeypatch):
+    def test_empty_when_enabled_but_empty_registry(self, mock_config):
         from deerflow.agents.lead_agent.prompt import get_deferred_tools_prompt_section
-        from deerflow.config import get_app_config
-
-        monkeypatch.setattr(get_app_config().tool_search, "enabled", True)
+        mock_config.tool_search = ToolSearchConfig(enabled=True)
         set_deferred_registry(DeferredToolRegistry())
-        section = get_deferred_tools_prompt_section()
+        section = get_deferred_tools_prompt_section(mock_config)
         assert section == ""
 
-    def test_lists_tool_names(self, registry, monkeypatch):
+    def test_lists_tool_names(self, registry, mock_config):
         from deerflow.agents.lead_agent.prompt import get_deferred_tools_prompt_section
-        from deerflow.config import get_app_config
-
-        monkeypatch.setattr(get_app_config().tool_search, "enabled", True)
+        mock_config.tool_search = ToolSearchConfig(enabled=True)
         set_deferred_registry(registry)
-        section = get_deferred_tools_prompt_section()
+        section = get_deferred_tools_prompt_section(mock_config)
         assert "<available-deferred-tools>" in section
         assert "</available-deferred-tools>" in section
         assert "github_create_issue" in section
diff --git a/backend/tests/test_uploads_middleware_core_logic.py b/backend/tests/test_uploads_middleware_core_logic.py
index 1837c1286..89a04a15d 100644
--- a/backend/tests/test_uploads_middleware_core_logic.py
+++ b/backend/tests/test_uploads_middleware_core_logic.py
@@ -13,7 +13,10 @@ from unittest.mock import MagicMock
 from langchain_core.messages import AIMessage, HumanMessage
 
 from deerflow.agents.middlewares.uploads_middleware import UploadsMiddleware
+from deerflow.config.app_config import AppConfig
+from deerflow.config.deer_flow_context import DeerFlowContext
 from deerflow.config.paths import Paths
+from deerflow.config.sandbox_config import SandboxConfig
 
 THREAD_ID = "thread-abc123"
 
@@ -23,18 +26,27 @@ THREAD_ID = "thread-abc123"
 # ---------------------------------------------------------------------------
 
 
+def _make_context(thread_id: str) -> DeerFlowContext:
+    return DeerFlowContext(
+        app_config=AppConfig(sandbox=SandboxConfig(use="test")),
+        thread_id=thread_id,
+    )
+
+
 def _middleware(tmp_path: Path) -> UploadsMiddleware:
     return UploadsMiddleware(base_dir=str(tmp_path))
 
 
 def _runtime(thread_id: str | None = THREAD_ID) -> MagicMock:
     rt = MagicMock()
-    rt.context = {"thread_id": thread_id}
+    rt.context = _make_context(thread_id or "")
     return rt
 
 
 def _uploads_dir(tmp_path: Path, thread_id: str = THREAD_ID) -> Path:
-    d = Paths(str(tmp_path)).sandbox_uploads_dir(thread_id)
+    from deerflow.runtime.user_context import get_effective_user_id
+
+    d = Paths(str(tmp_path)).sandbox_uploads_dir(thread_id, user_id=get_effective_user_id())
     d.mkdir(parents=True, exist_ok=True)
     return d
 
diff --git a/backend/tests/test_uploads_router.py b/backend/tests/test_uploads_router.py
index f305b998f..e2f51625d 100644
--- a/backend/tests/test_uploads_router.py
+++ b/backend/tests/test_uploads_router.py
@@ -4,6 +4,7 @@ from io import BytesIO
 from pathlib import Path
 from unittest.mock import AsyncMock, MagicMock, patch
 
+from _router_auth_helpers import call_unwrapped
 from fastapi import UploadFile
 
 from app.gateway.routers import uploads
@@ -25,7 +26,7 @@ def test_upload_files_writes_thread_storage_and_skips_local_sandbox_sync(tmp_pat
         patch.object(uploads, "get_sandbox_provider", return_value=provider),
     ):
         file = UploadFile(filename="notes.txt", file=BytesIO(b"hello uploads"))
-        result = asyncio.run(uploads.upload_files("thread-local", files=[file]))
+        result = asyncio.run(call_unwrapped(uploads.upload_files, "thread-local", request=MagicMock(), files=[file]))
 
     assert result.success is True
     assert len(result.files) == 1
@@ -107,7 +108,7 @@ def test_upload_files_syncs_non_local_sandbox_and_marks_markdown_file(tmp_path):
         patch.object(uploads, "convert_file_to_markdown", AsyncMock(side_effect=fake_convert)),
     ):
         file = UploadFile(filename="report.pdf", file=BytesIO(b"pdf-bytes"))
-        result = asyncio.run(uploads.upload_files("thread-aio", files=[file]))
+        result = asyncio.run(call_unwrapped(uploads.upload_files, "thread-aio", request=MagicMock(), files=[file]))
 
     assert result.success is True
     assert len(result.files) == 1
@@ -146,7 +147,7 @@ def test_upload_files_makes_non_local_files_sandbox_writable(tmp_path):
         patch.object(uploads, "_make_file_sandbox_writable") as make_writable,
     ):
         file = UploadFile(filename="report.pdf", file=BytesIO(b"pdf-bytes"))
-        result = asyncio.run(uploads.upload_files("thread-aio", files=[file]))
+        result = asyncio.run(call_unwrapped(uploads.upload_files, "thread-aio", request=MagicMock(), files=[file]))
 
     assert result.success is True
     make_writable.assert_any_call(thread_uploads_dir / "report.pdf")
@@ -170,7 +171,7 @@ def test_upload_files_does_not_adjust_permissions_for_local_sandbox(tmp_path):
         patch.object(uploads, "_make_file_sandbox_writable") as make_writable,
     ):
         file = UploadFile(filename="notes.txt", file=BytesIO(b"hello uploads"))
-        result = asyncio.run(uploads.upload_files("thread-local", files=[file]))
+        result = asyncio.run(call_unwrapped(uploads.upload_files, "thread-local", request=MagicMock(), files=[file]))
 
     assert result.success is True
     make_writable.assert_not_called()
@@ -221,13 +222,13 @@ def test_upload_files_rejects_dotdot_and_dot_filenames(tmp_path):
         # These filenames must be rejected outright
         for bad_name in ["..", "."]:
             file = UploadFile(filename=bad_name, file=BytesIO(b"data"))
-            result = asyncio.run(uploads.upload_files("thread-local", files=[file]))
+            result = asyncio.run(call_unwrapped(uploads.upload_files, "thread-local", request=MagicMock(), files=[file]))
             assert result.success is True
             assert result.files == [], f"Expected no files for unsafe filename {bad_name!r}"
 
         # Path-traversal prefixes are stripped to the basename and accepted safely
         file = UploadFile(filename="../etc/passwd", file=BytesIO(b"data"))
-        result = asyncio.run(uploads.upload_files("thread-local", files=[file]))
+        result = asyncio.run(call_unwrapped(uploads.upload_files, "thread-local", request=MagicMock(), files=[file]))
         assert result.success is True
         assert len(result.files) == 1
         assert result.files[0]["filename"] == "passwd"
@@ -243,7 +244,7 @@ def test_delete_uploaded_file_removes_generated_markdown_companion(tmp_path):
     (thread_uploads_dir / "report.md").write_text("converted", encoding="utf-8")
 
     with patch.object(uploads, "get_uploads_dir", return_value=thread_uploads_dir):
-        result = asyncio.run(uploads.delete_uploaded_file("thread-aio", "report.pdf"))
+        result = asyncio.run(call_unwrapped(uploads.delete_uploaded_file, "thread-aio", "report.pdf", request=MagicMock()))
 
     assert result == {"success": True, "message": "Deleted report.pdf"}
     assert not (thread_uploads_dir / "report.pdf").exists()
diff --git a/backend/tests/test_user_context.py b/backend/tests/test_user_context.py
new file mode 100644
index 000000000..8c7cbd13c
--- /dev/null
+++ b/backend/tests/test_user_context.py
@@ -0,0 +1,110 @@
+"""Tests for runtime.user_context — contextvar three-state semantics.
+
+These tests opt out of the autouse contextvar fixture (added in
+commit 6) because they explicitly test the cases where the contextvar
+is set or unset.
+"""
+
+from types import SimpleNamespace
+
+import pytest
+
+from deerflow.runtime.user_context import (
+    CurrentUser,
+    DEFAULT_USER_ID,
+    get_current_user,
+    get_effective_user_id,
+    require_current_user,
+    reset_current_user,
+    set_current_user,
+)
+
+
+@pytest.mark.no_auto_user
+def test_default_is_none():
+    """Before any set, contextvar returns None."""
+    assert get_current_user() is None
+
+
+@pytest.mark.no_auto_user
+def test_set_and_reset_roundtrip():
+    """set_current_user returns a token that reset restores."""
+    user = SimpleNamespace(id="user-1")
+    token = set_current_user(user)
+    try:
+        assert get_current_user() is user
+    finally:
+        reset_current_user(token)
+    assert get_current_user() is None
+
+
+@pytest.mark.no_auto_user
+def test_require_current_user_raises_when_unset():
+    """require_current_user raises RuntimeError if contextvar is unset."""
+    assert get_current_user() is None
+    with pytest.raises(RuntimeError, match="without user context"):
+        require_current_user()
+
+
+@pytest.mark.no_auto_user
+def test_require_current_user_returns_user_when_set():
+    """require_current_user returns the user when contextvar is set."""
+    user = SimpleNamespace(id="user-2")
+    token = set_current_user(user)
+    try:
+        assert require_current_user() is user
+    finally:
+        reset_current_user(token)
+
+
+@pytest.mark.no_auto_user
+def test_protocol_accepts_duck_typed():
+    """CurrentUser is a runtime_checkable Protocol matching any .id-bearing object."""
+    user = SimpleNamespace(id="user-3")
+    assert isinstance(user, CurrentUser)
+
+
+@pytest.mark.no_auto_user
+def test_protocol_rejects_no_id():
+    """Objects without .id do not satisfy CurrentUser Protocol."""
+    not_a_user = SimpleNamespace(email="no-id@example.com")
+    assert not isinstance(not_a_user, CurrentUser)
+
+
+# ---------------------------------------------------------------------------
+# get_effective_user_id / DEFAULT_USER_ID tests
+# ---------------------------------------------------------------------------
+
+
+def test_default_user_id_is_default():
+    assert DEFAULT_USER_ID == "default"
+
+
+@pytest.mark.no_auto_user
+def test_effective_user_id_returns_default_when_no_user():
+    """No user in context -> fallback to DEFAULT_USER_ID."""
+    assert get_effective_user_id() == "default"
+
+
+@pytest.mark.no_auto_user
+def test_effective_user_id_returns_user_id_when_set():
+    user = SimpleNamespace(id="u-abc-123")
+    token = set_current_user(user)
+    try:
+        assert get_effective_user_id() == "u-abc-123"
+    finally:
+        reset_current_user(token)
+
+
+@pytest.mark.no_auto_user
+def test_effective_user_id_coerces_to_str():
+    """User.id might be a UUID object; must come back as str."""
+    import uuid
+    uid = uuid.uuid4()
+
+    user = SimpleNamespace(id=uid)
+    token = set_current_user(user)
+    try:
+        assert get_effective_user_id() == str(uid)
+    finally:
+        reset_current_user(token)
diff --git a/backend/uv.lock b/backend/uv.lock
index bd2630869..d3fab6ed4 100644
--- a/backend/uv.lock
+++ b/backend/uv.lock
@@ -158,6 +158,20 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/00/b7/e3bf5133d697a08128598c8d0abc5e16377b51465a33756de24fa7dee953/aiosqlite-0.22.1-py3-none-any.whl", hash = "sha256:21c002eb13823fad740196c5a2e9d8e62f6243bd9e7e4a1f87fb5e44ecb4fceb", size = 17405, upload-time = "2025-12-23T19:25:42.139Z" },
 ]
 
+[[package]]
+name = "alembic"
+version = "1.18.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "mako" },
+    { name = "sqlalchemy" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/94/13/8b084e0f2efb0275a1d534838844926f798bd766566b1375174e2448cd31/alembic-1.18.4.tar.gz", hash = "sha256:cb6e1fd84b6174ab8dbb2329f86d631ba9559dd78df550b57804d607672cedbc", size = 2056725, upload-time = "2026-02-10T16:00:47.195Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/d2/29/6533c317b74f707ea28f8d633734dbda2119bbadfc61b2f3640ba835d0f7/alembic-1.18.4-py3-none-any.whl", hash = "sha256:a5ed4adcf6d8a4cb575f3d759f071b03cd6e5c7618eb796cb52497be25bfe19a", size = 263893, upload-time = "2026-02-10T16:00:49.997Z" },
+]
+
 [[package]]
 name = "annotated-doc"
 version = "0.0.4"
@@ -208,6 +222,46 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" },
 ]
 
+[[package]]
+name = "asyncpg"
+version = "0.31.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/fe/cc/d18065ce2380d80b1bcce927c24a2642efd38918e33fd724bc4bca904877/asyncpg-0.31.0.tar.gz", hash = "sha256:c989386c83940bfbd787180f2b1519415e2d3d6277a70d9d0f0145ac73500735", size = 993667, upload-time = "2025-11-24T23:27:00.812Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/2a/a6/59d0a146e61d20e18db7396583242e32e0f120693b67a8de43f1557033e2/asyncpg-0.31.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b44c31e1efc1c15188ef183f287c728e2046abb1d26af4d20858215d50d91fad", size = 662042, upload-time = "2025-11-24T23:25:49.578Z" },
+    { url = "https://files.pythonhosted.org/packages/36/01/ffaa189dcb63a2471720615e60185c3f6327716fdc0fc04334436fbb7c65/asyncpg-0.31.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0c89ccf741c067614c9b5fc7f1fc6f3b61ab05ae4aaa966e6fd6b93097c7d20d", size = 638504, upload-time = "2025-11-24T23:25:51.501Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/62/3f699ba45d8bd24c5d65392190d19656d74ff0185f42e19d0bbd973bb371/asyncpg-0.31.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:12b3b2e39dc5470abd5e98c8d3373e4b1d1234d9fbdedf538798b2c13c64460a", size = 3426241, upload-time = "2025-11-24T23:25:53.278Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/d1/a867c2150f9c6e7af6462637f613ba67f78a314b00db220cd26ff559d532/asyncpg-0.31.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:aad7a33913fb8bcb5454313377cc330fbb19a0cd5faa7272407d8a0c4257b671", size = 3520321, upload-time = "2025-11-24T23:25:54.982Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/1a/cce4c3f246805ecd285a3591222a2611141f1669d002163abef999b60f98/asyncpg-0.31.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3df118d94f46d85b2e434fd62c84cb66d5834d5a890725fe625f498e72e4d5ec", size = 3316685, upload-time = "2025-11-24T23:25:57.43Z" },
+    { url = "https://files.pythonhosted.org/packages/40/ae/0fc961179e78cc579e138fad6eb580448ecae64908f95b8cb8ee2f241f67/asyncpg-0.31.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bd5b6efff3c17c3202d4b37189969acf8927438a238c6257f66be3c426beba20", size = 3471858, upload-time = "2025-11-24T23:25:59.636Z" },
+    { url = "https://files.pythonhosted.org/packages/52/b2/b20e09670be031afa4cbfabd645caece7f85ec62d69c312239de568e058e/asyncpg-0.31.0-cp312-cp312-win32.whl", hash = "sha256:027eaa61361ec735926566f995d959ade4796f6a49d3bde17e5134b9964f9ba8", size = 527852, upload-time = "2025-11-24T23:26:01.084Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/f0/f2ed1de154e15b107dc692262395b3c17fc34eafe2a78fc2115931561730/asyncpg-0.31.0-cp312-cp312-win_amd64.whl", hash = "sha256:72d6bdcbc93d608a1158f17932de2321f68b1a967a13e014998db87a72ed3186", size = 597175, upload-time = "2025-11-24T23:26:02.564Z" },
+    { url = "https://files.pythonhosted.org/packages/95/11/97b5c2af72a5d0b9bc3fa30cd4b9ce22284a9a943a150fdc768763caf035/asyncpg-0.31.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c204fab1b91e08b0f47e90a75d1b3c62174dab21f670ad6c5d0f243a228f015b", size = 661111, upload-time = "2025-11-24T23:26:04.467Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/71/157d611c791a5e2d0423f09f027bd499935f0906e0c2a416ce712ba51ef3/asyncpg-0.31.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:54a64f91839ba59008eccf7aad2e93d6e3de688d796f35803235ea1c4898ae1e", size = 636928, upload-time = "2025-11-24T23:26:05.944Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/fc/9e3486fb2bbe69d4a867c0b76d68542650a7ff1574ca40e84c3111bb0c6e/asyncpg-0.31.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0e0822b1038dc7253b337b0f3f676cadc4ac31b126c5d42691c39691962e403", size = 3424067, upload-time = "2025-11-24T23:26:07.957Z" },
+    { url = "https://files.pythonhosted.org/packages/12/c6/8c9d076f73f07f995013c791e018a1cd5f31823c2a3187fc8581706aa00f/asyncpg-0.31.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bef056aa502ee34204c161c72ca1f3c274917596877f825968368b2c33f585f4", size = 3518156, upload-time = "2025-11-24T23:26:09.591Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/3b/60683a0baf50fbc546499cfb53132cb6835b92b529a05f6a81471ab60d0c/asyncpg-0.31.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0bfbcc5b7ffcd9b75ab1558f00db2ae07db9c80637ad1b2469c43df79d7a5ae2", size = 3319636, upload-time = "2025-11-24T23:26:11.168Z" },
+    { url = "https://files.pythonhosted.org/packages/50/dc/8487df0f69bd398a61e1792b3cba0e47477f214eff085ba0efa7eac9ce87/asyncpg-0.31.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:22bc525ebbdc24d1261ecbf6f504998244d4e3be1721784b5f64664d61fbe602", size = 3472079, upload-time = "2025-11-24T23:26:13.164Z" },
+    { url = "https://files.pythonhosted.org/packages/13/a1/c5bbeeb8531c05c89135cb8b28575ac2fac618bcb60119ee9696c3faf71c/asyncpg-0.31.0-cp313-cp313-win32.whl", hash = "sha256:f890de5e1e4f7e14023619399a471ce4b71f5418cd67a51853b9910fdfa73696", size = 527606, upload-time = "2025-11-24T23:26:14.78Z" },
+    { url = "https://files.pythonhosted.org/packages/91/66/b25ccb84a246b470eb943b0107c07edcae51804912b824054b3413995a10/asyncpg-0.31.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc5f2fa9916f292e5c5c8b2ac2813763bcd7f58e130055b4ad8a0531314201ab", size = 596569, upload-time = "2025-11-24T23:26:16.189Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/36/e9450d62e84a13aea6580c83a47a437f26c7ca6fa0f0fd40b6670793ea30/asyncpg-0.31.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f6b56b91bb0ffc328c4e3ed113136cddd9deefdf5f79ab448598b9772831df44", size = 660867, upload-time = "2025-11-24T23:26:17.631Z" },
+    { url = "https://files.pythonhosted.org/packages/82/4b/1d0a2b33b3102d210439338e1beea616a6122267c0df459ff0265cd5807a/asyncpg-0.31.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:334dec28cf20d7f5bb9e45b39546ddf247f8042a690bff9b9573d00086e69cb5", size = 638349, upload-time = "2025-11-24T23:26:19.689Z" },
+    { url = "https://files.pythonhosted.org/packages/41/aa/e7f7ac9a7974f08eff9183e392b2d62516f90412686532d27e196c0f0eeb/asyncpg-0.31.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:98cc158c53f46de7bb677fd20c417e264fc02b36d901cc2a43bd6cb0dc6dbfd2", size = 3410428, upload-time = "2025-11-24T23:26:21.275Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/de/bf1b60de3dede5c2731e6788617a512bc0ebd9693eac297ee74086f101d7/asyncpg-0.31.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9322b563e2661a52e3cdbc93eed3be7748b289f792e0011cb2720d278b366ce2", size = 3471678, upload-time = "2025-11-24T23:26:23.627Z" },
+    { url = "https://files.pythonhosted.org/packages/46/78/fc3ade003e22d8bd53aaf8f75f4be48f0b460fa73738f0391b9c856a9147/asyncpg-0.31.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19857a358fc811d82227449b7ca40afb46e75b33eb8897240c3839dd8b744218", size = 3313505, upload-time = "2025-11-24T23:26:25.235Z" },
+    { url = "https://files.pythonhosted.org/packages/bf/e9/73eb8a6789e927816f4705291be21f2225687bfa97321e40cd23055e903a/asyncpg-0.31.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ba5f8886e850882ff2c2ace5732300e99193823e8107e2c53ef01c1ebfa1e85d", size = 3434744, upload-time = "2025-11-24T23:26:26.944Z" },
+    { url = "https://files.pythonhosted.org/packages/08/4b/f10b880534413c65c5b5862f79b8e81553a8f364e5238832ad4c0af71b7f/asyncpg-0.31.0-cp314-cp314-win32.whl", hash = "sha256:cea3a0b2a14f95834cee29432e4ddc399b95700eb1d51bbc5bfee8f31fa07b2b", size = 532251, upload-time = "2025-11-24T23:26:28.404Z" },
+    { url = "https://files.pythonhosted.org/packages/d3/2d/7aa40750b7a19efa5d66e67fc06008ca0f27ba1bd082e457ad82f59aba49/asyncpg-0.31.0-cp314-cp314-win_amd64.whl", hash = "sha256:04d19392716af6b029411a0264d92093b6e5e8285ae97a39957b9a9c14ea72be", size = 604901, upload-time = "2025-11-24T23:26:30.34Z" },
+    { url = "https://files.pythonhosted.org/packages/ce/fe/b9dfe349b83b9dee28cc42360d2c86b2cdce4cb551a2c2d27e156bcac84d/asyncpg-0.31.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:bdb957706da132e982cc6856bb2f7b740603472b54c3ebc77fe60ea3e57e1bd2", size = 702280, upload-time = "2025-11-24T23:26:32Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/81/e6be6e37e560bd91e6c23ea8a6138a04fd057b08cf63d3c5055c98e81c1d/asyncpg-0.31.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6d11b198111a72f47154fa03b85799f9be63701e068b43f84ac25da0bda9cb31", size = 682931, upload-time = "2025-11-24T23:26:33.572Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/45/6009040da85a1648dd5bc75b3b0a062081c483e75a1a29041ae63a0bf0dc/asyncpg-0.31.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18c83b03bc0d1b23e6230f5bf8d4f217dc9bc08644ce0502a9d91dc9e634a9c7", size = 3581608, upload-time = "2025-11-24T23:26:35.638Z" },
+    { url = "https://files.pythonhosted.org/packages/7e/06/2e3d4d7608b0b2b3adbee0d0bd6a2d29ca0fc4d8a78f8277df04e2d1fd7b/asyncpg-0.31.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e009abc333464ff18b8f6fd146addffd9aaf63e79aa3bb40ab7a4c332d0c5e9e", size = 3498738, upload-time = "2025-11-24T23:26:37.275Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/aa/7d75ede780033141c51d83577ea23236ba7d3a23593929b32b49db8ed36e/asyncpg-0.31.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3b1fbcb0e396a5ca435a8826a87e5c2c2cc0c8c68eb6fadf82168056b0e53a8c", size = 3401026, upload-time = "2025-11-24T23:26:39.423Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/7a/15e37d45e7f7c94facc1e9148c0e455e8f33c08f0b8a0b1deb2c5171771b/asyncpg-0.31.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8df714dba348efcc162d2adf02d213e5fab1bd9f557e1305633e851a61814a7a", size = 3429426, upload-time = "2025-11-24T23:26:41.032Z" },
+    { url = "https://files.pythonhosted.org/packages/13/d5/71437c5f6ae5f307828710efbe62163974e71237d5d46ebd2869ea052d10/asyncpg-0.31.0-cp314-cp314t-win32.whl", hash = "sha256:1b41f1afb1033f2b44f3234993b15096ddc9cd71b21a42dbd87fc6a57b43d65d", size = 614495, upload-time = "2025-11-24T23:26:42.659Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/d7/8fb3044eaef08a310acfe23dae9a8e2e07d305edc29a53497e52bc76eca7/asyncpg-0.31.0-cp314-cp314t-win_amd64.whl", hash = "sha256:bd4107bb7cdd0e9e65fae66a62afd3a249663b844fa34d479f6d5b3bef9c04c3", size = 706062, upload-time = "2025-11-24T23:26:44.086Z" },
+]
+
 [[package]]
 name = "attrs"
 version = "25.4.0"
@@ -325,6 +379,72 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" },
 ]
 
+[[package]]
+name = "bcrypt"
+version = "5.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d4/36/3329e2518d70ad8e2e5817d5a4cac6bba05a47767ec416c7d020a965f408/bcrypt-5.0.0.tar.gz", hash = "sha256:f748f7c2d6fd375cc93d3fba7ef4a9e3a092421b8dbf34d8d4dc06be9492dfdd", size = 25386, upload-time = "2025-09-25T19:50:47.829Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/13/85/3e65e01985fddf25b64ca67275bb5bdb4040bd1a53b66d355c6c37c8a680/bcrypt-5.0.0-cp313-cp313t-macosx_10_12_universal2.whl", hash = "sha256:f3c08197f3039bec79cee59a606d62b96b16669cff3949f21e74796b6e3cd2be", size = 481806, upload-time = "2025-09-25T19:49:05.102Z" },
+    { url = "https://files.pythonhosted.org/packages/44/dc/01eb79f12b177017a726cbf78330eb0eb442fae0e7b3dfd84ea2849552f3/bcrypt-5.0.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:200af71bc25f22006f4069060c88ed36f8aa4ff7f53e67ff04d2ab3f1e79a5b2", size = 268626, upload-time = "2025-09-25T19:49:06.723Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/cf/e82388ad5959c40d6afd94fb4743cc077129d45b952d46bdc3180310e2df/bcrypt-5.0.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:baade0a5657654c2984468efb7d6c110db87ea63ef5a4b54732e7e337253e44f", size = 271853, upload-time = "2025-09-25T19:49:08.028Z" },
+    { url = "https://files.pythonhosted.org/packages/ec/86/7134b9dae7cf0efa85671651341f6afa695857fae172615e960fb6a466fa/bcrypt-5.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:c58b56cdfb03202b3bcc9fd8daee8e8e9b6d7e3163aa97c631dfcfcc24d36c86", size = 269793, upload-time = "2025-09-25T19:49:09.727Z" },
+    { url = "https://files.pythonhosted.org/packages/cc/82/6296688ac1b9e503d034e7d0614d56e80c5d1a08402ff856a4549cb59207/bcrypt-5.0.0-cp313-cp313t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:4bfd2a34de661f34d0bda43c3e4e79df586e4716ef401fe31ea39d69d581ef23", size = 289930, upload-time = "2025-09-25T19:49:11.204Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/18/884a44aa47f2a3b88dd09bc05a1e40b57878ecd111d17e5bba6f09f8bb77/bcrypt-5.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:ed2e1365e31fc73f1825fa830f1c8f8917ca1b3ca6185773b349c20fd606cec2", size = 272194, upload-time = "2025-09-25T19:49:12.524Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/8f/371a3ab33c6982070b674f1788e05b656cfbf5685894acbfef0c65483a59/bcrypt-5.0.0-cp313-cp313t-manylinux_2_34_aarch64.whl", hash = "sha256:83e787d7a84dbbfba6f250dd7a5efd689e935f03dd83b0f919d39349e1f23f83", size = 269381, upload-time = "2025-09-25T19:49:14.308Z" },
+    { url = "https://files.pythonhosted.org/packages/b1/34/7e4e6abb7a8778db6422e88b1f06eb07c47682313997ee8a8f9352e5a6f1/bcrypt-5.0.0-cp313-cp313t-manylinux_2_34_x86_64.whl", hash = "sha256:137c5156524328a24b9fac1cb5db0ba618bc97d11970b39184c1d87dc4bf1746", size = 271750, upload-time = "2025-09-25T19:49:15.584Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/1b/54f416be2499bd72123c70d98d36c6cd61a4e33d9b89562c22481c81bb30/bcrypt-5.0.0-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:38cac74101777a6a7d3b3e3cfefa57089b5ada650dce2baf0cbdd9d65db22a9e", size = 303757, upload-time = "2025-09-25T19:49:17.244Z" },
+    { url = "https://files.pythonhosted.org/packages/13/62/062c24c7bcf9d2826a1a843d0d605c65a755bc98002923d01fd61270705a/bcrypt-5.0.0-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:d8d65b564ec849643d9f7ea05c6d9f0cd7ca23bdd4ac0c2dbef1104ab504543d", size = 306740, upload-time = "2025-09-25T19:49:18.693Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/c8/1fdbfc8c0f20875b6b4020f3c7dc447b8de60aa0be5faaf009d24242aec9/bcrypt-5.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:741449132f64b3524e95cd30e5cd3343006ce146088f074f31ab26b94e6c75ba", size = 334197, upload-time = "2025-09-25T19:49:20.523Z" },
+    { url = "https://files.pythonhosted.org/packages/a6/c1/8b84545382d75bef226fbc6588af0f7b7d095f7cd6a670b42a86243183cd/bcrypt-5.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:212139484ab3207b1f0c00633d3be92fef3c5f0af17cad155679d03ff2ee1e41", size = 352974, upload-time = "2025-09-25T19:49:22.254Z" },
+    { url = "https://files.pythonhosted.org/packages/10/a6/ffb49d4254ed085e62e3e5dd05982b4393e32fe1e49bb1130186617c29cd/bcrypt-5.0.0-cp313-cp313t-win32.whl", hash = "sha256:9d52ed507c2488eddd6a95bccee4e808d3234fa78dd370e24bac65a21212b861", size = 148498, upload-time = "2025-09-25T19:49:24.134Z" },
+    { url = "https://files.pythonhosted.org/packages/48/a9/259559edc85258b6d5fc5471a62a3299a6aa37a6611a169756bf4689323c/bcrypt-5.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:f6984a24db30548fd39a44360532898c33528b74aedf81c26cf29c51ee47057e", size = 145853, upload-time = "2025-09-25T19:49:25.702Z" },
+    { url = "https://files.pythonhosted.org/packages/2d/df/9714173403c7e8b245acf8e4be8876aac64a209d1b392af457c79e60492e/bcrypt-5.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:9fffdb387abe6aa775af36ef16f55e318dcda4194ddbf82007a6f21da29de8f5", size = 139626, upload-time = "2025-09-25T19:49:26.928Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/14/c18006f91816606a4abe294ccc5d1e6f0e42304df5a33710e9e8e95416e1/bcrypt-5.0.0-cp314-cp314t-macosx_10_12_universal2.whl", hash = "sha256:4870a52610537037adb382444fefd3706d96d663ac44cbb2f37e3919dca3d7ef", size = 481862, upload-time = "2025-09-25T19:49:28.365Z" },
+    { url = "https://files.pythonhosted.org/packages/67/49/dd074d831f00e589537e07a0725cf0e220d1f0d5d8e85ad5bbff251c45aa/bcrypt-5.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:48f753100931605686f74e27a7b49238122aa761a9aefe9373265b8b7aa43ea4", size = 268544, upload-time = "2025-09-25T19:49:30.39Z" },
+    { url = "https://files.pythonhosted.org/packages/f5/91/50ccba088b8c474545b034a1424d05195d9fcbaaf802ab8bfe2be5a4e0d7/bcrypt-5.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f70aadb7a809305226daedf75d90379c397b094755a710d7014b8b117df1ebbf", size = 271787, upload-time = "2025-09-25T19:49:32.144Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/e7/d7dba133e02abcda3b52087a7eea8c0d4f64d3e593b4fffc10c31b7061f3/bcrypt-5.0.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:744d3c6b164caa658adcb72cb8cc9ad9b4b75c7db507ab4bc2480474a51989da", size = 269753, upload-time = "2025-09-25T19:49:33.885Z" },
+    { url = "https://files.pythonhosted.org/packages/33/fc/5b145673c4b8d01018307b5c2c1fc87a6f5a436f0ad56607aee389de8ee3/bcrypt-5.0.0-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a28bc05039bdf3289d757f49d616ab3efe8cf40d8e8001ccdd621cd4f98f4fc9", size = 289587, upload-time = "2025-09-25T19:49:35.144Z" },
+    { url = "https://files.pythonhosted.org/packages/27/d7/1ff22703ec6d4f90e62f1a5654b8867ef96bafb8e8102c2288333e1a6ca6/bcrypt-5.0.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:7f277a4b3390ab4bebe597800a90da0edae882c6196d3038a73adf446c4f969f", size = 272178, upload-time = "2025-09-25T19:49:36.793Z" },
+    { url = "https://files.pythonhosted.org/packages/c8/88/815b6d558a1e4d40ece04a2f84865b0fef233513bd85fd0e40c294272d62/bcrypt-5.0.0-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:79cfa161eda8d2ddf29acad370356b47f02387153b11d46042e93a0a95127493", size = 269295, upload-time = "2025-09-25T19:49:38.164Z" },
+    { url = "https://files.pythonhosted.org/packages/51/8c/e0db387c79ab4931fc89827d37608c31cc57b6edc08ccd2386139028dc0d/bcrypt-5.0.0-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:a5393eae5722bcef046a990b84dff02b954904c36a194f6cfc817d7dca6c6f0b", size = 271700, upload-time = "2025-09-25T19:49:39.917Z" },
+    { url = "https://files.pythonhosted.org/packages/06/83/1570edddd150f572dbe9fc00f6203a89fc7d4226821f67328a85c330f239/bcrypt-5.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7f4c94dec1b5ab5d522750cb059bb9409ea8872d4494fd152b53cca99f1ddd8c", size = 334034, upload-time = "2025-09-25T19:49:41.227Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/f2/ea64e51a65e56ae7a8a4ec236c2bfbdd4b23008abd50ac33fbb2d1d15424/bcrypt-5.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:0cae4cb350934dfd74c020525eeae0a5f79257e8a201c0c176f4b84fdbf2a4b4", size = 352766, upload-time = "2025-09-25T19:49:43.08Z" },
+    { url = "https://files.pythonhosted.org/packages/d7/d4/1a388d21ee66876f27d1a1f41287897d0c0f1712ef97d395d708ba93004c/bcrypt-5.0.0-cp314-cp314t-win32.whl", hash = "sha256:b17366316c654e1ad0306a6858e189fc835eca39f7eb2cafd6aaca8ce0c40a2e", size = 152449, upload-time = "2025-09-25T19:49:44.971Z" },
+    { url = "https://files.pythonhosted.org/packages/3f/61/3291c2243ae0229e5bca5d19f4032cecad5dfb05a2557169d3a69dc0ba91/bcrypt-5.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:92864f54fb48b4c718fc92a32825d0e42265a627f956bc0361fe869f1adc3e7d", size = 149310, upload-time = "2025-09-25T19:49:46.162Z" },
+    { url = "https://files.pythonhosted.org/packages/3e/89/4b01c52ae0c1a681d4021e5dd3e45b111a8fb47254a274fa9a378d8d834b/bcrypt-5.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:dd19cf5184a90c873009244586396a6a884d591a5323f0e8a5922560718d4993", size = 143761, upload-time = "2025-09-25T19:49:47.345Z" },
+    { url = "https://files.pythonhosted.org/packages/84/29/6237f151fbfe295fe3e074ecc6d44228faa1e842a81f6d34a02937ee1736/bcrypt-5.0.0-cp38-abi3-macosx_10_12_universal2.whl", hash = "sha256:fc746432b951e92b58317af8e0ca746efe93e66555f1b40888865ef5bf56446b", size = 494553, upload-time = "2025-09-25T19:49:49.006Z" },
+    { url = "https://files.pythonhosted.org/packages/45/b6/4c1205dde5e464ea3bd88e8742e19f899c16fa8916fb8510a851fae985b5/bcrypt-5.0.0-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c2388ca94ffee269b6038d48747f4ce8df0ffbea43f31abfa18ac72f0218effb", size = 275009, upload-time = "2025-09-25T19:49:50.581Z" },
+    { url = "https://files.pythonhosted.org/packages/3b/71/427945e6ead72ccffe77894b2655b695ccf14ae1866cd977e185d606dd2f/bcrypt-5.0.0-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:560ddb6ec730386e7b3b26b8b4c88197aaed924430e7b74666a586ac997249ef", size = 278029, upload-time = "2025-09-25T19:49:52.533Z" },
+    { url = "https://files.pythonhosted.org/packages/17/72/c344825e3b83c5389a369c8a8e58ffe1480b8a699f46c127c34580c4666b/bcrypt-5.0.0-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:d79e5c65dcc9af213594d6f7f1fa2c98ad3fc10431e7aa53c176b441943efbdd", size = 275907, upload-time = "2025-09-25T19:49:54.709Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/7e/d4e47d2df1641a36d1212e5c0514f5291e1a956a7749f1e595c07a972038/bcrypt-5.0.0-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:2b732e7d388fa22d48920baa267ba5d97cca38070b69c0e2d37087b381c681fd", size = 296500, upload-time = "2025-09-25T19:49:56.013Z" },
+    { url = "https://files.pythonhosted.org/packages/0f/c3/0ae57a68be2039287ec28bc463b82e4b8dc23f9d12c0be331f4782e19108/bcrypt-5.0.0-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0c8e093ea2532601a6f686edbc2c6b2ec24131ff5c52f7610dd64fa4553b5464", size = 278412, upload-time = "2025-09-25T19:49:57.356Z" },
+    { url = "https://files.pythonhosted.org/packages/45/2b/77424511adb11e6a99e3a00dcc7745034bee89036ad7d7e255a7e47be7d8/bcrypt-5.0.0-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:5b1589f4839a0899c146e8892efe320c0fa096568abd9b95593efac50a87cb75", size = 275486, upload-time = "2025-09-25T19:49:59.116Z" },
+    { url = "https://files.pythonhosted.org/packages/43/0a/405c753f6158e0f3f14b00b462d8bca31296f7ecfc8fc8bc7919c0c7d73a/bcrypt-5.0.0-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:89042e61b5e808b67daf24a434d89bab164d4de1746b37a8d173b6b14f3db9ff", size = 277940, upload-time = "2025-09-25T19:50:00.869Z" },
+    { url = "https://files.pythonhosted.org/packages/62/83/b3efc285d4aadc1fa83db385ec64dcfa1707e890eb42f03b127d66ac1b7b/bcrypt-5.0.0-cp38-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:e3cf5b2560c7b5a142286f69bde914494b6d8f901aaa71e453078388a50881c4", size = 310776, upload-time = "2025-09-25T19:50:02.393Z" },
+    { url = "https://files.pythonhosted.org/packages/95/7d/47ee337dacecde6d234890fe929936cb03ebc4c3a7460854bbd9c97780b8/bcrypt-5.0.0-cp38-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:f632fd56fc4e61564f78b46a2269153122db34988e78b6be8b32d28507b7eaeb", size = 312922, upload-time = "2025-09-25T19:50:04.232Z" },
+    { url = "https://files.pythonhosted.org/packages/d6/3a/43d494dfb728f55f4e1cf8fd435d50c16a2d75493225b54c8d06122523c6/bcrypt-5.0.0-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:801cad5ccb6b87d1b430f183269b94c24f248dddbbc5c1f78b6ed231743e001c", size = 341367, upload-time = "2025-09-25T19:50:05.559Z" },
+    { url = "https://files.pythonhosted.org/packages/55/ab/a0727a4547e383e2e22a630e0f908113db37904f58719dc48d4622139b5c/bcrypt-5.0.0-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3cf67a804fc66fc217e6914a5635000259fbbbb12e78a99488e4d5ba445a71eb", size = 359187, upload-time = "2025-09-25T19:50:06.916Z" },
+    { url = "https://files.pythonhosted.org/packages/1b/bb/461f352fdca663524b4643d8b09e8435b4990f17fbf4fea6bc2a90aa0cc7/bcrypt-5.0.0-cp38-abi3-win32.whl", hash = "sha256:3abeb543874b2c0524ff40c57a4e14e5d3a66ff33fb423529c88f180fd756538", size = 153752, upload-time = "2025-09-25T19:50:08.515Z" },
+    { url = "https://files.pythonhosted.org/packages/41/aa/4190e60921927b7056820291f56fc57d00d04757c8b316b2d3c0d1d6da2c/bcrypt-5.0.0-cp38-abi3-win_amd64.whl", hash = "sha256:35a77ec55b541e5e583eb3436ffbbf53b0ffa1fa16ca6782279daf95d146dcd9", size = 150881, upload-time = "2025-09-25T19:50:09.742Z" },
+    { url = "https://files.pythonhosted.org/packages/54/12/cd77221719d0b39ac0b55dbd39358db1cd1246e0282e104366ebbfb8266a/bcrypt-5.0.0-cp38-abi3-win_arm64.whl", hash = "sha256:cde08734f12c6a4e28dc6755cd11d3bdfea608d93d958fffbe95a7026ebe4980", size = 144931, upload-time = "2025-09-25T19:50:11.016Z" },
+    { url = "https://files.pythonhosted.org/packages/5d/ba/2af136406e1c3839aea9ecadc2f6be2bcd1eff255bd451dd39bcf302c47a/bcrypt-5.0.0-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:0c418ca99fd47e9c59a301744d63328f17798b5947b0f791e9af3c1c499c2d0a", size = 495313, upload-time = "2025-09-25T19:50:12.309Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/ee/2f4985dbad090ace5ad1f7dd8ff94477fe089b5fab2040bd784a3d5f187b/bcrypt-5.0.0-cp39-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ddb4e1500f6efdd402218ffe34d040a1196c072e07929b9820f363a1fd1f4191", size = 275290, upload-time = "2025-09-25T19:50:13.673Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/6e/b77ade812672d15cf50842e167eead80ac3514f3beacac8902915417f8b7/bcrypt-5.0.0-cp39-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7aeef54b60ceddb6f30ee3db090351ecf0d40ec6e2abf41430997407a46d2254", size = 278253, upload-time = "2025-09-25T19:50:15.089Z" },
+    { url = "https://files.pythonhosted.org/packages/36/c4/ed00ed32f1040f7990dac7115f82273e3c03da1e1a1587a778d8cea496d8/bcrypt-5.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:f0ce778135f60799d89c9693b9b398819d15f1921ba15fe719acb3178215a7db", size = 276084, upload-time = "2025-09-25T19:50:16.699Z" },
+    { url = "https://files.pythonhosted.org/packages/e7/c4/fa6e16145e145e87f1fa351bbd54b429354fd72145cd3d4e0c5157cf4c70/bcrypt-5.0.0-cp39-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a71f70ee269671460b37a449f5ff26982a6f2ba493b3eabdd687b4bf35f875ac", size = 297185, upload-time = "2025-09-25T19:50:18.525Z" },
+    { url = "https://files.pythonhosted.org/packages/24/b4/11f8a31d8b67cca3371e046db49baa7c0594d71eb40ac8121e2fc0888db0/bcrypt-5.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:f8429e1c410b4073944f03bd778a9e066e7fad723564a52ff91841d278dfc822", size = 278656, upload-time = "2025-09-25T19:50:19.809Z" },
+    { url = "https://files.pythonhosted.org/packages/ac/31/79f11865f8078e192847d2cb526e3fa27c200933c982c5b2869720fa5fce/bcrypt-5.0.0-cp39-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:edfcdcedd0d0f05850c52ba3127b1fce70b9f89e0fe5ff16517df7e81fa3cbb8", size = 275662, upload-time = "2025-09-25T19:50:21.567Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/8d/5e43d9584b3b3591a6f9b68f755a4da879a59712981ef5ad2a0ac1379f7a/bcrypt-5.0.0-cp39-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:611f0a17aa4a25a69362dcc299fda5c8a3d4f160e2abb3831041feb77393a14a", size = 278240, upload-time = "2025-09-25T19:50:23.305Z" },
+    { url = "https://files.pythonhosted.org/packages/89/48/44590e3fc158620f680a978aafe8f87a4c4320da81ed11552f0323aa9a57/bcrypt-5.0.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:db99dca3b1fdc3db87d7c57eac0c82281242d1eabf19dcb8a6b10eb29a2e72d1", size = 311152, upload-time = "2025-09-25T19:50:24.597Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/85/e4fbfc46f14f47b0d20493669a625da5827d07e8a88ee460af6cd9768b44/bcrypt-5.0.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:5feebf85a9cefda32966d8171f5db7e3ba964b77fdfe31919622256f80f9cf42", size = 313284, upload-time = "2025-09-25T19:50:26.268Z" },
+    { url = "https://files.pythonhosted.org/packages/25/ae/479f81d3f4594456a01ea2f05b132a519eff9ab5768a70430fa1132384b1/bcrypt-5.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:3ca8a166b1140436e058298a34d88032ab62f15aae1c598580333dc21d27ef10", size = 341643, upload-time = "2025-09-25T19:50:28.02Z" },
+    { url = "https://files.pythonhosted.org/packages/df/d2/36a086dee1473b14276cd6ea7f61aef3b2648710b5d7f1c9e032c29b859f/bcrypt-5.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:61afc381250c3182d9078551e3ac3a41da14154fbff647ddf52a769f588c4172", size = 359698, upload-time = "2025-09-25T19:50:31.347Z" },
+    { url = "https://files.pythonhosted.org/packages/c0/f6/688d2cd64bfd0b14d805ddb8a565e11ca1fb0fd6817175d58b10052b6d88/bcrypt-5.0.0-cp39-abi3-win32.whl", hash = "sha256:64d7ce196203e468c457c37ec22390f1a61c85c6f0b8160fd752940ccfb3a683", size = 153725, upload-time = "2025-09-25T19:50:34.384Z" },
+    { url = "https://files.pythonhosted.org/packages/9f/b9/9d9a641194a730bda138b3dfe53f584d61c58cd5230e37566e83ec2ffa0d/bcrypt-5.0.0-cp39-abi3-win_amd64.whl", hash = "sha256:64ee8434b0da054d830fa8e89e1c8bf30061d539044a39524ff7dec90481e5c2", size = 150912, upload-time = "2025-09-25T19:50:35.69Z" },
+    { url = "https://files.pythonhosted.org/packages/27/44/d2ef5e87509158ad2187f4dd0852df80695bb1ee0cfe0a684727b01a69e0/bcrypt-5.0.0-cp39-abi3-win_arm64.whl", hash = "sha256:f2347d3534e76bf50bca5500989d6c1d05ed64b440408057a37673282c654927", size = 144953, upload-time = "2025-09-25T19:50:37.32Z" },
+]
+
 [[package]]
 name = "beautifulsoup4"
 version = "4.14.3"
@@ -670,12 +790,15 @@ name = "deer-flow"
 version = "0.1.0"
 source = { virtual = "." }
 dependencies = [
+    { name = "bcrypt" },
     { name = "deerflow-harness" },
+    { name = "email-validator" },
     { name = "fastapi" },
     { name = "httpx" },
     { name = "langgraph-sdk" },
     { name = "lark-oapi" },
     { name = "markdown-to-mrkdwn" },
+    { name = "pyjwt" },
     { name = "python-multipart" },
     { name = "python-telegram-bot" },
     { name = "slack-sdk" },
@@ -684,6 +807,11 @@ dependencies = [
     { name = "wecom-aibot-python-sdk" },
 ]
 
+[package.optional-dependencies]
+postgres = [
+    { name = "deerflow-harness", extra = ["postgres"] },
+]
+
 [package.dev-dependencies]
 dev = [
     { name = "prompt-toolkit" },
@@ -694,12 +822,16 @@ dev = [
 
 [package.metadata]
 requires-dist = [
+    { name = "bcrypt", specifier = ">=4.0.0" },
     { name = "deerflow-harness", editable = "packages/harness" },
+    { name = "deerflow-harness", extras = ["postgres"], marker = "extra == 'postgres'", editable = "packages/harness" },
+    { name = "email-validator", specifier = ">=2.0.0" },
     { name = "fastapi", specifier = ">=0.115.0" },
     { name = "httpx", specifier = ">=0.28.0" },
     { name = "langgraph-sdk", specifier = ">=0.1.51" },
     { name = "lark-oapi", specifier = ">=1.4.0" },
     { name = "markdown-to-mrkdwn", specifier = ">=0.3.1" },
+    { name = "pyjwt", specifier = ">=2.9.0" },
     { name = "python-multipart", specifier = ">=0.0.26" },
     { name = "python-telegram-bot", specifier = ">=21.0" },
     { name = "slack-sdk", specifier = ">=3.33.0" },
@@ -707,6 +839,7 @@ requires-dist = [
     { name = "uvicorn", extras = ["standard"], specifier = ">=0.34.0" },
     { name = "wecom-aibot-python-sdk", specifier = ">=0.1.6" },
 ]
+provides-extras = ["postgres"]
 
 [package.metadata.requires-dev]
 dev = [
@@ -723,6 +856,8 @@ source = { editable = "packages/harness" }
 dependencies = [
     { name = "agent-client-protocol" },
     { name = "agent-sandbox" },
+    { name = "aiosqlite" },
+    { name = "alembic" },
     { name = "ddgs" },
     { name = "dotenv" },
     { name = "duckdb" },
@@ -748,6 +883,7 @@ dependencies = [
     { name = "pydantic" },
     { name = "pyyaml" },
     { name = "readabilipy" },
+    { name = "sqlalchemy", extra = ["asyncio"] },
     { name = "tavily-python" },
     { name = "tiktoken" },
 ]
@@ -756,6 +892,12 @@ dependencies = [
 ollama = [
     { name = "langchain-ollama" },
 ]
+postgres = [
+    { name = "asyncpg" },
+    { name = "langgraph-checkpoint-postgres" },
+    { name = "psycopg", extra = ["binary"] },
+    { name = "psycopg-pool" },
+]
 pymupdf = [
     { name = "pymupdf4llm" },
 ]
@@ -764,6 +906,9 @@ pymupdf = [
 requires-dist = [
     { name = "agent-client-protocol", specifier = ">=0.4.0" },
     { name = "agent-sandbox", specifier = ">=0.0.19" },
+    { name = "aiosqlite", specifier = ">=0.19" },
+    { name = "alembic", specifier = ">=1.13" },
+    { name = "asyncpg", marker = "extra == 'postgres'", specifier = ">=0.29" },
     { name = "ddgs", specifier = ">=9.10.0" },
     { name = "dotenv", specifier = ">=0.9.9" },
     { name = "duckdb", specifier = ">=1.4.4" },
@@ -781,20 +926,24 @@ requires-dist = [
     { name = "langfuse", specifier = ">=3.4.1" },
     { name = "langgraph", specifier = ">=1.0.6,<1.0.10" },
     { name = "langgraph-api", specifier = ">=0.7.0,<0.8.0" },
+    { name = "langgraph-checkpoint-postgres", marker = "extra == 'postgres'", specifier = ">=3.0.5" },
     { name = "langgraph-checkpoint-sqlite", specifier = ">=3.0.3" },
     { name = "langgraph-cli", specifier = ">=0.4.14" },
     { name = "langgraph-runtime-inmem", specifier = ">=0.22.1" },
     { name = "langgraph-sdk", specifier = ">=0.1.51" },
     { name = "markdownify", specifier = ">=1.2.2" },
     { name = "markitdown", extras = ["all", "xlsx"], specifier = ">=0.0.1a2" },
+    { name = "psycopg", extras = ["binary"], marker = "extra == 'postgres'", specifier = ">=3.3.3" },
+    { name = "psycopg-pool", marker = "extra == 'postgres'", specifier = ">=3.3.0" },
     { name = "pydantic", specifier = ">=2.12.5" },
     { name = "pymupdf4llm", marker = "extra == 'pymupdf'", specifier = ">=0.0.17" },
     { name = "pyyaml", specifier = ">=6.0.3" },
     { name = "readabilipy", specifier = ">=0.3.0" },
+    { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0,<3.0" },
     { name = "tavily-python", specifier = ">=0.7.17" },
     { name = "tiktoken", specifier = ">=0.8.0" },
 ]
-provides-extras = ["ollama", "pymupdf"]
+provides-extras = ["ollama", "postgres", "pymupdf"]
 
 [[package]]
 name = "defusedxml"
@@ -814,6 +963,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" },
 ]
 
+[[package]]
+name = "dnspython"
+version = "2.8.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/8c/8b/57666417c0f90f08bcafa776861060426765fdb422eb10212086fb811d26/dnspython-2.8.0.tar.gz", hash = "sha256:181d3c6996452cb1189c4046c61599b84a5a86e099562ffde77d26984ff26d0f", size = 368251, upload-time = "2025-09-07T18:58:00.022Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" },
+]
+
 [[package]]
 name = "docstring-parser"
 version = "0.17.0"
@@ -872,6 +1030,19 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b0/0d/9feae160378a3553fa9a339b0e9c1a048e147a4127210e286ef18b730f03/durationpy-0.10-py3-none-any.whl", hash = "sha256:3b41e1b601234296b4fb368338fdcd3e13e0b4fb5b67345948f4f2bf9868b286", size = 3922, upload-time = "2025-05-17T13:52:36.463Z" },
 ]
 
+[[package]]
+name = "email-validator"
+version = "2.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "dnspython" },
+    { name = "idna" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/f5/22/900cb125c76b7aaa450ce02fd727f452243f2e91a61af068b40adba60ea9/email_validator-2.3.0.tar.gz", hash = "sha256:9fc05c37f2f6cf439ff414f8fc46d917929974a82244c20eb10231ba60c54426", size = 51238, upload-time = "2025-08-26T13:09:06.831Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/de/15/545e2b6cf2e3be84bc1ed85613edd75b8aea69807a71c26f4ca6a9258e82/email_validator-2.3.0-py3-none-any.whl", hash = "sha256:80f13f623413e6b197ae73bb10bf4eb0908faf509ad8362c5edeb0be7fd450b4", size = 35604, upload-time = "2025-08-26T13:09:05.858Z" },
+]
+
 [[package]]
 name = "et-xmlfile"
 version = "2.0.0"
@@ -1105,6 +1276,53 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c4/ab/09169d5a4612a5f92490806649ac8d41e3ec9129c636754575b3553f4ea4/googleapis_common_protos-1.72.0-py3-none-any.whl", hash = "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", size = 297515, upload-time = "2025-11-06T18:29:13.14Z" },
 ]
 
+[[package]]
+name = "greenlet"
+version = "3.4.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/86/94/a5935717b307d7c71fe877b52b884c6af707d2d2090db118a03fbd799369/greenlet-3.4.0.tar.gz", hash = "sha256:f50a96b64dafd6169e595a5c56c9146ef80333e67d4476a65a9c55f400fc22ff", size = 195913, upload-time = "2026-04-08T17:08:00.863Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/65/8b/3669ad3b3f247a791b2b4aceb3aa5a31f5f6817bf547e4e1ff712338145a/greenlet-3.4.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:1a54a921561dd9518d31d2d3db4d7f80e589083063ab4d3e2e950756ef809e1a", size = 286902, upload-time = "2026-04-08T15:52:12.138Z" },
+    { url = "https://files.pythonhosted.org/packages/38/3e/3c0e19b82900873e2d8469b590a6c4b3dfd2b316d0591f1c26b38a4879a5/greenlet-3.4.0-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:16dec271460a9a2b154e3b1c2fa1050ce6280878430320e85e08c166772e3f97", size = 606099, upload-time = "2026-04-08T16:24:38.408Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/33/99fef65e7754fc76a4ed14794074c38c9ed3394a5bd129d7f61b705f3168/greenlet-3.4.0-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:90036ce224ed6fe75508c1907a77e4540176dcf0744473627785dd519c6f9996", size = 618837, upload-time = "2026-04-08T16:30:58.298Z" },
+    { url = "https://files.pythonhosted.org/packages/44/57/eae2cac10421feae6c0987e3dc106c6d86262b1cb379e171b017aba893a6/greenlet-3.4.0-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6f0def07ec9a71d72315cf26c061aceee53b306c36ed38c35caba952ea1b319d", size = 624901, upload-time = "2026-04-08T16:40:38.981Z" },
+    { url = "https://files.pythonhosted.org/packages/36/f7/229f3aed6948faa20e0616a0b8568da22e365ede6a54d7d369058b128afd/greenlet-3.4.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a1c4f6b453006efb8310affb2d132832e9bbb4fc01ce6df6b70d810d38f1f6dc", size = 615062, upload-time = "2026-04-08T15:56:33.766Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/8a/0e73c9b94f31d1cc257fe79a0eff621674141cdae7d6d00f40de378a1e42/greenlet-3.4.0-cp312-cp312-manylinux_2_39_riscv64.whl", hash = "sha256:0e1254cf0cbaa17b04320c3a78575f29f3c161ef38f59c977108f19ffddaf077", size = 423927, upload-time = "2026-04-08T16:43:05.293Z" },
+    { url = "https://files.pythonhosted.org/packages/08/97/d988180011aa40135c46cd0d0cf01dd97f7162bae14139b4a3ef54889ba5/greenlet-3.4.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:9b2d9a138ffa0e306d0e2b72976d2fb10b97e690d40ab36a472acaab0838e2de", size = 1573511, upload-time = "2026-04-08T16:26:20.058Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/0f/a5a26fe152fb3d12e6a474181f6e9848283504d0afd095f353d85726374b/greenlet-3.4.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:8424683caf46eb0eb6f626cb95e008e8cc30d0cb675bdfa48200925c79b38a08", size = 1640396, upload-time = "2026-04-08T15:57:30.88Z" },
+    { url = "https://files.pythonhosted.org/packages/42/cf/bb2c32d9a100e36ee9f6e38fad6b1e082b8184010cb06259b49e1266ca01/greenlet-3.4.0-cp312-cp312-win_amd64.whl", hash = "sha256:a0a53fb071531d003b075c444014ff8f8b1a9898d36bb88abd9ac7b3524648a2", size = 238892, upload-time = "2026-04-08T17:03:10.094Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/47/6c41314bac56e71436ce551c7fbe3cc830ed857e6aa9708dbb9c65142eb6/greenlet-3.4.0-cp312-cp312-win_arm64.whl", hash = "sha256:f38b81880ba28f232f1f675893a39cf7b6db25b31cc0a09bb50787ecf957e85e", size = 235599, upload-time = "2026-04-08T15:52:54.3Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/75/7e9cd1126a1e1f0cd67b0eda02e5221b28488d352684704a78ed505bd719/greenlet-3.4.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:43748988b097f9c6f09364f260741aa73c80747f63389824435c7a50bfdfd5c1", size = 285856, upload-time = "2026-04-08T15:52:45.82Z" },
+    { url = "https://files.pythonhosted.org/packages/9d/c4/3e2df392e5cb199527c4d9dbcaa75c14edcc394b45040f0189f649631e3c/greenlet-3.4.0-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5566e4e2cd7a880e8c27618e3eab20f3494452d12fd5129edef7b2f7aa9a36d1", size = 610208, upload-time = "2026-04-08T16:24:39.674Z" },
+    { url = "https://files.pythonhosted.org/packages/da/af/750cdfda1d1bd30a6c28080245be8d0346e669a98fdbae7f4102aa95fff3/greenlet-3.4.0-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1054c5a3c78e2ab599d452f23f7adafef55062a783a8e241d24f3b633ba6ff82", size = 621269, upload-time = "2026-04-08T16:30:59.767Z" },
+    { url = "https://files.pythonhosted.org/packages/e0/93/c8c508d68ba93232784bbc1b5474d92371f2897dfc6bc281b419f2e0d492/greenlet-3.4.0-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:98eedd1803353daf1cd9ef23eef23eda5a4d22f99b1f998d273a8b78b70dd47f", size = 628455, upload-time = "2026-04-08T16:40:40.698Z" },
+    { url = "https://files.pythonhosted.org/packages/54/78/0cbc693622cd54ebe25207efbb3a0eb07c2639cb8594f6e3aaaa0bb077a8/greenlet-3.4.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f82cb6cddc27dd81c96b1506f4aa7def15070c3b2a67d4e46fd19016aacce6cf", size = 617549, upload-time = "2026-04-08T15:56:34.893Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/46/cfaaa0ade435a60550fd83d07dfd5c41f873a01da17ede5c4cade0b9bab8/greenlet-3.4.0-cp313-cp313-manylinux_2_39_riscv64.whl", hash = "sha256:b7857e2202aae67bc5725e0c1f6403c20a8ff46094ece015e7d474f5f7020b55", size = 426238, upload-time = "2026-04-08T16:43:06.865Z" },
+    { url = "https://files.pythonhosted.org/packages/ba/c0/8966767de01343c1ff47e8b855dc78e7d1a8ed2b7b9c83576a57e289f81d/greenlet-3.4.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:227a46251ecba4ff46ae742bc5ce95c91d5aceb4b02f885487aff269c127a729", size = 1575310, upload-time = "2026-04-08T16:26:21.671Z" },
+    { url = "https://files.pythonhosted.org/packages/b8/38/bcdc71ba05e9a5fda87f63ffc2abcd1f15693b659346df994a48c968003d/greenlet-3.4.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:5b99e87be7eba788dd5b75ba1cde5639edffdec5f91fe0d734a249535ec3408c", size = 1640435, upload-time = "2026-04-08T15:57:32.572Z" },
+    { url = "https://files.pythonhosted.org/packages/a1/c2/19b664b7173b9e4ef5f77e8cef9f14c20ec7fce7920dc1ccd7afd955d093/greenlet-3.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:849f8bc17acd6295fcb5de8e46d55cc0e52381c56eaf50a2afd258e97bc65940", size = 238760, upload-time = "2026-04-08T17:04:03.878Z" },
+    { url = "https://files.pythonhosted.org/packages/9b/96/795619651d39c7fbd809a522f881aa6f0ead504cc8201c3a5b789dfaef99/greenlet-3.4.0-cp313-cp313-win_arm64.whl", hash = "sha256:9390ad88b652b1903814eaabd629ca184db15e0eeb6fe8a390bbf8b9106ae15a", size = 235498, upload-time = "2026-04-08T17:05:00.584Z" },
+    { url = "https://files.pythonhosted.org/packages/78/02/bde66806e8f169cf90b14d02c500c44cdbe02c8e224c9c67bafd1b8cadd1/greenlet-3.4.0-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:10a07aca6babdd18c16a3f4f8880acfffc2b88dfe431ad6aa5f5740759d7d75e", size = 286291, upload-time = "2026-04-08T17:09:34.307Z" },
+    { url = "https://files.pythonhosted.org/packages/05/1f/39da1c336a87d47c58352fb8a78541ce63d63ae57c5b9dae1fe02801bbc2/greenlet-3.4.0-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:076e21040b3a917d3ce4ad68fb5c3c6b32f1405616c4a57aa83120979649bd3d", size = 656749, upload-time = "2026-04-08T16:24:41.721Z" },
+    { url = "https://files.pythonhosted.org/packages/d3/6c/90ee29a4ee27af7aa2e2ec408799eeb69ee3fcc5abcecac6ddd07a5cd0f2/greenlet-3.4.0-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:e82689eea4a237e530bb5cb41b180ef81fa2160e1f89422a67be7d90da67f615", size = 669084, upload-time = "2026-04-08T16:31:01.372Z" },
+    { url = "https://files.pythonhosted.org/packages/d2/4a/74078d3936712cff6d3c91a930016f476ce4198d84e224fe6d81d3e02880/greenlet-3.4.0-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:06c2d3b89e0c62ba50bd7adf491b14f39da9e7e701647cb7b9ff4c99bee04b19", size = 673405, upload-time = "2026-04-08T16:40:42.527Z" },
+    { url = "https://files.pythonhosted.org/packages/07/49/d4cad6e5381a50947bb973d2f6cf6592621451b09368b8c20d9b8af49c5b/greenlet-3.4.0-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4df3b0b2289ec686d3c821a5fee44259c05cfe824dd5e6e12c8e5f5df23085cf", size = 665621, upload-time = "2026-04-08T15:56:35.995Z" },
+    { url = "https://files.pythonhosted.org/packages/79/3e/df8a83ab894751bc31e1106fdfaa80ca9753222f106b04de93faaa55feb7/greenlet-3.4.0-cp314-cp314-manylinux_2_39_riscv64.whl", hash = "sha256:070b8bac2ff3b4d9e0ff36a0d19e42103331d9737e8504747cd1e659f76297bd", size = 471670, upload-time = "2026-04-08T16:43:08.512Z" },
+    { url = "https://files.pythonhosted.org/packages/37/31/d1edd54f424761b5d47718822f506b435b6aab2f3f93b465441143ea5119/greenlet-3.4.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8bff29d586ea415688f4cec96a591fcc3bf762d046a796cdadc1fdb6e7f2d5bf", size = 1622259, upload-time = "2026-04-08T16:26:23.201Z" },
+    { url = "https://files.pythonhosted.org/packages/b0/c6/6d3f9cdcb21c4e12a79cb332579f1c6aa1af78eb68059c5a957c7812d95e/greenlet-3.4.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8a569c2fb840c53c13a2b8967c63621fafbd1a0e015b9c82f408c33d626a2fda", size = 1686916, upload-time = "2026-04-08T15:57:34.282Z" },
+    { url = "https://files.pythonhosted.org/packages/63/45/c1ca4a1ad975de4727e52d3ffe641ae23e1d7a8ffaa8ff7a0477e1827b92/greenlet-3.4.0-cp314-cp314-win_amd64.whl", hash = "sha256:207ba5b97ea8b0b60eb43ffcacf26969dd83726095161d676aac03ff913ee50d", size = 239821, upload-time = "2026-04-08T17:03:48.423Z" },
+    { url = "https://files.pythonhosted.org/packages/71/c4/6f621023364d7e85a4769c014c8982f98053246d142420e0328980933ceb/greenlet-3.4.0-cp314-cp314-win_arm64.whl", hash = "sha256:f8296d4e2b92af34ebde81085a01690f26a51eb9ac09a0fcadb331eb36dbc802", size = 236932, upload-time = "2026-04-08T17:04:33.551Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/8f/18d72b629783f5e8d045a76f5325c1e938e659a9e4da79c7dcd10169a48d/greenlet-3.4.0-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:d70012e51df2dbbccfaf63a40aaf9b40c8bed37c3e3a38751c926301ce538ece", size = 294681, upload-time = "2026-04-08T15:52:35.778Z" },
+    { url = "https://files.pythonhosted.org/packages/9e/ad/5fa86ec46769c4153820d58a04062285b3b9e10ba3d461ee257b68dcbf53/greenlet-3.4.0-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a58bec0751f43068cd40cff31bb3ca02ad6000b3a51ca81367af4eb5abc480c8", size = 658899, upload-time = "2026-04-08T16:24:43.32Z" },
+    { url = "https://files.pythonhosted.org/packages/43/f0/4e8174ca0e87ae748c409f055a1ba161038c43cc0a5a6f1433a26ac2e5bf/greenlet-3.4.0-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:05fa0803561028f4b2e3b490ee41216a842eaee11aed004cc343a996d9523aa2", size = 665284, upload-time = "2026-04-08T16:31:02.833Z" },
+    { url = "https://files.pythonhosted.org/packages/ef/92/466b0d9afd44b8af623139a3599d651c7564fa4152f25f117e1ee5949ffb/greenlet-3.4.0-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c4cd56a9eb7a6444edbc19062f7b6fbc8f287c663b946e3171d899693b1c19fa", size = 665872, upload-time = "2026-04-08T16:40:43.912Z" },
+    { url = "https://files.pythonhosted.org/packages/19/da/991cf7cd33662e2df92a1274b7eb4d61769294d38a1bba8a45f31364845e/greenlet-3.4.0-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e60d38719cb80b3ab5e85f9f1aed4960acfde09868af6762ccb27b260d68f4ed", size = 661861, upload-time = "2026-04-08T15:56:37.269Z" },
+    { url = "https://files.pythonhosted.org/packages/0d/14/3395a7ef3e260de0325152ddfe19dffb3e49fe10873b94654352b53ad48e/greenlet-3.4.0-cp314-cp314t-manylinux_2_39_riscv64.whl", hash = "sha256:1f85f204c4d54134ae850d401fa435c89cd667d5ce9dc567571776b45941af72", size = 489237, upload-time = "2026-04-08T16:43:09.993Z" },
+    { url = "https://files.pythonhosted.org/packages/36/c5/6c2c708e14db3d9caea4b459d8464f58c32047451142fe2cfd90e7458f41/greenlet-3.4.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7f50c804733b43eded05ae694691c9aa68bca7d0a867d67d4a3f514742a2d53f", size = 1622182, upload-time = "2026-04-08T16:26:24.777Z" },
+    { url = "https://files.pythonhosted.org/packages/7a/4c/50c5fed19378e11a29fabab1f6be39ea95358f4a0a07e115a51ca93385d8/greenlet-3.4.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:2d4f0635dc4aa638cda4b2f5a07ae9a2cff9280327b581a3fcb6f317b4fbc38a", size = 1685050, upload-time = "2026-04-08T15:57:36.453Z" },
+    { url = "https://files.pythonhosted.org/packages/db/72/85ae954d734703ab48e622c59d4ce35d77ce840c265814af9c078cacc7aa/greenlet-3.4.0-cp314-cp314t-win_amd64.whl", hash = "sha256:1a4a48f24681300c640f143ba7c404270e1ebbbcf34331d7104a4ff40f8ea705", size = 245554, upload-time = "2026-04-08T17:03:50.044Z" },
+]
+
 [[package]]
 name = "grpcio"
 version = "1.78.0"
@@ -1745,6 +1963,21 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/4a/de/ddd53b7032e623f3c7bcdab2b44e8bf635e468f62e10e5ff1946f62c9356/langgraph_checkpoint-4.0.0-py3-none-any.whl", hash = "sha256:3fa9b2635a7c5ac28b338f631abf6a030c3b508b7b9ce17c22611513b589c784", size = 46329, upload-time = "2026-01-12T20:30:25.2Z" },
 ]
 
+[[package]]
+name = "langgraph-checkpoint-postgres"
+version = "3.0.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "langgraph-checkpoint" },
+    { name = "orjson" },
+    { name = "psycopg" },
+    { name = "psycopg-pool" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/95/7a/8f439966643d32111248a225e6cb33a182d07c90de780c4dbfc1e0377832/langgraph_checkpoint_postgres-3.0.5.tar.gz", hash = "sha256:a8fd7278a63f4f849b5cbc7884a15ca8f41e7d5f7467d0a66b31e8c24492f7eb", size = 127856, upload-time = "2026-03-18T21:25:29.785Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e8/87/b0f98b33a67204bca9d5619bcd9574222f6b025cf3c125eedcec9a50ecbc/langgraph_checkpoint_postgres-3.0.5-py3-none-any.whl", hash = "sha256:86d7040a88fd70087eaafb72251d796696a0a2d856168f5c11ef620771411552", size = 42907, upload-time = "2026-03-18T21:25:28.75Z" },
+]
+
 [[package]]
 name = "langgraph-checkpoint-sqlite"
 version = "3.0.3"
@@ -1956,6 +2189,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/7b/9e/f8ee7d644affa3b80efdd623a3d75865c8f058f3950cb87fb0c48e3559bc/magika-0.6.3-py3-none-win_amd64.whl", hash = "sha256:e57f75674447b20cab4db928ae58ab264d7d8582b55183a0b876711c2b2787f3", size = 12692831, upload-time = "2025-10-30T15:22:32.063Z" },
 ]
 
+[[package]]
+name = "mako"
+version = "1.3.11"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "markupsafe" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/59/8a/805404d0c0b9f3d7a326475ca008db57aea9c5c9f2e1e39ed0faa335571c/mako-1.3.11.tar.gz", hash = "sha256:071eb4ab4c5010443152255d77db7faa6ce5916f35226eb02dc34479b6858069", size = 399811, upload-time = "2026-04-14T20:19:51.493Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/68/a5/19d7aaa7e433713ffe881df33705925a196afb9532efc8475d26593921a6/mako-1.3.11-py3-none-any.whl", hash = "sha256:e372c6e333cf004aa736a15f425087ec977e1fcbd2966aae7f17c8dc1da27a77", size = 78503, upload-time = "2026-04-14T20:19:53.233Z" },
+]
+
 [[package]]
 name = "mammoth"
 version = "1.11.0"
@@ -2030,6 +2275,69 @@ xlsx = [
     { name = "pandas" },
 ]
 
+[[package]]
+name = "markupsafe"
+version = "3.0.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615, upload-time = "2025-09-27T18:36:30.854Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020, upload-time = "2025-09-27T18:36:31.971Z" },
+    { url = "https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332, upload-time = "2025-09-27T18:36:32.813Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947, upload-time = "2025-09-27T18:36:33.86Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962, upload-time = "2025-09-27T18:36:35.099Z" },
+    { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760, upload-time = "2025-09-27T18:36:36.001Z" },
+    { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529, upload-time = "2025-09-27T18:36:36.906Z" },
+    { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015, upload-time = "2025-09-27T18:36:37.868Z" },
+    { url = "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540, upload-time = "2025-09-27T18:36:38.761Z" },
+    { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105, upload-time = "2025-09-27T18:36:39.701Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload-time = "2025-09-27T18:36:40.689Z" },
+    { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622, upload-time = "2025-09-27T18:36:41.777Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029, upload-time = "2025-09-27T18:36:43.257Z" },
+    { url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374, upload-time = "2025-09-27T18:36:44.508Z" },
+    { url = "https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980, upload-time = "2025-09-27T18:36:45.385Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990, upload-time = "2025-09-27T18:36:46.916Z" },
+    { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784, upload-time = "2025-09-27T18:36:47.884Z" },
+    { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588, upload-time = "2025-09-27T18:36:48.82Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041, upload-time = "2025-09-27T18:36:49.797Z" },
+    { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543, upload-time = "2025-09-27T18:36:51.584Z" },
+    { url = "https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113, upload-time = "2025-09-27T18:36:52.537Z" },
+    { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911, upload-time = "2025-09-27T18:36:53.513Z" },
+    { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658, upload-time = "2025-09-27T18:36:54.819Z" },
+    { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066, upload-time = "2025-09-27T18:36:55.714Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639, upload-time = "2025-09-27T18:36:56.908Z" },
+    { url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569, upload-time = "2025-09-27T18:36:57.913Z" },
+    { url = "https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284, upload-time = "2025-09-27T18:36:58.833Z" },
+    { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801, upload-time = "2025-09-27T18:36:59.739Z" },
+    { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769, upload-time = "2025-09-27T18:37:00.719Z" },
+    { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642, upload-time = "2025-09-27T18:37:01.673Z" },
+    { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612, upload-time = "2025-09-27T18:37:02.639Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200, upload-time = "2025-09-27T18:37:03.582Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" },
+    { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" },
+    { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" },
+    { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" },
+    { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" },
+    { url = "https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" },
+    { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" },
+    { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" },
+    { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" },
+    { url = "https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" },
+    { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" },
+    { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" },
+    { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" },
+    { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" },
+    { url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" },
+    { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" },
+    { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" },
+    { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" },
+    { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" },
+]
+
 [[package]]
 name = "mcp"
 version = "1.25.0"
@@ -2822,6 +3130,76 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/57/bf/2086963c69bdac3d7cff1cc7ff79b8ce5ea0bec6797a017e1be338a46248/protobuf-6.33.5-py3-none-any.whl", hash = "sha256:69915a973dd0f60f31a08b8318b73eab2bd6a392c79184b3612226b0a3f8ec02", size = 170687, upload-time = "2026-01-29T21:51:32.557Z" },
 ]
 
+[[package]]
+name = "psycopg"
+version = "3.3.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions", marker = "python_full_version < '3.13'" },
+    { name = "tzdata", marker = "sys_platform == 'win32'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/d3/b6/379d0a960f8f435ec78720462fd94c4863e7a31237cf81bf76d0af5883bf/psycopg-3.3.3.tar.gz", hash = "sha256:5e9a47458b3c1583326513b2556a2a9473a1001a56c9efe9e587245b43148dd9", size = 165624, upload-time = "2026-02-18T16:52:16.546Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c8/5b/181e2e3becb7672b502f0ed7f16ed7352aca7c109cfb94cf3878a9186db9/psycopg-3.3.3-py3-none-any.whl", hash = "sha256:f96525a72bcfade6584ab17e89de415ff360748c766f0106959144dcbb38c698", size = 212768, upload-time = "2026-02-18T16:46:27.365Z" },
+]
+
+[package.optional-dependencies]
+binary = [
+    { name = "psycopg-binary", marker = "implementation_name != 'pypy'" },
+]
+
+[[package]]
+name = "psycopg-binary"
+version = "3.3.3"
+source = { registry = "https://pypi.org/simple" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/90/15/021be5c0cbc5b7c1ab46e91cc3434eb42569f79a0592e67b8d25e66d844d/psycopg_binary-3.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6698dbab5bcef8fdb570fc9d35fd9ac52041771bfcfe6fd0fc5f5c4e36f1e99d", size = 4591170, upload-time = "2026-02-18T16:48:55.594Z" },
+    { url = "https://files.pythonhosted.org/packages/f1/54/a60211c346c9a2f8c6b272b5f2bbe21f6e11800ce7f61e99ba75cf8b63e1/psycopg_binary-3.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:329ff393441e75f10b673ae99ab45276887993d49e65f141da20d915c05aafd8", size = 4670009, upload-time = "2026-02-18T16:49:03.608Z" },
+    { url = "https://files.pythonhosted.org/packages/c1/53/ac7c18671347c553362aadbf65f92786eef9540676ca24114cc02f5be405/psycopg_binary-3.3.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:eb072949b8ebf4082ae24289a2b0fd724da9adc8f22743409d6fd718ddb379df", size = 5469735, upload-time = "2026-02-18T16:49:10.128Z" },
+    { url = "https://files.pythonhosted.org/packages/7f/c3/4f4e040902b82a344eff1c736cde2f2720f127fe939c7e7565706f96dd44/psycopg_binary-3.3.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:263a24f39f26e19ed7fc982d7859a36f17841b05bebad3eb47bb9cd2dd785351", size = 5152919, upload-time = "2026-02-18T16:49:16.335Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/e7/d929679c6a5c212bcf738806c7c89f5b3d0919f2e1685a0e08d6ff877945/psycopg_binary-3.3.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5152d50798c2fa5bd9b68ec68eb68a1b71b95126c1d70adaa1a08cd5eefdc23d", size = 6738785, upload-time = "2026-02-18T16:49:22.687Z" },
+    { url = "https://files.pythonhosted.org/packages/69/b0/09703aeb69a9443d232d7b5318d58742e8ca51ff79f90ffe6b88f1db45e7/psycopg_binary-3.3.3-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9d6a1e56dd267848edb824dbeb08cf5bac649e02ee0b03ba883ba3f4f0bd54f2", size = 4979008, upload-time = "2026-02-18T16:49:27.313Z" },
+    { url = "https://files.pythonhosted.org/packages/cc/a6/e662558b793c6e13a7473b970fee327d635270e41eded3090ef14045a6a5/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73eaaf4bb04709f545606c1db2f65f4000e8a04cdbf3e00d165a23004692093e", size = 4508255, upload-time = "2026-02-18T16:49:31.575Z" },
+    { url = "https://files.pythonhosted.org/packages/5f/7f/0f8b2e1d5e0093921b6f324a948a5c740c1447fbb45e97acaf50241d0f39/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:162e5675efb4704192411eaf8e00d07f7960b679cd3306e7efb120bb8d9456cc", size = 4189166, upload-time = "2026-02-18T16:49:35.801Z" },
+    { url = "https://files.pythonhosted.org/packages/92/ec/ce2e91c33bc8d10b00c87e2f6b0fb570641a6a60042d6a9ae35658a3a797/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:fab6b5e37715885c69f5d091f6ff229be71e235f272ebaa35158d5a46fd548a0", size = 3924544, upload-time = "2026-02-18T16:49:41.129Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/2f/7718141485f73a924205af60041c392938852aa447a94c8cbd222ff389a1/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a4aab31bd6d1057f287c96c0effca3a25584eb9cc702f282ecb96ded7814e830", size = 4235297, upload-time = "2026-02-18T16:49:46.726Z" },
+    { url = "https://files.pythonhosted.org/packages/57/f9/1add717e2643a003bbde31b1b220172e64fbc0cb09f06429820c9173f7fc/psycopg_binary-3.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:59aa31fe11a0e1d1bcc2ce37ed35fe2ac84cd65bb9036d049b1a1c39064d0f14", size = 3547659, upload-time = "2026-02-18T16:49:52.999Z" },
+    { url = "https://files.pythonhosted.org/packages/03/0a/cac9fdf1df16a269ba0e5f0f06cac61f826c94cadb39df028cdfe19d3a33/psycopg_binary-3.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05f32239aec25c5fb15f7948cffdc2dc0dac098e48b80a140e4ba32b572a2e7d", size = 4590414, upload-time = "2026-02-18T16:50:01.441Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/c0/d8f8508fbf440edbc0099b1abff33003cd80c9e66eb3a1e78834e3fb4fb9/psycopg_binary-3.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7c84f9d214f2d1de2fafebc17fa68ac3f6561a59e291553dfc45ad299f4898c1", size = 4669021, upload-time = "2026-02-18T16:50:08.803Z" },
+    { url = "https://files.pythonhosted.org/packages/04/05/097016b77e343b4568feddf12c72171fc513acef9a4214d21b9478569068/psycopg_binary-3.3.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:e77957d2ba17cada11be09a5066d93026cdb61ada7c8893101d7fe1c6e1f3925", size = 5467453, upload-time = "2026-02-18T16:50:14.985Z" },
+    { url = "https://files.pythonhosted.org/packages/91/23/73244e5feb55b5ca109cede6e97f32ef45189f0fdac4c80d75c99862729d/psycopg_binary-3.3.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:42961609ac07c232a427da7c87a468d3c82fee6762c220f38e37cfdacb2b178d", size = 5151135, upload-time = "2026-02-18T16:50:24.82Z" },
+    { url = "https://files.pythonhosted.org/packages/11/49/5309473b9803b207682095201d8708bbc7842ddf3f192488a69204e36455/psycopg_binary-3.3.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae07a3114313dd91fce686cab2f4c44af094398519af0e0f854bc707e1aeedf1", size = 6737315, upload-time = "2026-02-18T16:50:35.106Z" },
+    { url = "https://files.pythonhosted.org/packages/d4/5d/03abe74ef34d460b33c4d9662bf6ec1dd38888324323c1a1752133c10377/psycopg_binary-3.3.3-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d257c58d7b36a621dcce1d01476ad8b60f12d80eb1406aee4cf796f88b2ae482", size = 4979783, upload-time = "2026-02-18T16:50:42.067Z" },
+    { url = "https://files.pythonhosted.org/packages/f0/6c/3fbf8e604e15f2f3752900434046c00c90bb8764305a1b81112bff30ba24/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:07c7211f9327d522c9c47560cae00a4ecf6687f4e02d779d035dd3177b41cb12", size = 4509023, upload-time = "2026-02-18T16:50:50.116Z" },
+    { url = "https://files.pythonhosted.org/packages/9c/6b/1a06b43b7c7af756c80b67eac8bfaa51d77e68635a8a8d246e4f0bb7604a/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:8e7e9eca9b363dbedeceeadd8be97149d2499081f3c52d141d7cd1f395a91f83", size = 4185874, upload-time = "2026-02-18T16:50:55.97Z" },
+    { url = "https://files.pythonhosted.org/packages/2b/d3/bf49e3dcaadba510170c8d111e5e69e5ae3f981c1554c5bb71c75ce354bb/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:cb85b1d5702877c16f28d7b92ba030c1f49ebcc9b87d03d8c10bf45a2f1c7508", size = 3925668, upload-time = "2026-02-18T16:51:03.299Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/92/0aac830ed6a944fe334404e1687a074e4215630725753f0e3e9a9a595b62/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4d4606c84d04b80f9138d72f1e28c6c02dc5ae0c7b8f3f8aaf89c681ce1cd1b1", size = 4234973, upload-time = "2026-02-18T16:51:09.097Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/96/102244653ee5a143ece5afe33f00f52fe64e389dfce8dbc87580c6d70d3d/psycopg_binary-3.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:74eae563166ebf74e8d950ff359be037b85723d99ca83f57d9b244a871d6c13b", size = 3551342, upload-time = "2026-02-18T16:51:13.892Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/71/7a57e5b12275fe7e7d84d54113f0226080423a869118419c9106c083a21c/psycopg_binary-3.3.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:497852c5eaf1f0c2d88ab74a64a8097c099deac0c71de1cbcf18659a8a04a4b2", size = 4607368, upload-time = "2026-02-18T16:51:19.295Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/04/cb834f120f2b2c10d4003515ef9ca9d688115b9431735e3936ae48549af8/psycopg_binary-3.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:258d1ea53464d29768bf25930f43291949f4c7becc706f6e220c515a63a24edd", size = 4687047, upload-time = "2026-02-18T16:51:23.84Z" },
+    { url = "https://files.pythonhosted.org/packages/40/e9/47a69692d3da9704468041aa5ed3ad6fc7f6bb1a5ae788d261a26bbca6c7/psycopg_binary-3.3.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:111c59897a452196116db12e7f608da472fbff000693a21040e35fc978b23430", size = 5487096, upload-time = "2026-02-18T16:51:29.645Z" },
+    { url = "https://files.pythonhosted.org/packages/0b/b6/0e0dd6a2f802864a4ae3dbadf4ec620f05e3904c7842b326aafc43e5f464/psycopg_binary-3.3.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:17bb6600e2455993946385249a3c3d0af52cd70c1c1cdbf712e9d696d0b0bf1b", size = 5168720, upload-time = "2026-02-18T16:51:36.499Z" },
+    { url = "https://files.pythonhosted.org/packages/6f/0d/977af38ac19a6b55d22dff508bd743fd7c1901e1b73657e7937c7cccb0a3/psycopg_binary-3.3.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:642050398583d61c9856210568eb09a8e4f2fe8224bf3be21b67a370e677eead", size = 6762076, upload-time = "2026-02-18T16:51:43.167Z" },
+    { url = "https://files.pythonhosted.org/packages/34/40/912a39d48322cf86895c0eaf2d5b95cb899402443faefd4b09abbba6b6e1/psycopg_binary-3.3.3-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:533efe6dc3a7cba5e2a84e38970786bb966306863e45f3db152007e9f48638a6", size = 4997623, upload-time = "2026-02-18T16:51:47.707Z" },
+    { url = "https://files.pythonhosted.org/packages/98/0c/c14d0e259c65dc7be854d926993f151077887391d5a081118907a9d89603/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:5958dbf28b77ce2033482f6cb9ef04d43f5d8f4b7636e6963d5626f000efb23e", size = 4532096, upload-time = "2026-02-18T16:51:51.421Z" },
+    { url = "https://files.pythonhosted.org/packages/39/21/8b7c50a194cfca6ea0fd4d1f276158307785775426e90700ab2eba5cd623/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:a6af77b6626ce92b5817bf294b4d45ec1a6161dba80fc2d82cdffdd6814fd023", size = 4208884, upload-time = "2026-02-18T16:51:57.336Z" },
+    { url = "https://files.pythonhosted.org/packages/c7/2c/a4981bf42cf30ebba0424971d7ce70a222ae9b82594c42fc3f2105d7b525/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:47f06fcbe8542b4d96d7392c476a74ada521c5aebdb41c3c0155f6595fc14c8d", size = 3944542, upload-time = "2026-02-18T16:52:04.266Z" },
+    { url = "https://files.pythonhosted.org/packages/60/e9/b7c29b56aa0b85a4e0c4d89db691c1ceef08f46a356369144430c155a2f5/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e7800e6c6b5dc4b0ca7cc7370f770f53ac83886b76afda0848065a674231e856", size = 4254339, upload-time = "2026-02-18T16:52:10.444Z" },
+    { url = "https://files.pythonhosted.org/packages/98/5a/291d89f44d3820fffb7a04ebc8f3ef5dda4f542f44a5daea0c55a84abf45/psycopg_binary-3.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:165f22ab5a9513a3d7425ffb7fcc7955ed8ccaeef6d37e369d6cc1dff1582383", size = 3652796, upload-time = "2026-02-18T16:52:14.02Z" },
+]
+
+[[package]]
+name = "psycopg-pool"
+version = "3.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/56/9a/9470d013d0d50af0da9c4251614aeb3c1823635cab3edc211e3839db0bcf/psycopg_pool-3.3.0.tar.gz", hash = "sha256:fa115eb2860bd88fce1717d75611f41490dec6135efb619611142b24da3f6db5", size = 31606, upload-time = "2025-12-01T11:34:33.11Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/e7/c3/26b8a0908a9db249de3b4169692e1c7c19048a9bc41a4d3209cee7dbb758/psycopg_pool-3.3.0-py3-none-any.whl", hash = "sha256:2e44329155c410b5e8666372db44276a8b1ebd8c90f1c3026ebba40d4bc81063", size = 39995, upload-time = "2025-12-01T11:34:29.761Z" },
+]
+
 [[package]]
 name = "pyasn1"
 version = "0.6.3"
@@ -3605,6 +3983,57 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/b8/a7/903429719d39ac2c42aa37086c90e816d883560f13c87d51f09a2962e021/speechrecognition-3.14.5-py3-none-any.whl", hash = "sha256:0c496d74e9f29b1daadb0d96f5660f47563e42bf09316dacdd57094c5095977e", size = 32856308, upload-time = "2025-12-31T11:25:41.161Z" },
 ]
 
+[[package]]
+name = "sqlalchemy"
+version = "2.0.49"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "greenlet", marker = "platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/09/45/461788f35e0364a8da7bda51a1fe1b09762d0c32f12f63727998d85a873b/sqlalchemy-2.0.49.tar.gz", hash = "sha256:d15950a57a210e36dd4cec1aac22787e2a4d57ba9318233e2ef8b2daf9ff2d5f", size = 9898221, upload-time = "2026-04-03T16:38:11.704Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/49/b3/2de412451330756aaaa72d27131db6dde23995efe62c941184e15242a5fa/sqlalchemy-2.0.49-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4bbccb45260e4ff1b7db0be80a9025bb1e6698bdb808b83fff0000f7a90b2c0b", size = 2157681, upload-time = "2026-04-03T16:53:07.132Z" },
+    { url = "https://files.pythonhosted.org/packages/50/84/b2a56e2105bd11ebf9f0b93abddd748e1a78d592819099359aa98134a8bf/sqlalchemy-2.0.49-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:fb37f15714ec2652d574f021d479e78cd4eb9d04396dca36568fdfffb3487982", size = 3338976, upload-time = "2026-04-03T17:07:40Z" },
+    { url = "https://files.pythonhosted.org/packages/2c/fa/65fcae2ed62f84ab72cf89536c7c3217a156e71a2c111b1305ab6f0690e2/sqlalchemy-2.0.49-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:3bb9ec6436a820a4c006aad1ac351f12de2f2dbdaad171692ee457a02429b672", size = 3351937, upload-time = "2026-04-03T17:12:23.374Z" },
+    { url = "https://files.pythonhosted.org/packages/f8/2f/6fd118563572a7fe475925742eb6b3443b2250e346a0cc27d8d408e73773/sqlalchemy-2.0.49-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8d6efc136f44a7e8bc8088507eaabbb8c2b55b3dbb63fe102c690da0ddebe55e", size = 3281646, upload-time = "2026-04-03T17:07:41.949Z" },
+    { url = "https://files.pythonhosted.org/packages/c5/d7/410f4a007c65275b9cf82354adb4bb8ba587b176d0a6ee99caa16fe638f8/sqlalchemy-2.0.49-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e06e617e3d4fd9e51d385dfe45b077a41e9d1b033a7702551e3278ac597dc750", size = 3316695, upload-time = "2026-04-03T17:12:25.642Z" },
+    { url = "https://files.pythonhosted.org/packages/d9/95/81f594aa60ded13273a844539041ccf1e66c5a7bed0a8e27810a3b52d522/sqlalchemy-2.0.49-cp312-cp312-win32.whl", hash = "sha256:83101a6930332b87653886c01d1ee7e294b1fe46a07dd9a2d2b4f91bcc88eec0", size = 2117483, upload-time = "2026-04-03T17:05:40.896Z" },
+    { url = "https://files.pythonhosted.org/packages/47/9e/fd90114059175cac64e4fafa9bf3ac20584384d66de40793ae2e2f26f3bb/sqlalchemy-2.0.49-cp312-cp312-win_amd64.whl", hash = "sha256:618a308215b6cececb6240b9abde545e3acdabac7ae3e1d4e666896bf5ba44b4", size = 2144494, upload-time = "2026-04-03T17:05:42.282Z" },
+    { url = "https://files.pythonhosted.org/packages/ae/81/81755f50eb2478eaf2049728491d4ea4f416c1eb013338682173259efa09/sqlalchemy-2.0.49-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:df2d441bacf97022e81ad047e1597552eb3f83ca8a8f1a1fdd43cd7fe3898120", size = 2154547, upload-time = "2026-04-03T16:53:08.64Z" },
+    { url = "https://files.pythonhosted.org/packages/a2/bc/3494270da80811d08bcfa247404292428c4fe16294932bce5593f215cad9/sqlalchemy-2.0.49-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8e20e511dc15265fb433571391ba313e10dd8ea7e509d51686a51313b4ac01a2", size = 3280782, upload-time = "2026-04-03T17:07:43.508Z" },
+    { url = "https://files.pythonhosted.org/packages/cd/f5/038741f5e747a5f6ea3e72487211579d8cbea5eb9827a9cbd61d0108c4bd/sqlalchemy-2.0.49-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:47604cb2159f8bbd5a1ab48a714557156320f20871ee64d550d8bf2683d980d3", size = 3297156, upload-time = "2026-04-03T17:12:27.697Z" },
+    { url = "https://files.pythonhosted.org/packages/88/50/a6af0ff9dc954b43a65ca9b5367334e45d99684c90a3d3413fc19a02d43c/sqlalchemy-2.0.49-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:22d8798819f86720bc646ab015baff5ea4c971d68121cb36e2ebc2ee43ead2b7", size = 3228832, upload-time = "2026-04-03T17:07:45.38Z" },
+    { url = "https://files.pythonhosted.org/packages/bc/d1/5f6bdad8de0bf546fc74370939621396515e0cdb9067402d6ba1b8afbe9a/sqlalchemy-2.0.49-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9b1c058c171b739e7c330760044803099c7fff11511e3ab3573e5327116a9c33", size = 3267000, upload-time = "2026-04-03T17:12:29.657Z" },
+    { url = "https://files.pythonhosted.org/packages/f7/30/ad62227b4a9819a5e1c6abff77c0f614fa7c9326e5a3bdbee90f7139382b/sqlalchemy-2.0.49-cp313-cp313-win32.whl", hash = "sha256:a143af2ea6672f2af3f44ed8f9cd020e9cc34c56f0e8db12019d5d9ecf41cb3b", size = 2115641, upload-time = "2026-04-03T17:05:43.989Z" },
+    { url = "https://files.pythonhosted.org/packages/17/3a/7215b1b7d6d49dc9a87211be44562077f5f04f9bb5a59552c1c8e2d98173/sqlalchemy-2.0.49-cp313-cp313-win_amd64.whl", hash = "sha256:12b04d1db2663b421fe072d638a138460a51d5a862403295671c4f3987fb9148", size = 2141498, upload-time = "2026-04-03T17:05:45.7Z" },
+    { url = "https://files.pythonhosted.org/packages/28/4b/52a0cb2687a9cd1648252bb257be5a1ba2c2ded20ba695c65756a55a15a4/sqlalchemy-2.0.49-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:24bd94bb301ec672d8f0623eba9226cc90d775d25a0c92b5f8e4965d7f3a1518", size = 3560807, upload-time = "2026-04-03T16:58:31.666Z" },
+    { url = "https://files.pythonhosted.org/packages/8c/d8/fda95459204877eed0458550d6c7c64c98cc50c2d8d618026737de9ed41a/sqlalchemy-2.0.49-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a51d3db74ba489266ef55c7a4534eb0b8db9a326553df481c11e5d7660c8364d", size = 3527481, upload-time = "2026-04-03T17:06:00.155Z" },
+    { url = "https://files.pythonhosted.org/packages/ff/0a/2aac8b78ac6487240cf7afef8f203ca783e8796002dc0cf65c4ee99ff8bb/sqlalchemy-2.0.49-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:55250fe61d6ebfd6934a272ee16ef1244e0f16b7af6cd18ab5b1fc9f08631db0", size = 3468565, upload-time = "2026-04-03T16:58:33.414Z" },
+    { url = "https://files.pythonhosted.org/packages/a5/3d/ce71cfa82c50a373fd2148b3c870be05027155ce791dc9a5dcf439790b8b/sqlalchemy-2.0.49-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:46796877b47034b559a593d7e4b549aba151dae73f9e78212a3478161c12ab08", size = 3477769, upload-time = "2026-04-03T17:06:02.787Z" },
+    { url = "https://files.pythonhosted.org/packages/d5/e8/0a9f5c1f7c6f9ca480319bf57c2d7423f08d31445974167a27d14483c948/sqlalchemy-2.0.49-cp313-cp313t-win32.whl", hash = "sha256:9c4969a86e41454f2858256c39bdfb966a20961e9b58bf8749b65abf447e9a8d", size = 2143319, upload-time = "2026-04-03T17:02:04.328Z" },
+    { url = "https://files.pythonhosted.org/packages/0e/51/fb5240729fbec73006e137c4f7a7918ffd583ab08921e6ff81a999d6517a/sqlalchemy-2.0.49-cp313-cp313t-win_amd64.whl", hash = "sha256:b9870d15ef00e4d0559ae10ee5bc71b654d1f20076dbe8bc7ed19b4c0625ceba", size = 2175104, upload-time = "2026-04-03T17:02:05.989Z" },
+    { url = "https://files.pythonhosted.org/packages/55/33/bf28f618c0a9597d14e0b9ee7d1e0622faff738d44fe986ee287cdf1b8d0/sqlalchemy-2.0.49-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:233088b4b99ebcbc5258c755a097aa52fbf90727a03a5a80781c4b9c54347a2e", size = 2156356, upload-time = "2026-04-03T16:53:09.914Z" },
+    { url = "https://files.pythonhosted.org/packages/d1/a7/5f476227576cb8644650eff68cc35fa837d3802b997465c96b8340ced1e2/sqlalchemy-2.0.49-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:57ca426a48eb2c682dae8204cd89ea8ab7031e2675120a47924fabc7caacbc2a", size = 3276486, upload-time = "2026-04-03T17:07:46.9Z" },
+    { url = "https://files.pythonhosted.org/packages/2e/84/efc7c0bf3a1c5eef81d397f6fddac855becdbb11cb38ff957888603014a7/sqlalchemy-2.0.49-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:685e93e9c8f399b0c96a624799820176312f5ceef958c0f88215af4013d29066", size = 3281479, upload-time = "2026-04-03T17:12:32.226Z" },
+    { url = "https://files.pythonhosted.org/packages/91/68/bb406fa4257099c67bd75f3f2261b129c63204b9155de0d450b37f004698/sqlalchemy-2.0.49-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:9e0400fa22f79acc334d9a6b185dc00a44a8e6578aa7e12d0ddcd8434152b187", size = 3226269, upload-time = "2026-04-03T17:07:48.678Z" },
+    { url = "https://files.pythonhosted.org/packages/67/84/acb56c00cca9f251f437cb49e718e14f7687505749ea9255d7bd8158a6df/sqlalchemy-2.0.49-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:a05977bffe9bffd2229f477fa75eabe3192b1b05f408961d1bebff8d1cd4d401", size = 3248260, upload-time = "2026-04-03T17:12:34.381Z" },
+    { url = "https://files.pythonhosted.org/packages/56/19/6a20ea25606d1efd7bd1862149bb2a22d1451c3f851d23d887969201633f/sqlalchemy-2.0.49-cp314-cp314-win32.whl", hash = "sha256:0f2fa354ba106eafff2c14b0cc51f22801d1e8b2e4149342023bd6f0955de5f5", size = 2118463, upload-time = "2026-04-03T17:05:47.093Z" },
+    { url = "https://files.pythonhosted.org/packages/cf/4f/8297e4ed88e80baa1f5aa3c484a0ee29ef3c69c7582f206c916973b75057/sqlalchemy-2.0.49-cp314-cp314-win_amd64.whl", hash = "sha256:77641d299179c37b89cf2343ca9972c88bb6eef0d5fc504a2f86afd15cd5adf5", size = 2144204, upload-time = "2026-04-03T17:05:48.694Z" },
+    { url = "https://files.pythonhosted.org/packages/1f/33/95e7216df810c706e0cd3655a778604bbd319ed4f43333127d465a46862d/sqlalchemy-2.0.49-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c1dc3368794d522f43914e03312202523cc89692f5389c32bea0233924f8d977", size = 3565474, upload-time = "2026-04-03T16:58:35.128Z" },
+    { url = "https://files.pythonhosted.org/packages/0c/a4/ed7b18d8ccf7f954a83af6bb73866f5bc6f5636f44c7731fbb741f72cc4f/sqlalchemy-2.0.49-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7c821c47ecfe05cc32140dcf8dc6fd5d21971c86dbd56eabfe5ba07a64910c01", size = 3530567, upload-time = "2026-04-03T17:06:04.587Z" },
+    { url = "https://files.pythonhosted.org/packages/73/a3/20faa869c7e21a827c4a2a42b41353a54b0f9f5e96df5087629c306df71e/sqlalchemy-2.0.49-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:9c04bff9a5335eb95c6ecf1c117576a0aa560def274876fd156cfe5510fccc61", size = 3474282, upload-time = "2026-04-03T16:58:37.131Z" },
+    { url = "https://files.pythonhosted.org/packages/b7/50/276b9a007aa0764304ad467eceb70b04822dc32092492ee5f322d559a4dc/sqlalchemy-2.0.49-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:7f605a456948c35260e7b2a39f8952a26f077fd25653c37740ed186b90aaa68a", size = 3480406, upload-time = "2026-04-03T17:06:07.176Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/c3/c80fcdb41905a2df650c2a3e0337198b6848876e63d66fe9188ef9003d24/sqlalchemy-2.0.49-cp314-cp314t-win32.whl", hash = "sha256:6270d717b11c5476b0cbb21eedc8d4dbb7d1a956fd6c15a23e96f197a6193158", size = 2149151, upload-time = "2026-04-03T17:02:07.281Z" },
+    { url = "https://files.pythonhosted.org/packages/05/52/9f1a62feab6ed368aff068524ff414f26a6daebc7361861035ae00b05530/sqlalchemy-2.0.49-cp314-cp314t-win_amd64.whl", hash = "sha256:275424295f4256fd301744b8f335cff367825d270f155d522b30c7bf49903ee7", size = 2184178, upload-time = "2026-04-03T17:02:08.623Z" },
+    { url = "https://files.pythonhosted.org/packages/e5/30/8519fdde58a7bdf155b714359791ad1dc018b47d60269d5d160d311fdc36/sqlalchemy-2.0.49-py3-none-any.whl", hash = "sha256:ec44cfa7ef1a728e88ad41674de50f6db8cfdb3e2af84af86e0041aaf02d43d0", size = 1942158, upload-time = "2026-04-03T16:53:44.135Z" },
+]
+
+[package.optional-dependencies]
+asyncio = [
+    { name = "greenlet" },
+]
+
 [[package]]
 name = "sqlite-vec"
 version = "0.1.6"
diff --git a/backend/uv.toml b/backend/uv.toml
new file mode 100644
index 000000000..7884c96f1
--- /dev/null
+++ b/backend/uv.toml
@@ -0,0 +1 @@
+index-url = "https://pypi.org/simple"
diff --git a/config.example.yaml b/config.example.yaml
index b9f7a9632..cdc690f33 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -788,42 +788,79 @@ agents_api:
 # ============================================================================
 # Allow the agent to autonomously create and improve skills in skills/custom/.
 skill_evolution:
-  enabled: false                # Set to true to allow agent-managed writes under skills/custom
-  moderation_model_name: null  # Model for LLM-based security scanning (null = use default model)
+  enabled: false # Set to true to allow agent-managed writes under skills/custom
+  moderation_model_name: null # Model for LLM-based security scanning (null = use default model)
 
 # ============================================================================
-# Checkpointer Configuration
+# Checkpointer Configuration (DEPRECATED — use `database` instead)
 # ============================================================================
-# Configure state persistence for the embedded DeerFlowClient.
-# The LangGraph Server manages its own state persistence separately
-# via the server infrastructure (this setting does not affect it).
+# Legacy standalone checkpointer config. Kept for backward compatibility.
+# Prefer the unified `database` section below, which drives BOTH the
+# LangGraph checkpointer AND DeerFlow application data (runs, feedback,
+# events) from a single backend setting.
 #
-# When configured, DeerFlowClient will automatically use this checkpointer,
-# enabling multi-turn conversations to persist across process restarts.
+# If both `checkpointer` and `database` are present, `checkpointer`
+# takes precedence for LangGraph state persistence only.
 #
-# Supported types:
-#   memory   - In-process only. State is lost when the process exits. (default)
-#   sqlite   - File-based SQLite persistence. Survives restarts.
-#              Requires: uv add langgraph-checkpoint-sqlite
-#   postgres - PostgreSQL persistence. Suitable for multi-process deployments.
-#              Requires: uv add langgraph-checkpoint-postgres psycopg[binary] psycopg-pool
-#
-# Examples:
-#
-# In-memory (default when omitted — no persistence):
 # checkpointer:
-#   type: memory
+#   type: sqlite
+#   connection_string: checkpoints.db
 #
-# SQLite (file-based, single-process):
-checkpointer:
-  type: sqlite
-  connection_string: checkpoints.db
-#
-# PostgreSQL (multi-process, production):
 # checkpointer:
 #   type: postgres
 #   connection_string: postgresql://user:password@localhost:5432/deerflow
 
+# ============================================================================
+# Database
+# ============================================================================
+# Unified storage backend for LangGraph checkpointer and DeerFlow
+# application data (runs, threads metadata, feedback, etc.).
+#
+# backend: memory  -- No persistence, data lost on restart (default)
+# backend: sqlite  -- Single-node deployment, files in sqlite_dir
+# backend: postgres -- Production multi-node deployment
+#
+# SQLite mode uses a single deerflow.db file with WAL journal mode
+# for both checkpointer and application data.
+#
+# Postgres mode: put your connection URL in .env as DATABASE_URL,
+# then reference it here with $DATABASE_URL.
+# Install the driver first:
+#   Local:  uv sync --extra postgres
+#   Docker: UV_EXTRAS=postgres docker compose build
+#
+# NOTE: When both `checkpointer` and `database` are configured,
+# `checkpointer` takes precedence for LangGraph state persistence.
+# If you use `database`, you can remove the `checkpointer` section.
+# database:
+#   backend: sqlite
+#   sqlite_dir: .deer-flow/data
+#
+# database:
+#   backend: postgres
+#   postgres_url: $DATABASE_URL
+database:
+  backend: sqlite
+  sqlite_dir: .deer-flow/data
+
+# ============================================================================
+# Run Events Configuration
+# ============================================================================
+# Storage backend for run events (messages + execution traces).
+#
+# backend: memory  -- No persistence, data lost on restart (default)
+# backend: db      -- SQL database via ORM, full query capability (production)
+# backend: jsonl   -- Append-only JSONL files (lightweight single-node persistence)
+#
+# run_events:
+#   backend: memory
+#   max_trace_content: 10240    # Truncation threshold for trace content (db backend, bytes)
+#   track_token_usage: true     # Accumulate token counts to RunRow
+run_events:
+  backend: memory
+  max_trace_content: 10240
+  track_token_usage: true
+
 # ============================================================================
 # IM Channels Configuration
 # ============================================================================
diff --git a/deer-flow.code-workspace b/deer-flow.code-workspace
index ef2863302..a4f4cb240 100644
--- a/deer-flow.code-workspace
+++ b/deer-flow.code-workspace
@@ -5,7 +5,7 @@
     }
   ],
   "settings": {
-    "typescript.tsdk": "frontend/node_modules/typescript/lib",
+    "js/ts.tsdk.path": "frontend/node_modules/typescript/lib",
     "python-envs.pythonProjects": [
       {
         "path": "backend",
@@ -44,4 +44,4 @@
       }
     ]
   }
-}
+}
\ No newline at end of file
diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml
index 38337c7df..31cb673da 100644
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -73,6 +73,7 @@ services:
         APT_MIRROR: ${APT_MIRROR:-}
         UV_IMAGE: ${UV_IMAGE:-ghcr.io/astral-sh/uv:0.7.20}
         UV_INDEX_URL: ${UV_INDEX_URL:-https://pypi.org/simple}
+        UV_EXTRAS: ${UV_EXTRAS:-}
     container_name: deer-flow-gateway
     command: sh -c "cd backend && PYTHONPATH=. uv run uvicorn app.gateway.app:app --host 0.0.0.0 --port 8001 --workers ${GATEWAY_WORKERS:-4}"
     volumes:
@@ -126,6 +127,7 @@ services:
         APT_MIRROR: ${APT_MIRROR:-}
         UV_IMAGE: ${UV_IMAGE:-ghcr.io/astral-sh/uv:0.7.20}
         UV_INDEX_URL: ${UV_INDEX_URL:-https://pypi.org/simple}
+        UV_EXTRAS: ${UV_EXTRAS:-}
     container_name: deer-flow-langgraph
     command: sh -c 'cd /app/backend && args="--no-browser --no-reload --host 0.0.0.0 --port 2024 --n-jobs-per-worker $${LANGGRAPH_JOBS_PER_WORKER:-10}" && if [ "$${LANGGRAPH_ALLOW_BLOCKING:-0}" = "1" ]; then args="$$args --allow-blocking"; fi && uv run langgraph dev $$args'
     volumes:
diff --git a/docker/nginx/nginx.local.conf b/docker/nginx/nginx.local.conf
index e79508831..e5a2bef3d 100644
--- a/docker/nginx/nginx.local.conf
+++ b/docker/nginx/nginx.local.conf
@@ -218,6 +218,25 @@ http {
             proxy_set_header X-Forwarded-Proto $scheme;
         }
 
+        # Catch-all for any /api/* prefix not matched by a more specific block above.
+        # Covers the auth module (/api/v1/auth/login, /me, /change-password, ...),
+        # plus feedback / runs / token-usage routes that 2.0-rc added without
+        # updating this nginx config. Longest-prefix matching ensures the explicit
+        # blocks above (/api/models, /api/threads regex, /api/langgraph/, ...) still
+        # win for their paths — only truly unmatched /api/* requests land here.
+        location /api/ {
+            proxy_pass http://gateway;
+            proxy_http_version 1.1;
+            proxy_set_header Host $host;
+            proxy_set_header X-Real-IP $remote_addr;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+
+            # Auth endpoints set HttpOnly cookies — make sure nginx doesn't
+            # strip the Set-Cookie header from upstream responses.
+            proxy_pass_header Set-Cookie;
+        }
+
         # All other requests go to frontend
         location / {
             proxy_pass http://frontend;
@@ -232,6 +251,23 @@ http {
             proxy_set_header Connection 'upgrade';
             proxy_cache_bypass $http_upgrade;
 
+            # Disable response buffering for the frontend. Without this,
+            # nginx tries to spool large upstream responses (e.g. Next.js
+            # static chunks) into ``proxy_temp_path``, which defaults to
+            # the system-owned ``/var/lib/nginx/proxy`` and fails with
+            # ``[crit] open() ... failed (13: Permission denied)`` when
+            # nginx is launched as a non-root user (every dev machine
+            # except production root containers). The symptom on the
+            # client side is ``ERR_INCOMPLETE_CHUNKED_ENCODING`` and
+            # ``ChunkLoadError`` partway through page hydration.
+            #
+            # Streaming the response straight through avoids the
+            # temp-file path entirely. The frontend already sets its
+            # own cache headers, so we don't lose anything from
+            # disabling nginx-side buffering.
+            proxy_buffering off;
+            proxy_request_buffering off;
+
             # Timeouts
             proxy_connect_timeout 600s;
             proxy_send_timeout 600s;
diff --git a/docs/CONFIG_DESIGN.zh.md b/docs/CONFIG_DESIGN.zh.md
new file mode 100644
index 000000000..448bf2c2f
--- /dev/null
+++ b/docs/CONFIG_DESIGN.zh.md
@@ -0,0 +1,301 @@
+# DeerFlow 配置系统设计
+
+> 对应实现：[PR #2271](https://github.com/bytedance/deer-flow/pull/2271) · RFC [#1811](https://github.com/bytedance/deer-flow/issues/1811) · 归档 spec：[config-refactor-design](./plans/2026-04-12-config-refactor-design.md)
+
+## 1. 为什么要重构
+
+重构前的 `deerflow/config/` 有三个结构性问题，凑在一起就是"全局可变状态 + 副作用耦合"的经典反模式：
+
+| 问题 | 具体表现 |
+|------|----------|
+| 双重真相 | 每个 sub-config 同时是 `AppConfig` 字段**和**模块级全局（`_memory_config` / `_title_config` …）。consumer 不知道该信哪个 |
+| 副作用耦合 | `AppConfig.from_file()` 顺便 mutate 8 个 sub-module 的 globals（通过 `load_*_from_dict()`） |
+| 隔离不完整 | 原有的 `ContextVar` 只罩住 `AppConfig` 本体，8 个 sub-config globals 漏在外面 |
+
+从类型论视角看：config 本应是一个**纯值对象（value object）**——构造一次、不变、可复制——但上面这套设计让它变成了"带全局状态的活对象"，于是 test mutation、async 边界、热更新都会互相污染。
+
+## 2. 核心设计原则
+
+> **Config is a value object, not live shared state.**
+> 构造一次，不可变，没有 reload。新 config = 新对象 + 重建 agent。
+
+这一条原则推导出后面所有决策：
+
+- 全部 config model `frozen=True` → 非法状态不可表示
+- `from_file()` 是纯函数 → 无副作用
+- 没有 "热加载"语义 → 改变配置等于"拿到新对象"，由调用方决定要不要换进程全局
+
+## 3. 四层分层
+
+```mermaid
+graph TB
+    subgraph L1 ["第 1 层 数据模型 — 冻结的 ADT"]
+        direction LR
+        AppConfig["AppConfig frozen=True"]
+        Sub["MemoryConfig TitleConfig SummarizationConfig ... 全部 frozen"]
+        AppConfig --> Sub
+    end
+
+    subgraph L2 ["第 2 层 Lifecycle — AppConfig.current"]
+        direction LR
+        Override["_override ContextVar per-context"]
+        Global["_global ClassVar process-singleton"]
+        Auto["auto-load from file with warning"]
+        Override --> Global
+        Global --> Auto
+    end
+
+    subgraph L3 ["第 3 层 Per-invocation context — DeerFlowContext"]
+        direction LR
+        Ctx["frozen dataclass app_config thread_id agent_name"]
+        Resolve["resolve_context legacy bridge"]
+        Ctx --> Resolve
+    end
+
+    subgraph L4 ["第 4 层 访问模式 — 按 caller 类型分流"]
+        direction LR
+        Typed["typed middleware runtime.context.app_config.xxx"]
+        Legacy["dict-legacy resolve_context runtime"]
+        NonAgent["非 agent 路径 AppConfig.current"]
+    end
+
+    L1 --> L2
+    L2 --> L3
+    L3 --> L4
+
+    classDef morandiBlue fill:#B5C4D1,stroke:#6A7A8C,color:#2E3A47
+    classDef morandiGreen fill:#C4D1B5,stroke:#7A8C6A,color:#2E3A47
+    classDef morandiPurple fill:#C9BED1,stroke:#7E6A8C,color:#2E3A47
+    classDef morandiGrey fill:#CFCFCF,stroke:#7A7A7A,color:#2E3A47
+    class L1 morandiBlue
+    class L2 morandiGreen
+    class L3 morandiPurple
+    class L4 morandiGrey
+```
+
+### 3.1 第 1 层：冻结的 ADT
+
+所有 config model 都是 Pydantic `frozen=True`。
+
+```python
+class MemoryConfig(BaseModel):
+    model_config = ConfigDict(frozen=True)
+    enabled: bool = True
+    storage_path: str | None = None
+    ...
+
+class AppConfig(BaseModel):
+    model_config = ConfigDict(extra="allow", frozen=True)
+    memory: MemoryConfig
+    title: TitleConfig
+    ...
+```
+
+改 config 用 copy-on-write：
+
+```python
+new_config = config.model_copy(update={"memory": new_memory_config})
+```
+
+**从类型论视角**：这就是个 product type（record），所有字段组合起来才是一个完整的 `AppConfig`。冻结意味着 `AppConfig` 是**指称透明**的——同样的输入永远拿到同样的对象。
+
+### 3.2 第 2 层：Lifecycle — `AppConfig.current()`
+
+这层是整个设计最值得讲的一块。它不是一个简单的单 `ContextVar`，而是**三层 fallback**：
+
+```python
+class AppConfig(BaseModel):
+    ...
+
+    # 进程级单例。GIL 下原子指针交换，无需锁
+    _global: ClassVar[AppConfig | None] = None
+
+    # Per-context override，用于测试隔离和多 client
+    _override: ClassVar[ContextVar[AppConfig]] = ContextVar("deerflow_app_config_override")
+
+    @classmethod
+    def init(cls, config: AppConfig) -> None:
+        """设置进程全局。对所有后续 async task 可见"""
+        cls._global = config
+
+    @classmethod
+    def set_override(cls, config: AppConfig) -> Token[AppConfig]:
+        """Per-context 覆盖。返回 Token 给 reset_override()"""
+        return cls._override.set(config)
+
+    @classmethod
+    def reset_override(cls, token: Token[AppConfig]) -> None:
+        cls._override.reset(token)
+
+    @classmethod
+    def current(cls) -> AppConfig:
+        """优先级：per-context override > 进程全局 > 自动从文件加载（warning）"""
+        try:
+            return cls._override.get()
+        except LookupError:
+            pass
+        if cls._global is not None:
+            return cls._global
+        logger.warning("AppConfig.current() called before init(); auto-loading from file. ...")
+        config = cls.from_file()
+        cls._global = config
+        return config
+```
+
+**为什么是三层，不是一层？**
+
+| 原因 | 解释 |
+|------|------|
+| 单 ContextVar 行不通 | Gateway 收到 `PUT /mcp/config` reload config，下一个请求在**全新的 async context** 里跑——ContextVar 的值传不过去。只能用进程级变量 |
+| 保留 ContextVar override | 测试需要 per-test scope config，`Token`-based reset 保证干净恢复。多 client 场景如果真出现也能靠它 |
+| Auto-load fallback | 有些 call site 历史上没调 `init()`（内部脚本、import-time 触发的测试）。加 warning 保证信号不丢，但不硬崩 |
+
+**Scala 视角的映射**：
+
+- `_global` = 进程级 `var`，脏，但别无选择
+- `_override` = `Option[ContextVar]` 形式的 reader monad 层
+- `current()` = fallback chain `override.orElse(global).orElse(autoLoad)`，和 `Option.orElse` 思路一致
+
+**为什么 `_global` 没加锁？**
+
+因为读和写都是单个指针赋值（assignment of class attribute），在 CPython 的 GIL 下是原子的。如果将来改成 read-modify-write（比如 "如果没 init 就 init 成 X"），再加 `threading.Lock`。现在不加是因为——不需要。
+
+### 3.3 第 3 层：`DeerFlowContext` — per-invocation typed context
+
+```python
+# deerflow/config/deer_flow_context.py
+@dataclass(frozen=True)
+class DeerFlowContext:
+    """Typed, immutable, per-invocation context injected via LangGraph Runtime"""
+    app_config: AppConfig
+    thread_id: str
+    agent_name: str | None = None
+```
+
+为什么不把 `thread_id` 也放进 `AppConfig`？
+
+- `AppConfig` 是**配置**——进程启动时确定，所有请求共享
+- `thread_id` 是**每次调用变的运行时身份**——必须 per-invocation
+
+两者是不同的 category，混在一起就是把静态配置和动态 identity 耦合。
+
+**注入路径**：
+
+```python
+# Gateway worker（主路径）
+deer_flow_context = DeerFlowContext(
+    app_config=AppConfig.current(),
+    thread_id=thread_id,
+)
+agent.astream(input, config=config, context=deer_flow_context)
+
+# DeerFlowClient
+AppConfig.init(AppConfig.from_file(config_path))
+context = DeerFlowContext(app_config=AppConfig.current(), thread_id=thread_id)
+agent.stream(input, config=config, context=context)
+```
+
+LangGraph 的 `Runtime` 会把 `context=...` 的值注入到 `Runtime[DeerFlowContext].context` 里。Middleware 拿到的就是 typed 的 `DeerFlowContext`。
+
+**不进 context 的东西**：`sandbox_id`——它是 mid-execution 才 acquire 的**可变运行时状态**，正确的归宿是 `ThreadState.sandbox`（state channel，有 reducer），不是 context。原先 `sandbox/tools.py` 里 3 处 `runtime.context["sandbox_id"] = ...` 的写法全部删除。
+
+### 3.4 第 4 层：访问模式按 caller 类型分流
+
+三种 caller，三种模式：
+
+| Caller 类型 | 访问模式 | 例子 |
+|-------------|----------|------|
+| Typed middleware（签名写 `Runtime[DeerFlowContext]`） | `runtime.context.app_config.xxx` 直读，无包装 | `memory_middleware` / `title_middleware` / `thread_data_middleware` 等 |
+| 可能遇到 dict context 的 tool | `resolve_context(runtime).xxx` | `sandbox/tools.py`（dict-legacy 路径）/ `task_tool.py`（bash subagent gate） |
+| 非 agent 路径（Gateway router、CLI、factory） | `AppConfig.current().xxx` | `app/gateway/routers/*` / `reset_admin.py` / `models/factory.py` |
+
+**关键简化**（commit `a934a822`）：原本所有 middleware 都走 `resolve_context()`，后来发现既然签名已经是 `Runtime[DeerFlowContext]`，包装就是冗余防御，直接 `runtime.context.app_config.xxx` 就行。同时也把 `title_middleware` 里每个 helper 的 `title_config=None` fallback 都删掉了——**required parameter 不给 default**，让类型系统强制 caller 传对。
+
+这对应 Scala / FP 的两个信条：
+- **让非法状态不可表示**（`Option[TitleConfig]` 改成 `TitleConfig` required）
+- **Let-it-crash**（config 解析失败是真 bug，surface 出来比吞掉退化更好）
+
+## 4. `resolve_context()` 的三种分支
+
+`resolve_context()` 自己还在，处理三种 runtime.context 形状：
+
+```python
+def resolve_context(runtime: Any) -> DeerFlowContext:
+    ctx = getattr(runtime, "context", None)
+
+    # 1. typed 路径（Gateway、Client）— 直接返回
+    if isinstance(ctx, DeerFlowContext):
+        return ctx
+
+    # 2. dict-legacy 路径（老测试、第三方 invoke）— 桥接
+    if isinstance(ctx, dict):
+        thread_id = ctx.get("thread_id", "")
+        if not thread_id:
+            logger.warning("...empty thread_id...")
+        return DeerFlowContext(
+            app_config=AppConfig.current(),
+            thread_id=thread_id,
+            agent_name=ctx.get("agent_name"),
+        )
+
+    # 3. 完全没 context — fall back 到 LangGraph configurable
+    cfg = get_config().get("configurable", {})
+    return DeerFlowContext(
+        app_config=AppConfig.current(),
+        thread_id=cfg.get("thread_id", ""),
+        agent_name=cfg.get("agent_name"),
+    )
+```
+
+空 thread_id 会 warn，不会硬崩——在这里 warn 比 crash 合理，因为 `thread_id` 缺失只影响文件路径（落到空字符串目录），不会让整个 agent 跑崩。
+
+## 5. Gateway config 热更新流程
+
+历史上 Gateway 用 `reload_*_config()` 带 mtime 检测。现在改成：
+
+```
+写 extensions_config.json → AppConfig.init(AppConfig.from_file()) → 下一个请求看到新值
+```
+
+**没有**：mtime 检测、自动刷新、`reload_*()` 函数。
+
+哲学很简单：**结构性变化（模型、tools、middleware 链）需要重建 agent；运行时变化（`memory.enabled` 这种 flag）下一次 invocation 从 `AppConfig.current()` 取值就自动生效**。不需要给 config 做"活对象"语义。
+
+## 6. 从原计划的分歧
+
+三处关键分歧（详情见 [归档 spec §7](./plans/2026-04-12-config-refactor-design.md#7-divergence-from-original-plan)）：
+
+| 分歧 | 原计划 | Shipped | 原因 |
+|------|--------|---------|------|
+| Lifecycle 存储 | 单 ContextVar，`ConfigNotInitializedError` 硬崩 | 3 层 fallback，auto-load + warning | ContextVar 跨 async 边界传不过去 |
+| 模块位置 | 新建 `context.py` | Lifecycle 放在 `AppConfig` 自身 classmethod | 减一层模块耦合 |
+| Middleware 访问 | 处处 `resolve_context()` | typed middleware 直读 `runtime.context.xxx` | 类型收紧后防御性包装是 noise |
+
+## 7. 从 Scala / Actor 视角的几点观察
+
+- **`AppConfig` 就是个 case class / ADT**。`frozen=True` 相当于 Scala 的 final case class：构造完就不动。改动靠 `model_copy(update=…)`，对应 Scala 的 `copy(…)`。
+- **`DeerFlowContext` 是 typed reader**。Middleware 接收 `Runtime[DeerFlowContext]`，本质是 `Kleisli[DeerFlowContext, State, Result]`——依赖注入，类型化。比 `RunnableConfig.configurable: dict[str, Any]` 强太多。
+- **`resolve_context()` 是适配层**。存在是因为有三种不同形状的上游输入；在纯 FP 眼里这是个 `X => DeerFlowContext` 的 total function，通过 pattern match 三种 case 把世界收敛回 typed 的那条路径。
+- **Let-it-crash 的体现**：commit `a934a822` 干掉 middleware 里 `try/except resolve_context(...)`，干掉 `TitleConfig | None` 的 defensive fallback。Config 解析失败就让它抛出去，别吞成"degraded mode"——actor supervision 会处理，吞错反而藏 bug。
+- **进程 global 的妥协**：`_global: ClassVar` 是这套设计里唯一违背纯值的地方。但在 Python async + HTTP server 的语境里，你没别的办法跨 request 把"新 config"传给所有 task。承认妥协、限制范围（只在 lifecycle 层一个变量）、周边全部 immutable——这就是工程意义上的"合理妥协"。
+
+## 8. Cheat sheet
+
+想访问 config，怎么办？按你写代码的位置看：
+
+| 我在写什么 | 用什么 |
+|------------|--------|
+| Typed middleware（签名 `Runtime[DeerFlowContext]`） | `runtime.context.app_config.xxx` |
+| Typed tool（`ToolRuntime[DeerFlowContext]`） | `runtime.context.xxx` |
+| 可能被老调用方以 dict context 调到的 tool | `resolve_context(runtime).xxx` |
+| Gateway router、CLI、factory、测试 helper | `AppConfig.current().xxx` |
+| 启动时初始化 | `AppConfig.init(AppConfig.from_file(path))` |
+| 测试里想临时改 config | `token = AppConfig.set_override(cfg)` / `AppConfig.reset_override(token)` |
+| Gateway 写完新 `extensions_config.json` 之后 | `AppConfig.init(AppConfig.from_file())`，然后让 agent 重建（如果结构变了） |
+
+不要：
+- ~~`get_memory_config()` / `get_title_config()` 等旧 getter~~（已删）
+- ~~`reload_app_config()` / `reset_app_config()`~~（已删）
+- ~~`_memory_config` 等模块级 global~~（已删）
+- ~~`runtime.context["sandbox_id"] = ...`~~（走 `runtime.state["sandbox"]`）
+- ~~防御性 `try/except resolve_context(...)`~~（让它崩）
diff --git a/docs/plans/2026-04-12-config-refactor-design.md b/docs/plans/2026-04-12-config-refactor-design.md
new file mode 100644
index 000000000..3a56866b4
--- /dev/null
+++ b/docs/plans/2026-04-12-config-refactor-design.md
@@ -0,0 +1,414 @@
+# Design: Eliminate Global Mutable State in Configuration System
+
+> Implements [#1811](https://github.com/bytedance/deer-flow/issues/1811) · Tracked in [#2151](https://github.com/bytedance/deer-flow/issues/2151)
+>
+> **Phase 1 (shipped):** [PR #2271](https://github.com/bytedance/deer-flow/pull/2271) — frozen config tree, purify `from_file()`, 3-tier `AppConfig.current()` lifecycle, `DeerFlowContext` for agent execution path.
+>
+> **Phase 2 (proposed):** eliminate the remaining implicit-state surface (`_global` / `_override` / `current()`) via pure explicit parameter passing. See §8.
+
+## Problem
+
+`deerflow/config/` had three structural issues:
+
+1. **Dual source of truth** — each sub-config existed both as an `AppConfig` field and a module-level global (e.g. `_memory_config`). Consumers didn't know which to trust.
+2. **Side-effect coupling** — `AppConfig.from_file()` silently mutated 8 sub-module globals via `load_*_from_dict()` calls.
+3. **Incomplete isolation** — `ContextVar` only scoped `AppConfig`, not the 8 sub-config globals.
+
+## Design Principle
+
+**Config is a value object, not live shared state.** Constructed once, immutable, no reload. New config = new object + rebuild agent.
+
+## Solution
+
+### 1. Frozen AppConfig (full tree)
+
+All config models set `frozen=True`, including `DatabaseConfig` and `RunEventsConfig` (added late in review). No mutation after construction.
+
+```python
+class MemoryConfig(BaseModel):
+    model_config = ConfigDict(frozen=True)
+
+class AppConfig(BaseModel):
+    model_config = ConfigDict(extra="allow", frozen=True)
+    memory: MemoryConfig
+    title: TitleConfig
+    ...
+```
+
+Changes use copy-on-write: `config.model_copy(update={...})`.
+
+### 2. Pure `from_file()`
+
+`AppConfig.from_file()` is a pure function — returns a frozen object, no side effects. All 8 `load_*_from_dict()` calls and their imports were removed.
+
+### 3. Deleted sub-module globals
+
+Every sub-config module's global state was deleted:
+
+| Deleted | Files |
+|---------|-------|
+| `_memory_config`, `get_memory_config()`, `set_memory_config()`, `load_memory_config_from_dict()` | `memory_config.py` |
+| `_title_config`, `get_title_config()`, `set_title_config()`, `load_title_config_from_dict()` | `title_config.py` |
+| Same pattern | `summarization_config.py`, `subagents_config.py`, `guardrails_config.py`, `tool_search_config.py`, `checkpointer_config.py`, `stream_bridge_config.py`, `acp_config.py` |
+| `_extensions_config`, `reload_extensions_config()`, `reset_extensions_config()`, `set_extensions_config()` | `extensions_config.py` |
+| `reload_app_config()`, `reset_app_config()`, `set_app_config()`, mtime detection, `push/pop_current_app_config()` | `app_config.py` |
+
+Consumers migrated from `get_memory_config()` → `AppConfig.current().memory` (~100 call-sites).
+
+### 4. Lifecycle: 3-tier `AppConfig.current()`
+
+The original plan called for a single `ContextVar` with hard-fail on uninitialized access. The shipped lifecycle is a **3-tier fallback** attached to `AppConfig` itself (no separate `context.py` module). The divergence is explained in §7.
+
+```python
+# app_config.py
+class AppConfig(BaseModel):
+    ...
+
+    # Process-global singleton. Atomic pointer swap under the GIL,
+    # so no lock is needed for current read/write patterns.
+    _global: ClassVar[AppConfig | None] = None
+
+    # Per-context override (tests, multi-client scenarios).
+    _override: ClassVar[ContextVar[AppConfig]] = ContextVar("deerflow_app_config_override")
+
+    @classmethod
+    def init(cls, config: AppConfig) -> None:
+        """Set the process-global. Visible to all subsequent async tasks."""
+        cls._global = config
+
+    @classmethod
+    def set_override(cls, config: AppConfig) -> Token[AppConfig]:
+        """Per-context override. Returns Token for reset_override()."""
+        return cls._override.set(config)
+
+    @classmethod
+    def reset_override(cls, token: Token[AppConfig]) -> None:
+        cls._override.reset(token)
+
+    @classmethod
+    def current(cls) -> AppConfig:
+        """Priority: per-context override > process-global > auto-load from file."""
+        try:
+            return cls._override.get()
+        except LookupError:
+            pass
+        if cls._global is not None:
+            return cls._global
+        logger.warning(
+            "AppConfig.current() called before init(); auto-loading from file. "
+            "Call AppConfig.init() at process startup to surface config errors early."
+        )
+        config = cls.from_file()
+        cls._global = config
+        return config
+```
+
+**Why three tiers and not one:**
+
+- **Process-global** is required because `ContextVar` doesn't propagate config updates across async request boundaries. Gateway receives a `PUT /mcp/config` on one request, reloads config, and the next request — in a fresh async context — must see the new value. A plain class variable (`_global`) does this; a `ContextVar` does not.
+- **Per-context override** is retained for test isolation and multi-client scenarios. A test can scope its config without mutating the process singleton. `reset_override()` restores the previous state deterministically via `Token`.
+- **Auto-load fallback** is a backward-compatibility escape hatch with a warning. Call sites that skipped explicit `init()` (legacy or test) still work, but the warning surfaces the miss.
+
+### 5. Per-invocation context: `DeerFlowContext`
+
+Lives in `deerflow/config/deer_flow_context.py` (not `context.py` as originally planned — the name was reserved to avoid implying a lifecycle module).
+
+```python
+@dataclass(frozen=True)
+class DeerFlowContext:
+    """Typed, immutable, per-invocation context injected via LangGraph Runtime."""
+    app_config: AppConfig
+    thread_id: str
+    agent_name: str | None = None
+```
+
+**Fields:**
+
+| Field | Type | Source | Mutability |
+|-------|------|--------|-----------|
+| `app_config` | `AppConfig` | `AppConfig.current()` at run start | Immutable per-run |
+| `thread_id` | `str` | Caller-provided | Immutable per-run |
+| `agent_name` | `str \| None` | Caller-provided (bootstrap only) | Immutable per-run |
+
+**Not in context:** `sandbox_id` is mutable runtime state (lazy-acquired mid-execution). It flows through `ThreadState.sandbox` (state channel), not context. All 3 `runtime.context["sandbox_id"] = ...` writes in `sandbox/tools.py` were removed; `SandboxMiddleware.after_agent` reads from `state["sandbox"]` only.
+
+**Construction per entry point:**
+
+```python
+# Gateway runtime (worker.py) — primary path
+deer_flow_context = DeerFlowContext(
+    app_config=AppConfig.current(),
+    thread_id=thread_id,
+)
+agent.astream(input, config=config, context=deer_flow_context)
+
+# DeerFlowClient (client.py)
+AppConfig.init(AppConfig.from_file(config_path))
+context = DeerFlowContext(app_config=AppConfig.current(), thread_id=thread_id)
+agent.stream(input, config=config, context=context)
+
+# LangGraph Server — legacy path, context=None or dict, fallback via resolve_context()
+```
+
+### 6. Access pattern by caller type
+
+The shipped code stratifies callers by what `runtime.context` type they see, and tightened middleware access over time:
+
+| Caller type | Access pattern | Examples |
+|-------------|---------------|----------|
+| Typed middleware (declares `Runtime[DeerFlowContext]`) | `runtime.context.app_config.xxx` — direct field access, no wrapper | `memory_middleware`, `title_middleware`, `thread_data_middleware`, `uploads_middleware`, `loop_detection_middleware` |
+| Tools that may see legacy dict context | `resolve_context(runtime).xxx` | `sandbox/tools.py` (bash-guard gate, sandbox config), `task_tool.py` (bash subagent gate) |
+| Tools with typed runtime | `runtime.context.xxx` directly | `present_file_tool.py`, `setup_agent_tool.py`, `skill_manage_tool.py` |
+| Non-agent paths (Gateway routers, CLI, factories) | `AppConfig.current().xxx` | `app/gateway/routers/*`, `reset_admin.py`, `models/factory.py` |
+
+**Middleware hardening** (late commit `a934a822`): the original plan had middlewares call `resolve_context(runtime)` everywhere. In practice, once the middleware signature was typed as `Runtime[DeerFlowContext]`, the wrapper became defensive noise. The commit removed:
+- `try/except` wrappers around `resolve_context(...)` in middlewares and sandbox tools
+- Optional `title_config=None` fallback on every `_build_title_prompt` / `_format_for_title_model` helper; they now take `TitleConfig` as a **required parameter**
+- Ad-hoc `get_config()` fallback chains in `memory_middleware`
+
+Dropping the swallowed-exception layer means config-resolution bugs surface as errors instead of silently degrading — aligning with let-it-crash.
+
+`resolve_context()` itself still exists and handles three cases:
+
+```python
+def resolve_context(runtime: Any) -> DeerFlowContext:
+    ctx = getattr(runtime, "context", None)
+    if isinstance(ctx, DeerFlowContext):
+        return ctx                        # typed path (Gateway, Client)
+    if isinstance(ctx, dict):
+        return DeerFlowContext(           # legacy dict path (with warning if empty thread_id)
+            app_config=AppConfig.current(),
+            thread_id=ctx.get("thread_id", ""),
+            agent_name=ctx.get("agent_name"),
+        )
+    # Final fallback: LangGraph configurable (e.g. LangGraph Server)
+    cfg = get_config().get("configurable", {})
+    return DeerFlowContext(
+        app_config=AppConfig.current(),
+        thread_id=cfg.get("thread_id", ""),
+        agent_name=cfg.get("agent_name"),
+    )
+```
+
+### 7. Divergence from original plan
+
+Two material divergences from the original design, both driven by implementation feedback:
+
+**7.1 Lifecycle: `ContextVar` → process-global + `ContextVar` override**
+
+*Original:* single `ContextVar` in a new `context.py` module. `get_app_config()` raises `ConfigNotInitializedError` if unset.
+
+*Shipped:* process-global `AppConfig._global` (primary) + `ContextVar` override (scoped) + auto-load with warning (fallback).
+
+*Why:* a `ContextVar` set by Gateway startup is not visible to subsequent requests that spawn fresh async contexts. `PUT /mcp/config` must update config such that the next incoming request sees the new value in *its* async task — this requires process-wide state. ContextVar is retained for test isolation (`reset_override()` works cleanly per test via `Token`) and for per-client scoping if ever needed.
+
+The `ConfigNotInitializedError` was replaced by a warning + auto-load. The hard error caught more legitimate bugs but also broke call sites that historically worked without explicit init (internal scripts, test fixtures during import-time). The warning preserves the signal without breaking backward compatibility; `backend/tests/conftest.py` now has an autouse fixture that sets `_global` to a minimal `AppConfig` so tests never hit auto-load.
+
+**7.2 Module name: `context.py` → lifecycle on `AppConfig`, `deer_flow_context.py` for the invocation context**
+
+*Original:* lifecycle and `DeerFlowContext` both in `deerflow/config/context.py`.
+
+*Shipped:* lifecycle is classmethods on `AppConfig` itself (`init`, `current`, `set_override`, `reset_override`). `DeerFlowContext` and `resolve_context()` live in `deerflow/config/deer_flow_context.py`.
+
+*Why:* the lifecycle operates on `AppConfig` directly — putting it on the class removes one level of module coupling. The per-invocation context is conceptually separate (it's agent-execution plumbing, not config lifecycle) so it got its own file with a distinguishing name.
+
+**7.3 Client lifecycle: `init() + set_override()` → `init()` only**
+
+*Original (never finalized):* `DeerFlowClient.__init__` called both `init()` (process-global) and `set_override()` so two clients with different configs wouldn't clobber each other.
+
+*Shipped:* `init()` only.
+
+*Why (commit `a934a822`):* `set_override()` leaked overrides across test boundaries because the `ContextVar` wasn't reset between client instances. Single-client is the common case, and tests use the autouse fixture for isolation. Multi-client scoping can be added back with explicit `set_override()` if the need arises.
+
+## What doesn't change
+
+- `config.yaml` schema
+- `extensions_config.json` loading
+- External API behavior (Gateway, DeerFlowClient)
+
+## Migration scope (Phase 1, actual)
+
+- ~100 call-sites: `get_*_config()` → `AppConfig.current().xxx`
+- 6 runtime-path migrations: middlewares + sandbox tools read from `runtime.context` or `resolve_context()`
+- 3 deleted sandbox_id writes in `sandbox/tools.py`
+- ~100 test locations updated; `conftest.py` autouse fixture added
+- New tests: `test_config_frozen.py`, `test_deer_flow_context.py`, `test_app_config_reload.py`
+- Gateway update flow: `reload_*` → `AppConfig.init(AppConfig.from_file())`
+- Dependency: langgraph `Runtime` / `ToolRuntime` (already available at target version)
+
+## 8. Phase 2: pure explicit parameter passing
+
+Phase 1 shipped a working 3-tier `AppConfig.current()` lifecycle. The remaining implicit-state surface is:
+
+- `AppConfig._global: ClassVar` — process-level singleton
+- `AppConfig._override: ClassVar[ContextVar]` — per-context override
+- `AppConfig.current()` — fallback-chain reader with auto-load warning
+
+Phase 2 proposes removing all three. `AppConfig` reduces to a pure Pydantic value object with `from_file()` as its only factory. All consumers receive `AppConfig` as an explicit parameter, either through a typed constructor, a function signature, or LangGraph `Runtime[DeerFlowContext]`.
+
+### 8.1 Motivation
+
+Phase 1 addressed the **data side** of the problem: config is now a frozen ADT, sub-module globals deleted, `from_file()` pure. The **access side** still relies on implicit ambient lookup:
+
+```python
+# Today (Phase 1 shipped):
+def _get_memory_prompt() -> str:
+    config = AppConfig.current().memory  # implicit global lookup
+    ...
+
+# Target (Phase 2):
+def _get_memory_prompt(config: MemoryConfig) -> str:  # explicit dependency
+    ...
+```
+
+Three concrete benefits:
+
+| Benefit | What it buys |
+|---------|-------------|
+| Referential transparency | A function's result depends only on its inputs. Testing becomes parameter substitution, no `patch.object(AppConfig, "current")` chains |
+| Dependency visibility | A function signature declares what config it needs. No "this deep helper secretly reads `.memory`" surprises |
+| True multi-config isolation | Two `DeerFlowClient` instances with different configs can run in the same process without any ambient shared state to contend over |
+
+The cost (Phase 1 wouldn't have made this smaller): ~97 production call sites + ~91 test mock sites need touching, plus signature changes for helpers that now accept `config` as a parameter.
+
+### 8.2 Non-agent call paths and their target APIs
+
+Phase 1 got the agent-execution path right (`runtime.context.app_config.xxx`). The unsolved paths split into four categories:
+
+**FastAPI Gateway** → `Depends(get_config)`
+
+```python
+# app/gateway/app.py — at startup
+app.state.config = AppConfig.from_file()
+
+# app/gateway/deps.py
+def get_config(request: Request) -> AppConfig:
+    return request.app.state.config
+
+# app/gateway/routers/models.py
+@router.get("/models")
+def list_models(config: AppConfig = Depends(get_config)):
+    ...
+
+# app/gateway/routers/mcp.py — config reload replaces AppConfig.init()
+@router.put("/config")
+def update_mcp(..., request: Request):
+    ...
+    request.app.state.config = AppConfig.from_file()
+```
+
+`app.state.config` is a FastAPI-owned attribute on the app object, not a module-level global. Scoped to the app's lifetime, only written at startup and config-reload.
+
+**`DeerFlowClient`** → constructor-captured config
+
+```python
+class DeerFlowClient:
+    def __init__(self, config_path: str | None = None, config: AppConfig | None = None):
+        self._config = config or AppConfig.from_file(config_path)
+
+    def chat(self, message: str, thread_id: str) -> str:
+        context = DeerFlowContext(app_config=self._config, thread_id=thread_id)
+        ...
+```
+
+Multiple `DeerFlowClient` instances are now first-class — each owns its config, nothing shared.
+
+**Agent construction (`make_lead_agent`, `_build_middlewares`, prompt helpers)** → threaded through
+
+```python
+def make_lead_agent(config: RunnableConfig, app_config: AppConfig):
+    middlewares = _build_middlewares(app_config, runtime_config=config)
+    ...
+
+def _build_middlewares(app_config: AppConfig, runtime_config: RunnableConfig):
+    if app_config.token_usage.enabled:
+        middlewares.append(TokenUsageMiddleware())
+    ...
+```
+
+Every helper that reads config is now on a function-signature chain from `make_lead_agent`.
+
+**Background threads (memory debounce Timer, queue consumers)** → closure-captured
+
+```python
+def MemoryQueue.add(self, conversation, user_id, config: MemoryConfig):
+    # capture config at enqueue time
+    def _flush():
+        self._updater.update(conversation, user_id, config)
+    self._timer = Timer(config.debounce_seconds, _flush)
+    self._timer.start()
+```
+
+The captured config lives in the closure, not in a contextvar the thread can't see.
+
+### 8.3 Target `AppConfig` shape
+
+```python
+class AppConfig(BaseModel):
+    model_config = ConfigDict(extra="allow", frozen=True)
+
+    log_level: str = "info"
+    memory: MemoryConfig = Field(default_factory=MemoryConfig)
+    ...  # same fields as Phase 1
+
+    @classmethod
+    def from_file(cls, config_path: str | None = None) -> Self:
+        """Pure factory. Reads file, returns frozen object. No side effects."""
+        ...
+
+    @classmethod
+    def resolve_config_path(cls, config_path: str | None = None) -> Path:
+        """Unchanged from Phase 1."""
+        ...
+
+    def get_model_config(self, name: str) -> ModelConfig | None:
+        """Unchanged."""
+        ...
+
+    # Removed:
+    # - _global: ClassVar
+    # - _override: ClassVar[ContextVar]
+    # - init(), set_override(), reset_override(), current()
+```
+
+### 8.4 `DeerFlowContext` and `resolve_context()` after Phase 2
+
+`DeerFlowContext` is unchanged — it's already Phase 2-compliant.
+
+`resolve_context()` simplifies: the "fall back to `AppConfig.current()`" branch goes away. The dict-context legacy path either constructs `DeerFlowContext` with an explicitly-passed `AppConfig` (fed by caller) or is deleted if no dict-context callers remain.
+
+```python
+def resolve_context(runtime: Any) -> DeerFlowContext:
+    ctx = getattr(runtime, "context", None)
+    if isinstance(ctx, DeerFlowContext):
+        return ctx
+    raise RuntimeError(
+        "runtime.context is not a DeerFlowContext. All callers must construct "
+        "and inject one explicitly; there is no global fallback."
+    )
+```
+
+Let-it-crash: if Phase 2 is done correctly, every caller constructs a typed context. If one doesn't, fail loudly.
+
+### 8.5 Trade-off acknowledgment
+
+The three cases where ambient lookup is genuinely tempting (and why we reject them):
+
+| Tempting case | Why ambient looks easier | Why we still reject it |
+|---------------|-------------------------|------------------------|
+| Deep helper in `memory/storage.py` needs `memory.storage_path` | Just threaded through 4 call layers | That's exactly the dependency chain you want visible. It's either there or it's hiding |
+| Community tool factory reading API keys from config | "Each tool factory doesn't want to take config" | Each tool factory literally needs the config. Passing it is the honest signature |
+| Test that wants to "override just one field globally" | `patch.object(AppConfig, "current")` is one line | Tests constructing their own `AppConfig` is one fixture — and that fixture becomes infrastructure for all future tests |
+
+The rejection is consistent: **an explicit parameter is strictly more honest than an implicit global lookup**, in every case.
+
+### 8.6 Scope
+
+- ~97 production call sites: `AppConfig.current()` → parameter
+- ~91 test mock sites: `patch.object(AppConfig, "current")` / `AppConfig._global = ...` → fixture injection
+- ~30 FastAPI endpoints gain `config: AppConfig = Depends(get_config)`
+- ~15 factory / helper functions gain `config: AppConfig` parameter
+- Delete from `app_config.py`: `_global`, `_override`, `init`, `current`, `set_override`, `reset_override`
+- Simplify `resolve_context()`: remove `AppConfig.current()` fallback
+
+Implementation plan: see [2026-04-12-config-refactor-plan.md §Phase 2](./2026-04-12-config-refactor-plan.md#phase-2-pure-explicit-parameter-passing).
diff --git a/docs/plans/2026-04-12-config-refactor-plan.md b/docs/plans/2026-04-12-config-refactor-plan.md
new file mode 100644
index 000000000..18a275b36
--- /dev/null
+++ b/docs/plans/2026-04-12-config-refactor-plan.md
@@ -0,0 +1,1208 @@
+# Config Refactor Implementation Plan — Shipped
+
+> **Status:** Shipped in [PR #2271](https://github.com/bytedance/deer-flow/pull/2271). All tasks complete. This document is an implementation log; for the shipped architecture see [design doc](./2026-04-12-config-refactor-design.md).
+>
+> **Goal:** Eliminate global mutable state in the configuration system — frozen `AppConfig`, pure `from_file()`, process-global + ContextVar-override lifecycle, `Runtime[DeerFlowContext]` propagation.
+>
+> **Tech Stack:** Pydantic v2 (`frozen=True`, `model_copy`), Python `contextvars.ContextVar` + `Token`, LangGraph `Runtime` / `ToolRuntime`.
+>
+> **Issues:** [#2151](https://github.com/bytedance/deer-flow/issues/2151) (implementation), [#1811](https://github.com/bytedance/deer-flow/issues/1811) (RFC)
+
+## Post-mortem — divergences from the original plan
+
+The implementation diverged from the original task-by-task plan in three places. The rationale lives in the design doc §7; here is the commit trail.
+
+| Divergence | Original plan | Shipped | Triggering commit |
+|------------|--------------|---------|-------------------|
+| Lifecycle storage | Single `ContextVar` in new `context.py`, raises `ConfigNotInitializedError` | 3-tier: `AppConfig._global` (process singleton) + `_override: ContextVar` + auto-load-with-warning fallback | `7a11e925` ("use process-global + ContextVar override"), refined in `4df595b0` |
+| Module / API shape | Top-level `get_app_config()` / `init_app_config()` in `context.py` | Classmethods on `AppConfig` (`current`, `init`, `set_override`, `reset_override`); `DeerFlowContext` + `resolve_context` in `deer_flow_context.py` | Same commits + `9040e49e` (call-site migration) |
+| Middleware access | `resolve_context(runtime)` in every middleware and tool | Typed middleware reads `runtime.context.xxx` directly; `resolve_context()` only in dict-legacy callers; defensive `try/except` wrappers removed | `a934a822` ("simplify runtime context access") |
+
+**Core insight:** ContextVar alone could not propagate config changes across Gateway request boundaries; process-global fixed that. The override ContextVar was kept for test/multi-client isolation. Hard-fail on uninitialized access (`ConfigNotInitializedError`) was dropped in favor of warning + auto-load to preserve backward compatibility, and tests use an autouse fixture in `backend/tests/conftest.py` to avoid the auto-load path.
+
+---
+
+## File Structure (shipped)
+
+### New files
+
+| File | Responsibility |
+|------|---------------|
+| `deerflow/config/deer_flow_context.py` | `DeerFlowContext` frozen dataclass + `resolve_context()` helper |
+
+The originally-planned `deerflow/config/context.py` was never created. Lifecycle (`init`, `current`, `set_override`, `reset_override`) is on `AppConfig` itself in `app_config.py`.
+
+### Modified files (config layer)
+
+| File | Change |
+|------|--------|
+| `deerflow/config/app_config.py` | `frozen=True`, purify `from_file()`, delete mtime/reload/reset/push/pop; add classmethods `init`/`current`/`set_override`/`reset_override` with `_global` ClassVar and `_override` ContextVar |
+| `deerflow/config/memory_config.py` | `frozen=True`, delete all globals and loader functions |
+| `deerflow/config/title_config.py` | Same pattern |
+| `deerflow/config/summarization_config.py` | Same pattern |
+| `deerflow/config/subagents_config.py` | Same pattern |
+| `deerflow/config/guardrails_config.py` | Same pattern (also delete `reset_guardrails_config`) |
+| `deerflow/config/tool_search_config.py` | Same pattern |
+| `deerflow/config/checkpointer_config.py` | Same pattern |
+| `deerflow/config/stream_bridge_config.py` | Same pattern |
+| `deerflow/config/acp_config.py` | Same pattern |
+| `deerflow/config/extensions_config.py` | `frozen=True`, delete globals (`_extensions_config`, `reload_extensions_config`, `reset_extensions_config`, `set_extensions_config`) |
+| `deerflow/config/database_config.py` | `frozen=True` (added in `4df595b0` review round) |
+| `deerflow/config/run_events_config.py` | `frozen=True` (same) |
+| `deerflow/config/tracing_config.py` | `frozen=True`, unchanged exports |
+| `deerflow/config/__init__.py` | Removed deleted getter exports; no new re-exports needed since API is now on `AppConfig` |
+
+### Modified files (production consumers)
+
+| File | Change |
+|------|--------|
+| `deerflow/agents/lead_agent/agent.py` | `get_summarization_config()` → `AppConfig.current().summarization` |
+| `deerflow/agents/lead_agent/prompt.py` | `get_memory_config()` → `AppConfig.current().memory`; ACP agents derived from `AppConfig.current()` |
+| `deerflow/agents/middlewares/memory_middleware.py` | Reads `runtime.context.app_config.memory` directly (typed `Runtime[DeerFlowContext]`) |
+| `deerflow/agents/middlewares/title_middleware.py` | `after_model` / `aafter_model` read `runtime.context.app_config.title`; helpers take `TitleConfig` as required parameter |
+| `deerflow/agents/middlewares/tool_error_handling_middleware.py` | `get_guardrails_config()` → `AppConfig.current().guardrails` |
+| `deerflow/agents/middlewares/loop_detection_middleware.py` | Reads `runtime.context.thread_id` directly |
+| `deerflow/agents/middlewares/thread_data_middleware.py` | Reads `runtime.context.thread_id` directly |
+| `deerflow/agents/middlewares/uploads_middleware.py` | Reads `runtime.context.thread_id` directly |
+| `deerflow/agents/memory/updater.py` / `queue.py` / `storage.py` | `get_memory_config()` → `AppConfig.current().memory` |
+| `deerflow/runtime/checkpointer/provider.py` / `async_provider.py` | `get_checkpointer_config()` → `AppConfig.current().checkpointer` |
+| `deerflow/runtime/store/provider.py` / `async_provider.py` | Same pattern |
+| `deerflow/runtime/stream_bridge/async_provider.py` | `get_stream_bridge_config()` → `AppConfig.current().stream_bridge` |
+| `deerflow/runtime/runs/worker.py` | Constructs `DeerFlowContext(app_config=AppConfig.current(), thread_id=thread_id)` and passes via `agent.astream(context=...)` |
+| `deerflow/subagents/registry.py` | `get_subagents_app_config()` → `AppConfig.current().subagents` |
+| `deerflow/sandbox/middleware.py` | Reads `runtime.context.thread_id`; removed `runtime.context["sandbox_id"]` read path |
+| `deerflow/sandbox/tools.py` | Removed 3× `runtime.context["sandbox_id"] = ...` writes; state now flows through `runtime.state["sandbox"]`; sandbox-config access via `resolve_context(runtime).app_config.sandbox` where dict-context fallback may still apply |
+| `deerflow/sandbox/local/local_sandbox_provider.py` / `sandbox_provider.py` / `security.py` | `get_app_config()` → `AppConfig.current()` |
+| `deerflow/community/*/tools.py` (tavily, jina_ai, firecrawl, exa, ddg_search, image_search, infoquest, aio_sandbox) | `get_app_config()` → `AppConfig.current()` |
+| `deerflow/skills/loader.py` / `manager.py` / `security_scanner.py` | Same pattern |
+| `deerflow/tools/builtins/*.py` | Typed tools read `runtime.context.xxx`; `task_tool.py` uses `resolve_context()` for bash-subagent guard |
+| `deerflow/tools/tools.py` / `skill_manage_tool.py` | ACP agents derived from `AppConfig.current()`; skill manage reads `runtime.context.thread_id` |
+| `deerflow/models/factory.py` | `get_app_config()` → `AppConfig.current()` |
+| `deerflow/utils/file_conversion.py` | Same |
+| `deerflow/client.py` | `AppConfig.init(AppConfig.from_file(config_path))`; constructs `DeerFlowContext` at invoke time. Earlier iterations used `set_override()`; removed in `a934a822` |
+| `app/gateway/app.py` | `AppConfig.init(AppConfig.from_file())` at startup |
+| `app/gateway/deps.py` / `auth/reset_admin.py` | `get_app_config()` → `AppConfig.current()` |
+| `app/gateway/routers/mcp.py` / `skills.py` | Construct new config + `AppConfig.init()` instead of `reload_extensions_config()` |
+| `app/gateway/routers/memory.py` / `models.py` | `get_memory_config()` → `AppConfig.current().memory`, etc. |
+| `app/channels/service.py` | `get_app_config()` → `AppConfig.current()` |
+| `backend/CLAUDE.md` | Config Lifecycle + `DeerFlowContext` sections updated |
+
+### Modified files (tests)
+
+~100 test locations updated. Patterns:
+
+- `@patch("...get_memory_config")` → `@patch.object(AppConfig, "current", ...)` returning a frozen `AppConfig` with the desired sub-config
+- Tests that mutated `AppConfig` instances now construct fresh ones or use `model_copy(update={...})`
+- `backend/tests/conftest.py` gained an autouse `_auto_app_config` fixture that sets `AppConfig._global` to a minimal config for every test
+
+New test files:
+- `backend/tests/test_config_frozen.py` — verifies every config model rejects mutation
+- `backend/tests/test_deer_flow_context.py` — verifies `DeerFlowContext` construction, defaults, and `resolve_context()` for all three input shapes
+- `backend/tests/test_app_config_reload.py` — verifies lifecycle: `init()` visibility across contexts, `set_override()` + `reset_override()` with `Token`, auto-load warning
+
+---
+
+## Task log
+
+All tasks complete. Checkboxes below reflect the shipped state. For detailed step-by-step TDD sequence, see the commit history on `refactor/config-deerflow-context`.
+
+### Task 1: Freeze all sub-config models
+
+- [x] Write `test_config_frozen.py` parameterized over every config model
+- [x] Add `model_config = ConfigDict(frozen=True)` (or `extra="allow", frozen=True`) to every model
+- [x] Add frozen=True to `DatabaseConfig`, `RunEventsConfig` in review round (`4df595b0`)
+- [x] Fix tests that mutated config objects — use `model_copy(update={...})` or fresh instances
+
+### Task 2: Freeze `AppConfig`
+
+- [x] Extend `test_config_frozen.py` with `test_app_config_is_frozen`
+- [x] Change `AppConfig.model_config` to `ConfigDict(extra="allow", frozen=True)`
+
+### Task 3: Purify `from_file()`
+
+- [x] Write test verifying no `load_*_from_dict()` calls happen during `from_file()`
+- [x] Remove all 8 `load_*_from_dict()` calls and their imports from `app_config.py`
+
+### Task 4: Replace `app_config.py` lifecycle
+
+**Diverged from original plan.** See post-mortem for rationale.
+
+- [x] ~~Create `deerflow/config/context.py`~~ → Lifecycle added directly to `AppConfig` as classmethods
+- [x] Add `_global: ClassVar[AppConfig | None]` for process-global storage (atomic pointer swap under GIL, no lock)
+- [x] Add `_override: ClassVar[ContextVar[AppConfig]]` for per-context override
+- [x] Implement `init()`, `current()`, `set_override()` (returns `Token`), `reset_override()`
+- [x] `current()` priority order: override → global → auto-load-with-warning
+- [x] Delete old lifecycle: `get_app_config`, `reload_app_config`, `reset_app_config`, `set_app_config`, `peek_current_app_config`, `push_current_app_config`, `pop_current_app_config`, `_load_and_cache_app_config`, mtime globals
+- [x] Write `test_app_config_reload.py` covering init/override/reset/auto-load paths
+
+Commits: `7a11e925` (initial process-global + override), `4df595b0` (harden: `Token` return, auto-load warning, doc `_global` lock-free rationale).
+
+### Task 5: Migrate call sites to `AppConfig.current()`
+
+- [x] ~100 `get_app_config()` / `get_memory_config()` / `get_title_config()` / ... call sites migrated to `AppConfig.current().xxx`
+- [x] Tests that patched module-level getters migrated to `patch.object(AppConfig, "current", ...)`
+- [x] Update `deerflow/config/__init__.py` — removed deleted getter exports
+
+Commits: `9040e49e` (bulk migration), `82fdabd7` (deps.py + reset_admin.py follow-up), `6c0c2ecf` (test mocks update), `faec3bf9` (runtime-path migration).
+
+### Task 6: Delete sub-config module globals (memory / title / summarization)
+
+- [x] Delete `_memory_config`, `get_memory_config`, `set_memory_config`, `load_memory_config_from_dict` from `memory_config.py`
+- [x] Delete analogous globals from `title_config.py`, `summarization_config.py`
+- [x] Migrate 6 production consumers of `get_memory_config`, 1 of `get_title_config`, 1 of `get_summarization_config`
+- [x] Fix tests that patched the deleted getters
+
+### Task 7: Delete remaining sub-config module globals
+
+- [x] `subagents_config.py` — delete globals; migrate `subagents/registry.py`
+- [x] `guardrails_config.py` — delete globals + `reset_guardrails_config`; migrate `tool_error_handling_middleware.py`
+- [x] `tool_search_config.py` — delete globals (no production consumers)
+- [x] `checkpointer_config.py` — delete globals; migrate 2 consumers in runtime/
+- [x] `stream_bridge_config.py` — delete globals; migrate 1 consumer
+- [x] `acp_config.py` — delete globals; migrate 2 consumers (`agents/lead_agent/prompt.py`, `tools/tools.py`)
+- [x] `extensions_config.py` — delete globals + `reload_extensions_config`/`reset_extensions_config`/`set_extensions_config`; migrate 4 consumers (`sandbox/tools.py`, `client.py`, `gateway/routers/mcp.py`, `gateway/routers/skills.py`)
+
+### Task 8: Update `__init__.py` exports
+
+- [x] Remove deleted-getter exports; keep type exports (`AppConfig`, `ExtensionsConfig`, `MemoryConfig`, etc.)
+- [x] `tracing_config` re-exports preserved (still function-based, no lifecycle change)
+
+### Task 9: Gateway config update flow
+
+- [x] `app/gateway/routers/mcp.py`: write extensions_config.json → `AppConfig.init(AppConfig.from_file())`
+- [x] `app/gateway/routers/skills.py`: same pattern
+- [x] `deerflow/client.py`: `update_mcp_config()` and `update_skill()` reuse the same pattern (now via `AppConfig.current().extensions` + `init(AppConfig.from_file())`)
+
+### Task 10: Create `DeerFlowContext`
+
+- [x] Create `deerflow/config/deer_flow_context.py` with `DeerFlowContext` frozen dataclass
+- [x] Fields: `app_config: AppConfig`, `thread_id: str`, `agent_name: str | None = None`
+- [x] Typed via `TYPE_CHECKING` import to avoid circular dependency
+- [x] Wire into `create_agent(context_schema=DeerFlowContext)` in `lead_agent/agent.py`
+- [x] Wire into `DeerFlowClient.stream(context=...)`
+
+### Task 11: Add `resolve_context()` helper
+
+- [x] Handle typed context (Gateway/Client path): return `runtime.context` directly
+- [x] Handle dict context (legacy/tests): construct `DeerFlowContext` from dict keys; warn on empty `thread_id`
+- [x] Handle missing context (LangGraph Server): fall back to `get_config().get("configurable", {})`; warn on empty `thread_id`
+- [x] Write `test_deer_flow_context.py` covering all three paths
+
+### Task 12: Remove `sandbox_id` from `runtime.context`
+
+- [x] Delete 3× `runtime.context["sandbox_id"] = sandbox_id` writes in `sandbox/tools.py`
+- [x] Delete context-based release path in `sandbox/middleware.py:after_agent`
+- [x] Sandbox state flows exclusively through `runtime.state["sandbox"] = {"sandbox_id": ...}`
+
+### Task 13: Wire `DeerFlowContext` into Gateway runtime and client
+
+- [x] `deerflow/runtime/runs/worker.py`: construct `DeerFlowContext(app_config=AppConfig.current(), thread_id=thread_id)`, pass via `agent.astream(context=...)`; remove dict-context injection
+- [x] `deerflow/client.py`: call `AppConfig.init(AppConfig.from_file(config_path))` in `__init__` / `_reload_config()`; construct `DeerFlowContext` at invoke time
+
+### Task 14: Migrate middleware/tools from dict access to typed access
+
+Originally planned as "replace with `resolve_context()`". Shipped as: typed middleware reads `runtime.context.xxx` directly; `resolve_context()` only where dict-context may still appear.
+
+- [x] `thread_data_middleware`, `uploads_middleware`, `memory_middleware`, `loop_detection_middleware`: `runtime.context.thread_id` direct read
+- [x] `sandbox/middleware.py`: same
+- [x] `present_file_tool`, `setup_agent_tool`, `skill_manage_tool`: same pattern (typed `ToolRuntime`)
+- [x] `task_tool.py`: keep `resolve_context()` for bash-subagent guard (uses `app_config`)
+- [x] `sandbox/tools.py`: keep `resolve_context()` for sandbox config + thread_id in dict-legacy paths
+
+Commit: `a934a822`.
+
+### Task 15: Middleware reads config from Runtime
+
+- [x] `memory_middleware`: `runtime.context.app_config.memory` — no wrapper, no `try/except`
+- [x] `title_middleware`: `runtime.context.app_config.title` passed as required parameter to helpers; no `TitleConfig | None` fallback
+- [x] `tool_error_handling_middleware`: reads from `AppConfig.current().guardrails` (lives outside per-invocation context)
+
+Commit: `a934a822`.
+
+### Task 16: Final cleanup and verification
+
+- [x] Grep verified: no remaining `runtime.context.get(...)` / `runtime.context[...]` patterns in production code (the pattern exists in `app/channels/wechat.py` but is unrelated — it's a channel-token helper, not LangGraph runtime)
+- [x] Grep verified: no remaining `get_memory_config` / `get_title_config` / `get_summarization_config` / `get_subagents_app_config` / `get_guardrails_config` / `get_tool_search_config` / `get_checkpointer_config` / `get_stream_bridge_config` / `get_acp_agents` / `reload_*` / `reset_*` / `set_extensions_config` / `push_current_app_config` / `pop_current_app_config` / `load_*_from_dict` references
+- [x] Full test suite passes (`make test` — 2376 passed per PR description)
+- [x] CI green (backend-unit-tests)
+- [x] `backend/CLAUDE.md` updated with new Config Lifecycle and `DeerFlowContext` sections
+
+---
+
+## Follow-ups (not in Phase 1 PR)
+
+- Consider re-exporting `DeerFlowContext` / `resolve_context` from `deerflow.config.__init__` for ergonomic imports.
+- `app/channels/wechat.py` uses `_resolve_context_token` — unrelated naming collision with `resolve_context()`. No action required but worth noting for future readers.
+- **Phase 2** (below) subsumes the auto-load-warning concern: `AppConfig.current()` goes away entirely rather than getting its warning promoted to error.
+
+---
+
+# Phase 2: Pure explicit parameter passing
+
+> **Status:** Shipped. P2-1..P2-5 landed first with `AppConfig.current()` kept as a transition fallback; P2-6..P2-10 landed together in commit `84dccef2` to eliminate the fallback and delete the ambient-lookup surface entirely. `AppConfig` is now a pure Pydantic value object with no process-global state and no classmethod accessors.
+>
+> **Design:** [§8 of the design doc](./2026-04-12-config-refactor-design.md#8-phase-2-pure-explicit-parameter-passing)
+
+## Shipped commits
+
+| Commit | Task | Category | What changed |
+|--------|------|----------|--------------|
+| `c45157e0` | P2-1 | infrastructure | `get_config` FastAPI dependency, `app.state.config` populated at startup |
+| `70323e05` | P2-2 | G (Gateway) | 6 routers migrated to `Depends(get_config)`; reload paths dual-write `app.state.config` + `AppConfig.init()` |
+| `f8738d1e` | P2-3 | H (Client) | `DeerFlowClient.__init__(config=...)` captures config locally; multi-client isolation test pins invariant |
+| `23b424e7` | P2-4 | B (Agent construction) | `make_lead_agent`, `_build_middlewares`, `_resolve_model_name`, `build_lead_runtime_middlewares` accept optional `app_config` |
+| `74b7a7ef` | P2-5 (partial) | D (Runtime) | `RunContext` gains `app_config` field; Worker builds `DeerFlowContext` from it; Gateway `deps.get_run_context` populates it. Standalone providers (checkpointer/store/stream_bridge) already accept optional config from Phase 1 |
+| `84dccef2` | P2-6..P2-10 | C+E+F+I + deletion | Memory closure-captures `MemoryConfig`; sandbox/skills/community/factories/tools thread `app_config` end-to-end; `resolve_context()` rejects non-typed runtime.context; `AppConfig.current()` removed; `get_sandbox_provider(app_config)` required; `make_lead_agent` LangGraph-Server bootstrap path loads via `AppConfig.from_file()`. All 2337 non-e2e tests pass. |
+
+## Completed tasks (P2-6 through P2-10)
+
+All landed in `84dccef2`.
+
+### P2-6: Memory subsystem closure-captured config (Category C) — shipped
+- [x] `MemoryConfig` captured at enqueue time so the Timer thread survives the ContextVar boundary.
+- [x] `deerflow/agents/memory/{queue,updater,storage}.py` no longer read any process-global.
+
+### P2-7: Sandbox / skills / factories / tools / community (Categories E+F) — shipped
+- [x] `sandbox/tools.py` helpers take `app_config` explicitly; the `_cached` attribute trick is gone.
+- [x] `sandbox/security.py`, `sandbox/sandbox_provider.py`, `sandbox/local/local_sandbox_provider.py`, `community/aio_sandbox/aio_sandbox_provider.py` all require `app_config`.
+- [x] `skills/manager.py` + `skills/loader.py` + `agents/lead_agent/prompt.py` cache refresh thread `app_config` through the worker thread via closure.
+- [x] Community tools (tavily, jina, firecrawl, exa, ddg, image_search, infoquest, aio_sandbox) read `resolve_context(runtime).app_config`.
+- [x] `subagents/registry.py` (`get_subagent_config`, `list_subagents`, `get_available_subagent_names`) take `app_config`.
+- [x] `models/factory.py::create_chat_model` and `tools/tools.py::get_available_tools` require `app_config`.
+
+### P2-8: Test fixtures (Category I) — shipped
+- [x] `conftest.py` autouse fixture no longer monkey-patches `AppConfig.current`; it only stubs `from_file()` so tests don't need a real `config.yaml`.
+- [x] ~90 call sites migrated: `patch.object(AppConfig, "current", ...)` removed where production no longer calls it (≈56 sites), and for the remaining ~10 files whose tests called `AppConfig.current()` themselves, the tests now hold the config in a local variable and pass it explicitly.
+- [x] `test_deer_flow_context.py` updated to assert that `resolve_context()` raises on dict/None contexts.
+- [x] `grep -rn 'AppConfig\.current' backend/tests` is clean.
+
+### P2-9: Simplify `resolve_context()` — shipped
+- [x] `resolve_context(runtime)` returns `runtime.context` when it is a `DeerFlowContext`; any other shape raises `RuntimeError` pointing at the composition root that should have attached the typed context.
+- [x] The dict-context and `get_config().configurable` fallbacks are deleted.
+
+### P2-10: Delete `AppConfig` lifecycle — shipped
+- [x] `AppConfig.current()` classmethod removed.
+- [x] `_global` / `_override` / `init` / `set_override` / `reset_override` already gone as of Phase 1; nothing left to delete on the ambient side.
+- [x] LangGraph Server bootstrap uses `AppConfig.from_file()` inside `make_lead_agent` — a pure load, not an ambient lookup.
+- [x] `backend/CLAUDE.md` Config Lifecycle section rewritten to describe the explicit-parameter design.
+- [x] `app/gateway/deps.py` docstrings no longer mention `AppConfig.current()`.
+- [x] Production grep confirms zero `AppConfig.current()` call sites in `backend/packages` or `backend/app`.
+
+## Rationale
+
+Phase 1 fixed the **data side** (frozen ADT, no sub-module globals, pure `from_file`). Phase 2 fixes the **access side** (no ambient lookup). Together they make `AppConfig` referentially transparent: a function's result depends only on its inputs, nothing ambient.
+
+## Scope
+
+- ~97 production call sites: `AppConfig.current()` → parameter
+- ~91 test mock sites: `patch.object(AppConfig, "current")` / `AppConfig._global = ...` → fixture injection
+- ~30 FastAPI endpoints: add `config: AppConfig = Depends(get_config)`
+- ~15 factory/helper functions: add `config: AppConfig` parameter
+- Delete Phase 1 lifecycle from `app_config.py`
+
+## Ordering rule
+
+`AppConfig._global` can only be deleted **after** every caller is migrated. Tasks run in this order:
+
+1. Introduce new primitives alongside the old ones (Task P2-1)
+2. Migrate call sites category by category (Tasks P2-2 through P2-9)
+3. Delete the old lifecycle (Task P2-10)
+
+Each category task is independently mergeable. After a category is migrated, grep confirms the old callers in that category are gone but the old lifecycle still exists (other categories may still use it).
+
+## File structure (Phase 2)
+
+### Modified files
+
+| File | Change |
+|------|--------|
+| `app/gateway/app.py` | Store config on `app.state.config` at startup; remove `AppConfig.init()` call |
+| `app/gateway/deps.py` | Add `get_config(request: Request) -> AppConfig`; remove `AppConfig.current()` uses |
+| `app/gateway/routers/*.py` | Add `config: AppConfig = Depends(get_config)` to each endpoint; remove `AppConfig.current()` |
+| `app/gateway/auth/reset_admin.py` | Take `config: AppConfig` parameter |
+| `app/channels/service.py` | Take `config: AppConfig` parameter |
+| `deerflow/client.py` | Remove `AppConfig.init()` call; store `self._config = AppConfig.from_file(...)`; all methods read `self._config` |
+| `deerflow/agents/lead_agent/agent.py` | `make_lead_agent(runtime_config, app_config)`, `_build_middlewares(app_config, ...)`, pass down through every helper |
+| `deerflow/agents/lead_agent/prompt.py` | Every helper takes config (or the specific sub-config slice it needs) as a parameter |
+| `deerflow/agents/middlewares/tool_error_handling_middleware.py` | Take guardrails config at construction |
+| `deerflow/agents/memory/queue.py` | Capture `MemoryConfig` at enqueue; Timer closure reads from capture |
+| `deerflow/agents/memory/updater.py` | Constructor takes `MemoryConfig`; store on `self` |
+| `deerflow/agents/memory/storage.py` | Constructor takes `MemoryConfig`; store on `self` |
+| `deerflow/runtime/runs/worker.py` | Receive `AppConfig` from `RunManager`; build `DeerFlowContext` from parameter |
+| `deerflow/runtime/checkpointer/provider.py` / `async_provider.py` | Constructor takes `CheckpointerConfig \| None` |
+| `deerflow/runtime/store/provider.py` / `async_provider.py` | Constructor takes relevant config |
+| `deerflow/runtime/stream_bridge/async_provider.py` | Constructor takes `StreamBridgeConfig \| None` |
+| `deerflow/sandbox/*.py`, `deerflow/skills/*.py` | Helpers take config parameter |
+| `deerflow/community/*/tools.py` | Factory takes config parameter |
+| `deerflow/models/factory.py` | `create_chat_model(name, config, thinking_enabled=False)` |
+| `deerflow/tools/tools.py` | `get_available_tools(config, ...)` |
+| `deerflow/subagents/registry.py` | Helper takes `SubagentsAppConfig` |
+| `deerflow/config/deer_flow_context.py` | Simplify `resolve_context()`: typed-only; raise on non-DeerFlowContext |
+| `deerflow/config/app_config.py` | **Delete** `_global`, `_override`, `init`, `current`, `set_override`, `reset_override` |
+| `backend/tests/conftest.py` | Replace `_auto_app_config` autouse fixture with per-test `test_config` fixture returning `AppConfig` |
+| `backend/tests/test_*.py` | Replace `patch.object(AppConfig, "current", ...)` with passing different `AppConfig` instances |
+| `backend/CLAUDE.md` | Update Config Lifecycle section to describe pure-parameter design |
+
+### New files
+
+None. Phase 2 is a pure refactor — same file set.
+
+---
+
+## Task P2-1: Add FastAPI `Depends(get_config)` infrastructure
+
+Introduce the new FastAPI DI primitive. Old `AppConfig.current()` still works; this task only adds the new path.
+
+**Files:**
+- Modify: `backend/app/gateway/app.py`
+- Modify: `backend/app/gateway/deps.py`
+- Test: `backend/tests/test_gateway_deps_config.py` (new)
+
+- [ ] **Step 1: Write the failing test**
+
+```python
+# backend/tests/test_gateway_deps_config.py
+from fastapi import FastAPI, Depends
+from fastapi.testclient import TestClient
+from deerflow.config.app_config import AppConfig
+from deerflow.config.sandbox_config import SandboxConfig
+from app.gateway.deps import get_config
+
+
+def test_get_config_returns_app_state_config():
+    app = FastAPI()
+    cfg = AppConfig(sandbox=SandboxConfig(use="test"))
+    app.state.config = cfg
+
+    @app.get("/probe")
+    def probe(c: AppConfig = Depends(get_config)):
+        return {"same": c is cfg}
+
+    client = TestClient(app)
+    assert client.get("/probe").json() == {"same": True}
+```
+
+- [ ] **Step 2: Run test to verify it fails**
+
+```bash
+cd backend && PYTHONPATH=. uv run pytest tests/test_gateway_deps_config.py -v
+```
+Expected: FAIL — `get_config` doesn't exist or returns the wrong thing.
+
+- [ ] **Step 3: Add `get_config` to `deps.py`**
+
+```python
+# backend/app/gateway/deps.py
+from fastapi import Request
+from deerflow.config.app_config import AppConfig
+
+
+def get_config(request: Request) -> AppConfig:
+    """FastAPI dependency that returns the app-scoped AppConfig."""
+    return request.app.state.config
+```
+
+- [ ] **Step 4: Wire startup in `app.py`**
+
+In `backend/app/gateway/app.py`, at startup (existing `AppConfig.init` call site), add:
+
+```python
+app.state.config = AppConfig.from_file()
+# Keep AppConfig.init() for now — other callers still use AppConfig.current()
+AppConfig.init(app.state.config)
+```
+
+- [ ] **Step 5: Run test to verify it passes**
+
+```bash
+cd backend && PYTHONPATH=. uv run pytest tests/test_gateway_deps_config.py -v
+```
+Expected: PASS.
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add backend/app/gateway/deps.py backend/app/gateway/app.py backend/tests/test_gateway_deps_config.py
+git commit -m "feat(config): add FastAPI get_config dependency reading from app.state"
+```
+
+---
+
+## Task P2-2 (Category G): Migrate FastAPI routers to `Depends(get_config)`
+
+**Files:**
+- Modify: `backend/app/gateway/routers/models.py` (2 calls)
+- Modify: `backend/app/gateway/routers/mcp.py` (3 calls)
+- Modify: `backend/app/gateway/routers/memory.py` (2 calls)
+- Modify: `backend/app/gateway/routers/skills.py` (1 call)
+- Modify: `backend/app/gateway/auth/reset_admin.py` (1 call)
+- Modify: `backend/app/channels/service.py` (1 call)
+
+**Pattern for each endpoint:**
+
+```python
+# Before
+from deerflow.config.app_config import AppConfig
+
+@router.get("/models")
+def list_models():
+    models = AppConfig.current().models
+    ...
+
+# After
+from fastapi import Depends
+from app.gateway.deps import get_config
+
+@router.get("/models")
+def list_models(config: AppConfig = Depends(get_config)):
+    models = config.models
+    ...
+```
+
+**For `mcp.py` / `skills.py` runtime config reload:**
+
+```python
+# Before
+AppConfig.init(AppConfig.from_file())
+
+# After
+request.app.state.config = AppConfig.from_file()
+# Keep the AppConfig.init() call alongside for now — other consumers still need it
+AppConfig.init(request.app.state.config)
+```
+
+- [ ] **Step 1: Migrate `models.py`**
+
+Replace 2 `AppConfig.current()` reads with `config: AppConfig = Depends(get_config)` parameter.
+
+- [ ] **Step 2: Migrate `mcp.py`** — 3 reads + 1 reload write
+
+- [ ] **Step 3: Migrate `memory.py`** — 2 reads
+
+- [ ] **Step 4: Migrate `skills.py`** — 1 read + 1 reload write
+
+- [ ] **Step 5: Migrate `auth/reset_admin.py`**
+
+`reset_admin.py` is a CLI-like entry. Signature changes to `reset_admin(config: AppConfig)`. Caller in `cli.py` (or wherever it's invoked) constructs config at top.
+
+- [ ] **Step 6: Migrate `app/channels/service.py`**
+
+Constructor or `start_channel_service(config: AppConfig)` — pass config from `app.py` where it's called.
+
+- [ ] **Step 7: Run full gateway test suite**
+
+```bash
+cd backend && PYTHONPATH=. uv run pytest tests/test_gateway_*.py tests/test_channels_*.py -v
+```
+
+- [ ] **Step 8: Grep verify Category G complete**
+
+```bash
+cd backend && grep -rn "AppConfig\.current()" app/gateway/ app/channels/
+```
+Expected: no matches.
+
+- [ ] **Step 9: Commit**
+
+```bash
+git add backend/app/gateway/ backend/app/channels/ backend/tests/
+git commit -m "refactor(config): migrate gateway routers and channels to Depends(get_config)"
+```
+
+---
+
+## Task P2-3 (Category H): `DeerFlowClient` constructor-captured config
+
+**Files:**
+- Modify: `backend/packages/harness/deerflow/client.py` (7 `current()` + 2 `init()` calls)
+- Modify: `backend/tests/test_client.py`, `backend/tests/test_client_e2e.py`
+
+**Pattern:**
+
+```python
+# Before
+class DeerFlowClient:
+    def __init__(self, config_path: str | None = None):
+        if config_path is not None:
+            AppConfig.init(AppConfig.from_file(config_path))
+        self._app_config = AppConfig.current()
+
+    def some_method(self):
+        ext = AppConfig.current().extensions
+        ...
+
+# After
+class DeerFlowClient:
+    def __init__(
+        self,
+        config_path: str | None = None,
+        config: AppConfig | None = None,
+    ):
+        self._config = config or AppConfig.from_file(config_path)
+
+    def some_method(self):
+        ext = self._config.extensions
+        ...
+
+    def _reload_config(self):
+        # Mutate self._config with model_copy or rebuild from file
+        self._config = AppConfig.from_file(...)
+```
+
+- [ ] **Step 1: Update constructor signature**
+
+Add `config: AppConfig | None = None` parameter. Construct `self._config` locally, not via `AppConfig.init() + current()`.
+
+- [ ] **Step 2: Replace all 7 `AppConfig.current()` calls with `self._config`**
+
+- [ ] **Step 3: Update `_reload_config()` to rebuild `self._config`**
+
+- [ ] **Step 4: Write test for multi-client isolation**
+
+```python
+# backend/tests/test_client_multi_isolation.py
+from deerflow.client import DeerFlowClient
+from deerflow.config.app_config import AppConfig
+from deerflow.config.sandbox_config import SandboxConfig
+from deerflow.config.memory_config import MemoryConfig
+
+
+def test_two_clients_different_configs_do_not_contend():
+    cfg_a = AppConfig(sandbox=SandboxConfig(use="test"), memory=MemoryConfig(enabled=True))
+    cfg_b = AppConfig(sandbox=SandboxConfig(use="test"), memory=MemoryConfig(enabled=False))
+
+    client_a = DeerFlowClient(config=cfg_a)
+    client_b = DeerFlowClient(config=cfg_b)
+
+    assert client_a._config.memory.enabled is True
+    assert client_b._config.memory.enabled is False
+    # Verify mutation of one client's config does not affect the other
+    # (impossible because frozen, but verify via identity too)
+    assert client_a._config is cfg_a
+    assert client_b._config is cfg_b
+```
+
+- [ ] **Step 5: Run test to verify multi-client works**
+
+```bash
+cd backend && PYTHONPATH=. uv run pytest tests/test_client_multi_isolation.py -v
+```
+
+- [ ] **Step 6: Update existing client tests**
+
+Replace `AppConfig.init(MagicMock(...))` patterns in `test_client.py` with constructing `AppConfig` instances and passing via `DeerFlowClient(config=cfg)`.
+
+- [ ] **Step 7: Run full client test suite**
+
+```bash
+cd backend && PYTHONPATH=. uv run pytest tests/test_client*.py -v
+```
+
+- [ ] **Step 8: Grep verify Category H complete**
+
+```bash
+cd backend && grep -n "AppConfig\.current()\|AppConfig\.init(" packages/harness/deerflow/client.py
+```
+Expected: no matches.
+
+- [ ] **Step 9: Commit**
+
+```bash
+git add backend/packages/harness/deerflow/client.py backend/tests/
+git commit -m "refactor(config): DeerFlowClient captures config in constructor"
+```
+
+---
+
+## Task P2-4 (Category B): Agent construction — thread `AppConfig` from `make_lead_agent`
+
+**Files:**
+- Modify: `backend/packages/harness/deerflow/agents/lead_agent/agent.py` (5 calls)
+- Modify: `backend/packages/harness/deerflow/agents/lead_agent/prompt.py` (5 calls)
+- Modify: `backend/packages/harness/deerflow/agents/middlewares/tool_error_handling_middleware.py` (1 call)
+
+**Pattern:**
+
+```python
+# Before
+def make_lead_agent(config: RunnableConfig) -> CompiledStateGraph:
+    app_config = AppConfig.current()
+    model_name = _resolve_runtime_model_name(config)
+    ...
+
+def _build_middlewares(config, runtime_config):
+    if AppConfig.current().token_usage.enabled:
+        ...
+
+# After
+def make_lead_agent(config: RunnableConfig, app_config: AppConfig) -> CompiledStateGraph:
+    model_name = _resolve_runtime_model_name(config, app_config)
+    ...
+
+def _build_middlewares(app_config: AppConfig, runtime_config: RunnableConfig):
+    if app_config.token_usage.enabled:
+        ...
+```
+
+- [ ] **Step 1: Update `make_lead_agent` signature and internal calls**
+
+Add `app_config: AppConfig` parameter. Replace all 5 `AppConfig.current()` calls with `app_config.xxx`.
+
+- [ ] **Step 2: Update `_build_middlewares`, `_create_*_middleware` helpers**
+
+Thread `app_config` through each helper that previously called `AppConfig.current()`.
+
+- [ ] **Step 3: Update `prompt.py` helpers**
+
+Every function that previously called `AppConfig.current()` now takes the relevant config slice as a parameter. Caller (either `apply_prompt_template` or `make_lead_agent`) provides it.
+
+- [ ] **Step 4: Update `tool_error_handling_middleware.py`**
+
+Guardrail config is needed at middleware construction. Pass `GuardrailsConfig` to the middleware's `__init__`.
+
+- [ ] **Step 5: Update the two call sites of `make_lead_agent`**
+
+- `backend/langgraph.json` (or wherever LangGraph Server registers the agent) — the registration function wraps `make_lead_agent` and must supply `app_config`. If LangGraph Server doesn't support injecting extra args, wrap:
+
+  ```python
+  def _lead_agent_for_langgraph(config: RunnableConfig):
+      return make_lead_agent(config, AppConfig.from_file())
+  ```
+
+  (LangGraph Server still reads config from file — there's no central config broker in that process yet.)
+
+- `backend/packages/harness/deerflow/client.py` — already has `self._config`, pass it: `make_lead_agent(config, self._config)`.
+
+- [ ] **Step 6: Run agent tests**
+
+```bash
+cd backend && PYTHONPATH=. uv run pytest tests/test_lead_agent*.py -v
+```
+
+- [ ] **Step 7: Grep verify Category B complete**
+
+```bash
+cd backend && grep -n "AppConfig\.current()" packages/harness/deerflow/agents/lead_agent/ packages/harness/deerflow/agents/middlewares/
+```
+Expected: no matches.
+
+- [ ] **Step 8: Commit**
+
+```bash
+git add backend/packages/harness/deerflow/agents/ backend/langgraph.json backend/packages/harness/deerflow/client.py backend/tests/
+git commit -m "refactor(config): thread AppConfig through lead agent construction"
+```
+
+---
+
+## Task P2-5 (Category D): Runtime infrastructure takes config at construction
+
+**Files:**
+- Modify: `deerflow/runtime/checkpointer/provider.py` (2 calls), `async_provider.py` (1 call)
+- Modify: `deerflow/runtime/store/provider.py` (2 calls), `async_provider.py` (1 call)
+- Modify: `deerflow/runtime/stream_bridge/async_provider.py` (1 call)
+- Modify: `deerflow/runtime/runs/worker.py` (1 call)
+
+**Pattern:**
+
+```python
+# Before
+class CheckpointerProvider:
+    def get(self):
+        config = AppConfig.current().checkpointer
+        ...
+
+# After
+class CheckpointerProvider:
+    def __init__(self, config: CheckpointerConfig | None):
+        self._config = config
+
+    def get(self):
+        config = self._config
+        ...
+```
+
+Callers construct these providers at startup (from `app/gateway/app.py` or `DeerFlowClient.__init__`) with the relevant config slice.
+
+- [ ] **Step 1: Update `CheckpointerProvider` constructor + `get_checkpointer_provider()` factory**
+
+The factory may need to go from a module-level singleton getter to one that accepts config. Alternatively, the factory stays but takes config as parameter.
+
+- [ ] **Step 2: Update `StoreProvider` analogously**
+
+- [ ] **Step 3: Update `StreamBridgeProvider` analogously**
+
+- [ ] **Step 4: Update `worker.py`**
+
+`Worker` already receives a `RunManager`; `RunManager` receives config at construction time (from Gateway `app.py`) and forwards to `Worker`. Replace `AppConfig.current()` in worker with the injected config.
+
+- [ ] **Step 5: Update `RunManager` construction in `app/gateway/app.py`**
+
+Pass `app.state.config` into `RunManager(..., config=app.state.config)`.
+
+- [ ] **Step 6: Run runtime tests**
+
+```bash
+cd backend && PYTHONPATH=. uv run pytest tests/test_checkpointer*.py tests/test_store*.py tests/test_stream_bridge*.py tests/test_worker*.py -v
+```
+
+- [ ] **Step 7: Grep verify Category D complete**
+
+```bash
+cd backend && grep -rn "AppConfig\.current()" packages/harness/deerflow/runtime/
+```
+Expected: no matches.
+
+- [ ] **Step 8: Commit**
+
+```bash
+git add backend/packages/harness/deerflow/runtime/ backend/app/gateway/app.py backend/tests/
+git commit -m "refactor(config): runtime providers take config at construction"
+```
+
+---
+
+## Task P2-6 (Category C): Memory subsystem — closure-captured config
+
+**Files:**
+- Modify: `deerflow/agents/memory/queue.py` (2 calls)
+- Modify: `deerflow/agents/memory/updater.py` (3 calls)
+- Modify: `deerflow/agents/memory/storage.py` (3 calls)
+
+This category is the trickiest because the Timer callback runs on a thread without Runtime. Config must be captured at enqueue time into the closure.
+
+**Pattern:**
+
+```python
+# Before — config read from ambient state on Timer thread
+class MemoryQueue:
+    def add(self, conversation, user_id):
+        config = AppConfig.current().memory  # may not exist on Timer thread
+        if not config.enabled:
+            return
+        # schedule Timer ...
+
+# After — config captured at enqueue time
+class MemoryQueue:
+    def __init__(self, updater: MemoryUpdater, config: MemoryConfig):
+        self._updater = updater
+        self._config = config
+
+    def add(self, conversation, user_id):
+        config = self._config  # captured at construction
+        if not config.enabled:
+            return
+        # Timer callback closes over `config` and `conversation`
+        def _flush():
+            self._updater.update(conversation, user_id, config)
+        self._timer = Timer(config.debounce_seconds, _flush)
+        self._timer.start()
+```
+
+- [ ] **Step 1: Add `MemoryConfig` parameter to `MemoryStorage.__init__`**
+
+Replace all 3 `AppConfig.current().memory` reads with `self._config.memory` field accesses.
+
+- [ ] **Step 2: Add `MemoryConfig` parameter to `MemoryUpdater.__init__`**
+
+Same pattern.
+
+- [ ] **Step 3: Add `MemoryConfig` parameter to `MemoryQueue.__init__`**
+
+Same pattern. Timer callbacks close over `self._config`.
+
+- [ ] **Step 4: Update the factory / caller path**
+
+`MemoryMiddleware` (the consumer) currently constructs `MemoryQueue` lazily. Now it must get `MemoryConfig` from `runtime.context.app_config.memory` in `before_model`, and construct the queue with that config. Cache construction by config identity if re-construction on every invocation is too expensive.
+
+Alternatively: `MemoryMiddleware.__init__(config: MemoryConfig)` and the config is supplied at middleware-chain construction time (from `make_lead_agent` → `_build_middlewares`).
+
+- [ ] **Step 5: Write regression test for Timer thread**
+
+```python
+# backend/tests/test_memory_queue_timer_captures_config.py
+def test_timer_callback_uses_captured_config():
+    """Verify Timer callback reads config from closure, not ambient state."""
+    cfg = MemoryConfig(enabled=True, debounce_seconds=0.01, ...)
+    updater = MagicMock()
+    queue = MemoryQueue(updater=updater, config=cfg)
+
+    queue.add(conversation=..., user_id="u1")
+    time.sleep(0.05)
+
+    # Verify updater was called with the captured cfg, not a re-read from AppConfig
+    assert updater.update.called
+```
+
+- [ ] **Step 6: Run memory tests**
+
+```bash
+cd backend && PYTHONPATH=. uv run pytest tests/test_memory*.py -v
+```
+
+- [ ] **Step 7: Grep verify Category C complete**
+
+```bash
+cd backend && grep -rn "AppConfig\.current()" packages/harness/deerflow/agents/memory/
+```
+Expected: no matches.
+
+- [ ] **Step 8: Commit**
+
+```bash
+git add backend/packages/harness/deerflow/agents/memory/ backend/tests/
+git commit -m "refactor(config): memory subsystem captures config at construction/enqueue"
+```
+
+---
+
+## Task P2-7 (Category E+F): Sandbox / skills / factories / tools / community — parameter threading
+
+This is the largest mechanical task by file count. All follow the same pattern: add `config: AppConfig` (or a sub-config slice) to the function signature, replace `AppConfig.current()` with the parameter.
+
+**Files:**
+- `deerflow/sandbox/local/local_sandbox_provider.py` (1), `sandbox_provider.py` (1), `security.py` (2)
+- `deerflow/sandbox/tools.py` (5 — these already use `resolve_context()`; no change)
+- `deerflow/skills/loader.py` (1), `manager.py` (1), `security_scanner.py` (1)
+- `deerflow/models/factory.py` (1)
+- `deerflow/tools/tools.py` (2)
+- `deerflow/subagents/registry.py` (1)
+- `deerflow/utils/file_conversion.py` (1)
+- `deerflow/community/aio_sandbox/aio_sandbox_provider.py` (2)
+- `deerflow/community/tavily/tools.py` (2)
+- `deerflow/community/jina_ai/tools.py` (1)
+- `deerflow/community/infoquest/tools.py` (3)
+- `deerflow/community/image_search/tools.py` (1)
+- `deerflow/community/firecrawl/tools.py` (2)
+- `deerflow/community/exa/tools.py` (2)
+- `deerflow/community/ddg_search/tools.py` (1)
+
+**Pattern:**
+
+```python
+# Before
+def get_available_tools(groups, include_mcp=True, model_name=None, subagent_enabled=False):
+    config = AppConfig.current()
+    ...
+
+# After
+def get_available_tools(
+    app_config: AppConfig,
+    groups=None,
+    include_mcp=True,
+    model_name=None,
+    subagent_enabled=False,
+):
+    config = app_config
+    ...
+```
+
+**Caller responsibility:** whoever calls `get_available_tools()` must have `AppConfig` in scope. For agent construction that's `make_lead_agent(config, app_config)` from Task P2-4. For factory tools registered via `use:` strings in config, the `tools.py` resolution pass threads `app_config` through.
+
+- [ ] **Step 1: Update `deerflow/models/factory.py`**
+
+`create_chat_model(name, thinking_enabled=False)` → `create_chat_model(name, app_config, thinking_enabled=False)`. Every caller (agent.py, client.py memory-updater internal model setup) passes `app_config`.
+
+- [ ] **Step 2: Update `deerflow/tools/tools.py`**
+
+`get_available_tools(...)` signature gains `app_config: AppConfig`. Community tool resolution inside it also threads config.
+
+- [ ] **Step 3: Update `deerflow/subagents/registry.py`**
+
+- [ ] **Step 4: Update `deerflow/sandbox/*.py` (non-tools)**
+
+Provider construction takes config. `security.py` helpers take config parameter.
+
+- [ ] **Step 5: Update `deerflow/skills/*.py`**
+
+Loader / manager / scanner take config parameter.
+
+- [ ] **Step 6: Update `deerflow/utils/file_conversion.py`**
+
+- [ ] **Step 7: Update community tool factories**
+
+Each `community/<name>/tools.py` factory now accepts `app_config`. The `tools.py` resolution pass (Step 2) supplies it when instantiating.
+
+- [ ] **Step 8: Run affected test files**
+
+```bash
+cd backend && PYTHONPATH=. uv run pytest tests/test_tool*.py tests/test_skill*.py tests/test_sandbox*.py tests/test_community*.py tests/test_*tool*.py -v
+```
+
+- [ ] **Step 9: Grep verify Category E+F complete**
+
+```bash
+cd backend && grep -rn "AppConfig\.current()" packages/harness/deerflow/{sandbox,skills,models,tools,subagents,utils,community}/
+```
+Expected: no matches (except `sandbox/tools.py` may retain `resolve_context()` calls for dict-legacy paths — those are fine).
+
+- [ ] **Step 10: Commit**
+
+```bash
+git add backend/packages/harness/deerflow/ backend/tests/
+git commit -m "refactor(config): thread AppConfig through sandbox/skills/factories/tools"
+```
+
+---
+
+## Task P2-8 (Category I): Test fixtures
+
+**Files:**
+- Modify: `backend/tests/conftest.py`
+- Modify: ~18 test files using `patch.object(AppConfig, "current")` or `AppConfig._global = ...`
+
+**Pattern:**
+
+```python
+# Before — conftest.py autouse fixture
+@pytest.fixture(autouse=True)
+def _auto_app_config():
+    previous_global = AppConfig._global
+    AppConfig._global = AppConfig(sandbox=SandboxConfig(use="test"))
+    try:
+        yield
+    finally:
+        AppConfig._global = previous_global
+
+
+# Before — test using it
+def test_something():
+    with patch.object(AppConfig, "current", return_value=AppConfig(...)):
+        result = function_under_test()
+
+# After — conftest.py fixture returns config
+@pytest.fixture
+def test_config() -> AppConfig:
+    """Minimal AppConfig for tests that need one."""
+    return AppConfig(sandbox=SandboxConfig(use="test"))
+
+
+# After — test passes config explicitly
+def test_something(test_config):
+    overridden = test_config.model_copy(update={"memory": MemoryConfig(enabled=False)})
+    result = function_under_test(config=overridden)
+```
+
+- [ ] **Step 1: Update `conftest.py`**
+
+Replace `_auto_app_config` autouse fixture with a non-autouse `test_config` fixture. The autouse is no longer needed because `AppConfig.current()` no longer exists after P2-10.
+
+**Note:** Do not remove autouse yet. Tests that still call `AppConfig.current()` (pre-migration) would break. Instead:
+- Add the new `test_config` fixture
+- Keep autouse for now so old tests still work
+- Remove autouse only in Task P2-10 alongside deletion of `current()`
+
+- [ ] **Step 2: Migrate tests by module, starting with most isolated**
+
+For each test file using `patch.object(AppConfig, "current", ...)`:
+- Replace with fixture injection: `def test_xxx(test_config)` and pass `test_config` (or a `model_copy(update=...)` variant) into the function under test.
+
+Per-file migration order (smallest blast radius first):
+1. `test_memory_updater.py` (14 occurrences) — Memory subsystem already took config parameter in P2-6
+2. `test_client.py` (20 occurrences) — Client already took config in P2-3
+3. `test_checkpointer.py` (11 occurrences) — Providers took config in P2-5
+4. `test_memory_storage.py` (10 occurrences)
+5. Remaining files
+
+- [ ] **Step 3: Verify all tests pass after each file migration**
+
+```bash
+cd backend && PYTHONPATH=. uv run pytest tests/<migrated_file>.py -v
+```
+
+- [ ] **Step 4: Commit after each file (keeps diffs reviewable)**
+
+```bash
+git commit -m "refactor(tests): migrate <file> to explicit config fixture"
+```
+
+- [ ] **Step 5: Final grep verify**
+
+```bash
+cd backend && grep -rn "patch\.object(AppConfig, \"current\"" tests/
+cd backend && grep -rn "AppConfig\._global" tests/
+```
+Expected: no matches.
+
+---
+
+## Task P2-9: Simplify `resolve_context()`
+
+**Files:**
+- Modify: `backend/packages/harness/deerflow/config/deer_flow_context.py`
+- Test: `backend/tests/test_deer_flow_context.py`
+
+After P2-2 through P2-8, every caller that invokes `resolve_context()` either passes a typed `DeerFlowContext` or a dict. The dict path's `AppConfig.current()` fallback is no longer reachable if all construction sites are explicit.
+
+- [ ] **Step 1: Update `test_deer_flow_context.py` to expect hard failure on non-DeerFlowContext**
+
+```python
+def test_resolve_context_raises_on_missing_context():
+    runtime = MagicMock()
+    runtime.context = None
+    with pytest.raises(RuntimeError, match="not a DeerFlowContext"):
+        resolve_context(runtime)
+
+def test_resolve_context_raises_on_dict_context():
+    runtime = MagicMock()
+    runtime.context = {"thread_id": "t1"}
+    with pytest.raises(RuntimeError, match="not a DeerFlowContext"):
+        resolve_context(runtime)
+```
+
+- [ ] **Step 2: Simplify `resolve_context()`**
+
+```python
+def resolve_context(runtime: Any) -> DeerFlowContext:
+    ctx = getattr(runtime, "context", None)
+    if isinstance(ctx, DeerFlowContext):
+        return ctx
+    raise RuntimeError(
+        "runtime.context is not a DeerFlowContext. Every caller must "
+        "construct and inject one explicitly; there is no global fallback."
+    )
+```
+
+- [ ] **Step 3: Run `test_deer_flow_context.py`**
+
+```bash
+cd backend && PYTHONPATH=. uv run pytest tests/test_deer_flow_context.py -v
+```
+
+- [ ] **Step 4: Run full test suite to catch any missed dict-context callers**
+
+```bash
+cd backend && PYTHONPATH=. uv run pytest -v
+```
+
+If failures surface, they indicate a caller that was still relying on dict-context fallback. Fix by constructing proper `DeerFlowContext`.
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add backend/packages/harness/deerflow/config/deer_flow_context.py backend/tests/test_deer_flow_context.py
+git commit -m "refactor(config): resolve_context requires typed DeerFlowContext"
+```
+
+---
+
+## Task P2-10: Delete `AppConfig` lifecycle
+
+**Files:**
+- Modify: `backend/packages/harness/deerflow/config/app_config.py`
+- Modify: `backend/tests/conftest.py` (remove `_auto_app_config` autouse fixture)
+- Modify: `backend/tests/test_app_config_reload.py` (delete or rewrite as pure `from_file()` test)
+- Modify: `backend/CLAUDE.md` (update Config Lifecycle section)
+
+Final deletion. Grep must show no callers of `AppConfig.current()`, `AppConfig.init()`, `AppConfig.set_override()`, `AppConfig.reset_override()` in production or tests.
+
+- [ ] **Step 1: Final grep — verify no callers remain**
+
+```bash
+cd backend && grep -rn "AppConfig\.\(current\|init\|set_override\|reset_override\)" packages/ app/ tests/
+```
+Expected: no matches (except the `app_config.py` definitions themselves).
+
+If any match, return to the relevant Category task and finish the migration.
+
+- [ ] **Step 2: Delete from `app_config.py`**
+
+Remove:
+- `_global: ClassVar[AppConfig | None]`
+- `_override: ClassVar[ContextVar[AppConfig]]`
+- `init()`, `set_override()`, `reset_override()`, `current()`
+- The comment block `"# -- Lifecycle (process-global + per-context override) --"`
+- Unused imports: `ContextVar`, `Token`, `ClassVar`
+
+The class reduces to: Pydantic fields + `from_file()`, `resolve_config_path()`, `resolve_env_variables()`, `_check_config_version()`, `get_model_config()`, `get_tool_config()`, `get_tool_group_config()`.
+
+- [ ] **Step 3: Remove `_auto_app_config` autouse fixture from `conftest.py`**
+
+Keep only the explicit `test_config` fixture (non-autouse).
+
+- [ ] **Step 4: Delete or rewrite `test_app_config_reload.py`**
+
+The tests covered `init` / `set_override` / auto-load, all of which are gone. Rewrite as a single test:
+
+```python
+def test_from_file_is_pure(tmp_path):
+    config_file = tmp_path / "config.yaml"
+    config_file.write_text("config_version: 6\nsandbox:\n  use: test\n")
+
+    result1 = AppConfig.from_file(str(config_file))
+    result2 = AppConfig.from_file(str(config_file))
+
+    # Different objects (Pydantic doesn't intern)
+    assert result1 is not result2
+    # But equal values
+    assert result1 == result2
+    # Frozen — cannot mutate
+    with pytest.raises(ValidationError):
+        result1.log_level = "debug"
+```
+
+- [ ] **Step 5: Update `backend/CLAUDE.md`**
+
+Rewrite the "Config Lifecycle" section:
+
+```markdown
+**Config Lifecycle**: All config models are `frozen=True` (immutable after construction). `AppConfig.from_file()` is a pure function — no side effects. There is no process-global or ContextVar — every consumer receives `AppConfig` as an explicit parameter.
+
+- `app/gateway/app.py` loads config at startup and stores on `app.state.config`; routers access via `Depends(get_config)`
+- `DeerFlowClient.__init__(config_path=..., config=...)` captures config as `self._config`
+- Agent execution path: `DeerFlowContext(app_config=..., thread_id=...)` injected via LangGraph `Runtime[DeerFlowContext]`
+- Background threads (memory debounce Timer): config captured at enqueue time in closure
+- Tests: use the `test_config` fixture or construct `AppConfig` directly
+```
+
+- [ ] **Step 6: Run full test suite**
+
+```bash
+cd backend && PYTHONPATH=. uv run pytest -v
+```
+Expected: all pass.
+
+- [ ] **Step 7: Run linter**
+
+```bash
+cd backend && make lint
+```
+
+- [ ] **Step 8: Commit**
+
+```bash
+git add backend/packages/harness/deerflow/config/app_config.py backend/tests/conftest.py backend/tests/test_app_config_reload.py backend/CLAUDE.md
+git commit -m "refactor(config): delete AppConfig process-global and ContextVar lifecycle"
+```
+
+---
+
+## Verification — Phase 2 complete
+
+- [ ] **No global lookup remains**
+
+```bash
+cd backend && grep -rn "AppConfig\.current()\|AppConfig\._global\|AppConfig\._override\|AppConfig\.init(\|AppConfig\.set_override(\|AppConfig\.reset_override(" packages/ app/ tests/
+```
+Expected: no matches.
+
+- [ ] **`AppConfig` is a pure value object**
+
+Read `backend/packages/harness/deerflow/config/app_config.py`. It should contain: Pydantic fields, `from_file()`, `resolve_config_path()`, `resolve_env_variables()`, `_check_config_version()`, `get_model_config()`, `get_tool_config()`, `get_tool_group_config()`. Nothing else.
+
+- [ ] **Multi-client isolation works**
+
+`tests/test_client_multi_isolation.py` passes — two clients with different configs coexist.
+
+- [ ] **Full test suite green**
+
+```bash
+cd backend && PYTHONPATH=. uv run pytest -v && make lint
+```
+
+- [ ] **Commit log tells the story**
+
+```bash
+git log --oneline refactor/explicit-config-p2
+```
+Shows ~10 commits, each scoped to one Category.
diff --git a/docs/superpowers/plans/2026-04-10-event-store-history.md b/docs/superpowers/plans/2026-04-10-event-store-history.md
new file mode 100644
index 000000000..0e3eb1c35
--- /dev/null
+++ b/docs/superpowers/plans/2026-04-10-event-store-history.md
@@ -0,0 +1,471 @@
+# Event Store History — Backend Compatibility Layer
+
+> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
+
+**Goal:** Replace checkpoint state with the append-only event store as the message source in the thread state/history endpoints, so summarization never causes message loss.
+
+**Architecture:** The Gateway's `get_thread_state` and `get_thread_history` endpoints currently read messages from `checkpoint.channel_values["messages"]`. After summarization, those messages are replaced with a synthetic summary-as-human message and all pre-summarize messages are gone. We modify these endpoints to read messages from the RunEventStore instead (append-only, unaffected by summarization). The response shape for each message stays identical so the chat render path needs no changes, but the frontend's feedback hook must be aligned to use the same full-history view (see Task 4).
+
+**Tech Stack:** Python (FastAPI, SQLAlchemy), pytest, TypeScript (React Query)
+
+**Scope:** Gateway mode only (`make dev-pro`). Standard mode uses the LangGraph Server directly and does not go through these endpoints; the summarize bug is still present there and must be tracked as a separate follow-up (see §"Follow-ups" at end of plan).
+
+**Prerequisite already landed:** `backend/packages/harness/deerflow/runtime/journal.py` now unwraps `Command(update={'messages':[ToolMessage(...)]})` in `on_tool_end`, so new runs that use state-updating tools (e.g. `present_files`) write the inner `ToolMessage` content to the event store instead of `str(Command(...))`. Legacy data captured before this fix is cleaned up defensively by the new helper (see Task 1 Step 3 `_sanitize_legacy_command_repr`).
+
+---
+
+## Real Data Alignment Analysis
+
+Compared real `POST /history` response (checkpoint-based) with `run_events` table for thread `6d30913e-dcd4-41c8-8941-f66c716cf359` (docs/resp.json + backend/.deer-flow/data/deerflow.db). See `docs/superpowers/specs/2026-04-11-runjournal-history-evaluation.md` for full evidence chain.
+
+| Message type | Fields compared | Difference |
+|-------------|----------------|------------|
+| human_message | all fields | `id` is `None` in event store, has UUID in checkpoint |
+| ai_message (tool_call) | all fields, 6 overlapping | **IDENTICAL** (0 diffs) |
+| ai_message (final) | all fields | **IDENTICAL** |
+| tool_result (normal) | all fields | Only `id` differs (`None` vs UUID) |
+| tool_result (from `Command`-returning tool) | content | **Legacy data stored `str(Command(...))` repr instead of inner ToolMessage** — fixed in journal.py for new runs; legacy rows sanitized by helper |
+
+**Root cause for id difference:** LangGraph's checkpoint assigns `id` to HumanMessage and ToolMessage during graph execution. Event store writes happen earlier, when those ids are still None. AI messages receive `id` from the LLM response (`lc_run--*`) and are unaffected.
+
+**Fix for id:** Generate deterministic UUIDs for `id=None` messages using `uuid5(NAMESPACE_URL, f"{thread_id}:{seq}")` at read time. Patch a **copy** of the content dict, never the live store object.
+
+**Summarize impact quantified on the reproducer thread**: event_store has 16 messages (7 AI + 9 others); checkpoint has 12 after summarize (5 AI + 7 others). AI id overlap: 5 of 7 — the 2 missing AI messages are pre-summarize.
+
+---
+
+## File Structure
+
+| File | Action | Responsibility |
+|------|--------|----------------|
+| `backend/app/gateway/routers/threads.py` | Modify | Replace checkpoint messages with event store messages in `get_thread_state` and `get_thread_history` |
+| `backend/tests/test_thread_state_event_store.py` | Create | Tests for the modified endpoints |
+
+---
+
+### Task 1: Add `_get_event_store_messages` helper to `threads.py`
+
+A shared helper that loads the **full** message stream from the event store, patches `id=None` messages with deterministic UUIDs, and defensively sanitizes legacy `Command(update=...)` reprs captured before the journal.py fix. Patches a copy of each content dict so the live store is never mutated.
+
+**Design constraints (derived from evaluation §3, §4, §5):**
+- **Full pagination**, not `limit=1000`. `RunEventStore.list_messages` returns "latest N records" — a fixed limit silently truncates older messages. Use `count_messages()` to size the request or loop with `after_seq` cursors.
+- **Copy before mutate**. `MemoryRunEventStore` returns live dict references; the JSONL/DB stores may return detached rows but we must not rely on that. Always `content = dict(evt["content"])` before patching `id`.
+- **Legacy Command sanitization.** Legacy data contains `content["content"] == "Command(update={'artifacts': [...], 'messages': [ToolMessage(content='X', ...)]})"`. Regex-extract the inner ToolMessage content string and replace; if extraction fails, leave content as-is (still strictly better than nothing because checkpoint fallback is also wrong for summarized threads).
+- **User context.** `DbRunEventStore.list_messages` is user-scoped via `resolve_user_id(AUTO)` and relies on the auth contextvar set by `@require_permission`. Both endpoints are already decorated — document this dependency in the helper docstring.
+
+**Files:**
+- Modify: `backend/app/gateway/routers/threads.py`
+- Test: `backend/tests/test_thread_state_event_store.py`
+
+- [ ] **Step 1: Write the test**
+
+Create `backend/tests/test_thread_state_event_store.py`:
+
+```python
+"""Tests for event-store-backed message loading in thread state/history endpoints."""
+
+from __future__ import annotations
+
+import uuid
+
+import pytest
+
+from deerflow.runtime.events.store.memory import MemoryRunEventStore
+
+
+@pytest.fixture()
+def event_store():
+    return MemoryRunEventStore()
+
+
+async def _seed_conversation(event_store: MemoryRunEventStore, thread_id: str = "t1"):
+    """Seed a realistic multi-turn conversation matching real checkpoint format."""
+    # human_message: id is None (same as real data)
+    await event_store.put(
+        thread_id=thread_id, run_id="r1",
+        event_type="human_message", category="message",
+        content={
+            "type": "human", "id": None,
+            "content": [{"type": "text", "text": "Hello"}],
+            "additional_kwargs": {}, "response_metadata": {}, "name": None,
+        },
+    )
+    # ai_tool_call: id is set by LLM
+    await event_store.put(
+        thread_id=thread_id, run_id="r1",
+        event_type="ai_tool_call", category="message",
+        content={
+            "type": "ai", "id": "lc_run--abc123",
+            "content": "",
+            "tool_calls": [{"name": "search", "args": {"q": "cats"}, "id": "call_1", "type": "tool_call"}],
+            "invalid_tool_calls": [],
+            "additional_kwargs": {}, "response_metadata": {}, "name": None,
+            "usage_metadata": {"input_tokens": 100, "output_tokens": 50, "total_tokens": 150},
+        },
+    )
+    # tool_result: id is None (same as real data)
+    await event_store.put(
+        thread_id=thread_id, run_id="r1",
+        event_type="tool_result", category="message",
+        content={
+            "type": "tool", "id": None,
+            "content": "Found 10 results",
+            "tool_call_id": "call_1", "name": "search",
+            "artifact": None, "status": "success",
+            "additional_kwargs": {}, "response_metadata": {},
+        },
+    )
+    # ai_message: id is set by LLM
+    await event_store.put(
+        thread_id=thread_id, run_id="r1",
+        event_type="ai_message", category="message",
+        content={
+            "type": "ai", "id": "lc_run--def456",
+            "content": "I found 10 results about cats.",
+            "tool_calls": [], "invalid_tool_calls": [],
+            "additional_kwargs": {}, "response_metadata": {"finish_reason": "stop"}, "name": None,
+            "usage_metadata": {"input_tokens": 200, "output_tokens": 100, "total_tokens": 300},
+        },
+    )
+    # Also add a trace event — should NOT appear
+    await event_store.put(
+        thread_id=thread_id, run_id="r1",
+        event_type="llm_request", category="trace",
+        content={"model": "gpt-4"},
+    )
+
+
+class TestGetEventStoreMessages:
+    """Verify event store message extraction with id patching."""
+
+    @pytest.mark.asyncio
+    async def test_extracts_all_message_types(self, event_store):
+        await _seed_conversation(event_store)
+        events = await event_store.list_messages("t1", limit=500)
+        messages = [evt["content"] for evt in events if isinstance(evt.get("content"), dict) and "type" in evt["content"]]
+        assert len(messages) == 4
+        assert [m["type"] for m in messages] == ["human", "ai", "tool", "ai"]
+
+    @pytest.mark.asyncio
+    async def test_null_ids_get_patched(self, event_store):
+        """Messages with id=None should get deterministic UUIDs."""
+        await _seed_conversation(event_store)
+        events = await event_store.list_messages("t1", limit=500)
+        messages = []
+        for evt in events:
+            content = evt.get("content")
+            if isinstance(content, dict) and "type" in content:
+                if content.get("id") is None:
+                    content["id"] = str(uuid.uuid5(uuid.NAMESPACE_URL, f"t1:{evt['seq']}"))
+                messages.append(content)
+
+        # All messages now have an id
+        for m in messages:
+            assert m["id"] is not None
+            assert isinstance(m["id"], str)
+            assert len(m["id"]) > 0
+
+        # AI messages keep their original id
+        assert messages[1]["id"] == "lc_run--abc123"
+        assert messages[3]["id"] == "lc_run--def456"
+
+        # Human and tool messages get deterministic ids (same input = same output)
+        human_id_1 = str(uuid.uuid5(uuid.NAMESPACE_URL, "t1:1"))
+        assert messages[0]["id"] == human_id_1
+
+    @pytest.mark.asyncio
+    async def test_empty_thread(self, event_store):
+        events = await event_store.list_messages("nonexistent", limit=500)
+        messages = [evt["content"] for evt in events if isinstance(evt.get("content"), dict)]
+        assert messages == []
+
+    @pytest.mark.asyncio
+    async def test_tool_call_fields_preserved(self, event_store):
+        await _seed_conversation(event_store)
+        events = await event_store.list_messages("t1", limit=500)
+        messages = [evt["content"] for evt in events if isinstance(evt.get("content"), dict) and "type" in evt["content"]]
+
+        # AI tool_call message
+        ai_tc = messages[1]
+        assert ai_tc["tool_calls"][0]["name"] == "search"
+        assert ai_tc["tool_calls"][0]["id"] == "call_1"
+
+        # Tool result
+        tool = messages[2]
+        assert tool["tool_call_id"] == "call_1"
+        assert tool["status"] == "success"
+```
+
+- [ ] **Step 2: Run tests to verify they pass**
+
+Run: `cd backend && PYTHONPATH=. uv run pytest tests/test_thread_state_event_store.py -v`
+
+- [ ] **Step 3: Add the helper function and modify `get_thread_history`**
+
+In `backend/app/gateway/routers/threads.py`:
+
+1. Add import at the top:
+```python
+import uuid  # ADD (may already exist, check first)
+from app.gateway.deps import get_run_event_store  # ADD
+```
+
+2. Add the helper function (before the endpoint functions, after the model definitions):
+
+```python
+_LEGACY_CMD_INNER_CONTENT_RE = re.compile(
+    r"ToolMessage\(content=(?P<q>['\"])(?P<inner>.*?)(?P=q)",
+    re.DOTALL,
+)
+
+
+def _sanitize_legacy_command_repr(content_field: Any) -> Any:
+    """Recover the inner ToolMessage text from a legacy ``str(Command(...))`` repr.
+
+    Runs that pre-date the ``on_tool_end`` fix in ``journal.py`` stored
+    ``str(Command(update={'messages':[ToolMessage(content='X', ...)]}))`` as the
+    tool_result content. New runs store ``'X'`` directly. For old threads, try
+    to extract ``'X'`` defensively; return the original string if extraction
+    fails (still no worse than the current checkpoint-based fallback, which is
+    broken for summarized threads anyway).
+    """
+    if not isinstance(content_field, str) or not content_field.startswith("Command(update="):
+        return content_field
+    match = _LEGACY_CMD_INNER_CONTENT_RE.search(content_field)
+    return match.group("inner") if match else content_field
+
+
+async def _get_event_store_messages(request: Request, thread_id: str) -> list[dict] | None:
+    """Load messages from the event store, returning None if unavailable.
+
+    The event store is append-only and immune to summarization. Each
+    message event's ``content`` field contains a ``model_dump()``'d
+    LangChain Message dict that is already JSON-serialisable.
+
+    **Full pagination, not a fixed limit.** ``RunEventStore.list_messages``
+    returns the newest ``limit`` records when no cursor is given, which
+    silently drops older messages. We call ``count_messages()`` first and
+    request that many records. For stores that may return fewer (e.g. filtered
+    by user), we also fall back to ``after_seq``-cursor pagination.
+
+    **Copy-on-read.** Each content dict is copied before ``id`` is patched so
+    the live store object is never mutated; ``MemoryRunEventStore`` returns
+    live references.
+
+    **Legacy Command repr sanitization.** See ``_sanitize_legacy_command_repr``.
+
+    **User context.** ``DbRunEventStore`` is user-scoped by default via
+    ``resolve_user_id(AUTO)`` (see ``runtime/user_context.py``). Callers of
+    this helper must be inside a request where ``@require_permission`` has
+    populated the user contextvar. Both ``get_thread_history`` and
+    ``get_thread_state`` satisfy that. Do not call this helper from CLI or
+    migration scripts without passing ``user_id=None`` explicitly.
+
+    Returns ``None`` when the event store is not configured or contains no
+    messages for this thread, so callers can fall back to checkpoint messages.
+    """
+    try:
+        event_store = get_run_event_store(request)
+    except Exception:
+        return None
+
+    try:
+        total = await event_store.count_messages(thread_id)
+    except Exception:
+        logger.exception("count_messages failed for thread %s", sanitize_log_param(thread_id))
+        return None
+    if not total:
+        return None
+
+    # Batch by page_size to keep memory bounded for very long threads.
+    page_size = 500
+    collected: list[dict] = []
+    after_seq: int | None = None
+    while True:
+        page = await event_store.list_messages(thread_id, limit=page_size, after_seq=after_seq)
+        if not page:
+            break
+        collected.extend(page)
+        if len(page) < page_size:
+            break
+        after_seq = page[-1].get("seq")
+        if after_seq is None:
+            break
+
+    messages: list[dict] = []
+    for evt in collected:
+        raw = evt.get("content")
+        if not isinstance(raw, dict) or "type" not in raw:
+            continue
+        # Copy to avoid mutating the store-owned dict.
+        content = dict(raw)
+        if content.get("id") is None:
+            content["id"] = str(uuid.uuid5(uuid.NAMESPACE_URL, f"{thread_id}:{evt['seq']}"))
+        # Sanitize legacy Command reprs on tool_result messages only.
+        if content.get("type") == "tool":
+            content["content"] = _sanitize_legacy_command_repr(content.get("content"))
+        messages.append(content)
+    return messages if messages else None
+```
+
+Also add `import re` at the top of the file if it isn't already imported.
+
+3. In `get_thread_history` (around line 585-590), replace the messages section:
+
+**Before:**
+```python
+            # Attach messages from checkpointer only for the latest checkpoint
+            if is_latest_checkpoint:
+                messages = channel_values.get("messages")
+                if messages:
+                    values["messages"] = serialize_channel_values({"messages": messages}).get("messages", [])
+            is_latest_checkpoint = False
+```
+
+**After:**
+```python
+            # Attach messages: prefer event store (immune to summarization),
+            # fall back to checkpoint messages when event store is unavailable.
+            if is_latest_checkpoint:
+                es_messages = await _get_event_store_messages(request, thread_id)
+                if es_messages is not None:
+                    values["messages"] = es_messages
+                else:
+                    messages = channel_values.get("messages")
+                    if messages:
+                        values["messages"] = serialize_channel_values({"messages": messages}).get("messages", [])
+            is_latest_checkpoint = False
+```
+
+- [ ] **Step 4: Modify `get_thread_state` similarly**
+
+In `get_thread_state` (around line 443-444), replace:
+
+**Before:**
+```python
+    return ThreadStateResponse(
+        values=serialize_channel_values(channel_values),
+```
+
+**After:**
+```python
+    values = serialize_channel_values(channel_values)
+
+    # Override messages with event store data (immune to summarization)
+    es_messages = await _get_event_store_messages(request, thread_id)
+    if es_messages is not None:
+        values["messages"] = es_messages
+
+    return ThreadStateResponse(
+        values=values,
+```
+
+- [ ] **Step 5: Run all backend tests**
+
+Run: `cd backend && PYTHONPATH=. uv run pytest tests/ -v --timeout=30 -x`
+
+- [ ] **Step 6: Commit**
+
+```bash
+git add backend/app/gateway/routers/threads.py backend/tests/test_thread_state_event_store.py
+git commit -m "feat(threads): load messages from event store instead of checkpoint state
+
+Event store is append-only and immune to summarization. Messages with
+null ids (human, tool) get deterministic UUIDs based on thread_id:seq
+for stable frontend rendering."
+```
+
+---
+
+### Task 2 (OPTIONAL, deferred): Reduce flush_threshold for shorter mid-stream gap
+
+**Status:** Not a correctness fix. Re-evaluation (see spec) found that `RunJournal` already flushes on `run_end`, `run_error`, cancel, and worker `finally` paths. The only window this tuning narrows is a hard process crash or mid-run reload. Defer and decide separately; do not couple with Task 1 merge.
+
+If pursued: change `flush_threshold` default from 20 → 5 in `journal.py:42`, rerun `tests/test_run_journal.py`, commit as a separate `perf(journal): …` commit.
+
+---
+
+### Task 3: Fix `useThreadFeedback` pagination in frontend
+
+Once `/history` returns the full event-store-backed message stream, the frontend's `runIdByAiIndex` map must also cover the full stream or its positional AI-index mapping drifts and feedback clicks go to the wrong `run_id`. The current hook hardcodes `limit=200`.
+
+**Files:**
+- Modify: `frontend/src/core/threads/hooks.ts` (around line 679)
+
+- [ ] **Step 1: Replace the fixed `?limit=200` with full pagination**
+
+Change:
+
+```ts
+const res = await fetchWithAuth(
+  `${getBackendBaseURL()}/api/threads/${encodeURIComponent(threadId)}/messages?limit=200`,
+);
+```
+
+to a loop that pages via `after_seq` (or an equivalent query param exposed by the `/messages` endpoint — check `backend/app/gateway/routers/thread_runs.py:285-323` for the actual parameter names before writing the TS code). Accumulate `messages` until a page returns fewer than the page size.
+
+- [ ] **Step 2: Defensive index guard**
+
+`runIdByAiIndex[aiMessageIndex]` can still be `undefined` when the frontend renders optimistic state before the messages query refreshes. The current `?? undefined` in `message-list.tsx:71` already handles this; do not remove it.
+
+- [ ] **Step 3: Invalidate `["thread-feedback", threadId]` after a new run**
+
+In `useThreadStream` (or wherever stream-end is handled), call `queryClient.invalidateQueries({ queryKey: ["thread-feedback", threadId] })` when the stream closes so the runIdByAiIndex picks up the new run's AI message immediately.
+
+- [ ] **Step 4: Run `pnpm check`**
+
+```bash
+cd frontend && pnpm check
+```
+
+- [ ] **Step 5: Commit**
+
+```bash
+git add frontend/src/core/threads/hooks.ts
+git commit -m "fix(feedback): paginate useThreadFeedback and invalidate after stream"
+```
+
+---
+
+### Task 4: End-to-end test — summarize + multi-run feedback
+
+Add a regression test that exercises the exact bug class we are fixing: a summarized thread with at least two runs, where feedback clicks must target the correct `run_id`.
+
+**Files:**
+- Modify: `backend/tests/test_thread_state_event_store.py`
+
+- [ ] **Step 1: Write the test**
+
+Seed a `MemoryRunEventStore` with two runs worth of messages (`r1`: human + ai + human + ai, `r2`: human + ai), then simulate a summarized checkpoint state that drops the `r1` messages. Call `_get_event_store_messages` and assert:
+
+- Length matches the event store, not the checkpoint
+- The first message is the original `r1` human, not a summary
+- AI messages preserve their `lc_run--*` ids in order
+- Any `id=None` messages get a stable `uuid5(...)` id
+- A legacy `str(Command(update=...))` content field in a tool_result is sanitized to the inner text
+
+- [ ] **Step 2: Run the new test**
+
+```bash
+cd backend && PYTHONPATH=. uv run pytest tests/test_thread_state_event_store.py -v
+```
+
+- [ ] **Step 3: Commit with Tasks 1, 3 changes**
+
+Bundle with the Task 1 commit so tests always land alongside the implementation.
+
+---
+
+### Task 5: Standard mode follow-up (documentation only)
+
+Standard mode (`make dev`) hits LangGraph Server directly for `/threads/{id}/history` and does not go through the Gateway router we just patched. The summarize bug is still present there.
+
+**Files:**
+- Modify: this plan (add follow-up section at the bottom, see below) OR create a separate tracking issue
+
+- [ ] **Step 1: Record the gap**
+
+Append to the bottom of this plan (or open a GitHub issue and link it):
+
+> **Follow-up — Standard mode summarize bug**
+> `get_thread_history` in `backend/app/gateway/routers/threads.py` is only hit in Gateway mode. Standard mode proxies `/api/langgraph/*` directly to the LangGraph Server (see `backend/CLAUDE.md` nginx routing and `frontend/CLAUDE.md` `NEXT_PUBLIC_LANGGRAPH_BASE_URL`). The summarize-message-loss symptom is still reproducible there. Options: (a) teach the LangGraph Server checkpointer to branch on an override, (b) move `/history` behind Gateway in Standard mode as well, (c) accept as known limitation for Standard mode. Decide before GA.
diff --git a/docs/superpowers/specs/2026-04-11-runjournal-history-evaluation.md b/docs/superpowers/specs/2026-04-11-runjournal-history-evaluation.md
new file mode 100644
index 000000000..44a466960
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-11-runjournal-history-evaluation.md
@@ -0,0 +1,191 @@
+# RunJournal 替换 History Messages — 方案评估与对比
+
+**日期**：2026-04-11
+**分支**：`rayhpeng/fix-persistence-new`
+**相关 plan**：[`docs/superpowers/plans/2026-04-10-event-store-history.md`](../plans/2026-04-10-event-store-history.md)（尚未落地）
+
+---
+
+## 1. 问题与数据核对
+
+**症状**：SummarizationMiddleware 触发后，前端历史中无法展示 summarize 之前的真实用户消息。
+
+**复现数据**（thread `6d30913e-dcd4-41c8-8941-f66c716cf359`）：
+
+| 数据源 | seq=1 的 message | 总 message 数 | 是否保留原始 human |
+|---|---|---:|---|
+| `run_events`（SQLite） | human `"最新伊美局势"` | 9（1 human + 7 ai_tool_call + 9 tool_result + 1 ai_message） | ✅ |
+| `/history` 响应（`docs/resp.json`） | type=human，content=`"Here is a summary of the conversation to date:…"` | 不定 | ❌（已被 summary 替换）|
+
+**根因**：`backend/app/gateway/routers/threads.py:587-589` 的 `get_thread_history` 从 `checkpoint.channel_values["messages"]` 读取，而 LangGraph 的 SummarizationMiddleware 会原地改写这个列表。
+
+---
+
+## 2. 候选方案
+
+| 方案 | 描述 | 本次是否推荐 |
+|---|---|---|
+| **A. event_store 覆盖 messages**（已有 plan） | `/history`、`/state` 改读 `RunEventStore.list_messages()`，覆盖 `channel_values["messages"]`；其它字段保持 checkpoint 来源 | ✅ 主方案 |
+| B. 修 SummarizationMiddleware | 让 summarize 不原地替换 messages（作为附加 system message） | ❌ 违背 summarize 的 token 预算初衷 |
+| C. 双读合并（checkpoint + event_store diff） | 合并 summarize 切点前后的两段 | ❌ 合并逻辑复杂无额外收益 |
+| D. 切到现有 `/api/threads/{id}/messages` 端点 | 前端直接消费已经存在的 event-store 消息端点（`thread_runs.py:285-323`）| ⚠️ 更干净但需要前端改动 |
+
+---
+
+## 3. Claude 自评 vs Codex 独立评估
+
+两方独立分析了同一份 plan。重合点基本一致，但 **Codex 发现了一个我遗漏的关键 bug**。
+
+### 3.1 一致结论
+
+| 维度 | 结论 |
+|---|---|
+| 正确性方向 | event_store 是 append-only + 不受 summarize 影响，方向正确 |
+| ID 补齐 | `uuid5(NAMESPACE_URL, f"{thread_id}:{seq}")` 稳定且确定性，安全 |
+| 前端 schema | 零改动 |
+| Non-message 字段（artifacts/todos/title/thread_data） | summarize 只影响 messages，不需要覆盖其它字段 |
+| 多 checkpoint 语义 | 前端 `useStream` 只取 `limit: 1`（`frontend/src/core/threads/hooks.ts:203-210`），不做时间旅行；latest-only 可接受但应在注释/文档写清楚 |
+| 作用域 | 仅 Gateway mode；Standard mode 直连 LangGraph Server，bug 在默认部署路径仍然存在 |
+
+### 3.2 Claude 的独立观察
+
+1. 已验证数据对齐：plan 文档第 15-28 行的真实数据对齐表与本次 `run_events` 导出一致（9 条消息 id 分布：AI 来自 LLM `lc_run--*`、human/tool 为 None）。
+2. 担心 `run_end` / `run_error` / `cancel` 路径未必都 flush —— 这一点 Codex 实际核查了代码并给出确定结论（见下）。
+3. 方案 A 的单文件改动约 60 行，复杂度小。
+
+### 3.3 Codex 的关键补充（Claude 遗漏）
+
+> **Bug #1 — Plan 用 `limit=1000` 并非全量**
+> `RunEventStore.list_messages()` 的语义是"返回最新 limit 条"（`base.py:51-65`、`db.py:151-181`）。对于消息数超过 1000 的长对话，plan 当前写法会**丢掉最早的消息**，再次引入"消息丢失"bug（只是换了丢失的段）。
+
+> **Bug #2 — helper 就地修改了 store 的 dict**
+> plan 的 helper 里对 `content` 原地写 `id`；`MemoryRunEventStore` 返回的是**活引用**，会污染 store 中的对象。应 deep-copy 或 dict 推导出新对象。
+
+> **Flush 路径已核查**：
+> `RunJournal` 在 threshold (`journal.py:360-373`)、`run_end` (`91-96`)、`run_error` (`97-106`)、worker `finally` (`worker.py:280-286`) 都会 flush；`CancelledError` 也走 finally。**正常 end/error/cancel 都 flush，仅硬 kill / 进程崩溃会丢缓冲区**。
+> 因此 `flush_threshold 20 → 5` 的意义**仅在于硬崩溃窗口**与 mid-run reload 可见性，**不是正确性修复**，属于可选 tuning。代价是更多 put_batch / SQLite churn；且 `_flush_sync()` (`383-398`) 已防止并发 flush，所以"每 5 条一 flush"是 best-effort 非严格保证。
+
+### 3.4 Codex 未否决但提示的次要点
+
+- 方案 D（消费现有 `/api/threads/{id}/messages` 端点）更干净但需前端改动。
+- `/history` 一旦被方案 A 改过，就不再是严格意义上的"按 checkpoint 快照"API（对 messages 字段），应写进注释和 API 文档。
+- Standard mode 的 summarize bug 应建立独立 follow-up issue。
+
+---
+
+## 4. 最终合并判决
+
+**Codex**：APPROVE-WITH-CHANGES
+**Claude**：同意 Codex 的判决
+
+### 合并前必须修改（Top 3）
+
+1. **修复分页 bug**：不能用固定 `limit=1000`。必须用以下之一：
+   - `count = await event_store.count_messages(thread_id)`，再 `list_messages(thread_id, limit=count)`
+   - 或循环 cursor 分页（`after_seq`）直到耗尽
+2. **不要原地修改 store dict**：helper 对 `content` 的 id 补齐需要 copy（`dict(content)` 浅拷贝足够，因为只写 top-level `id`）
+3. **Standard mode 显式 follow-up**：在 plan 文末加 "Standard-mode follow-up: TODO #xxx"，或在合并 PR 描述中明确这是 Gateway-only 止血
+
+### 可选（非阻塞）
+
+4. `flush_threshold 20 → 5` 降级为"可选 tuning"，不是修复的一部分；或独立一条 commit 并说明只对硬崩溃窗口有用
+5. `get_thread_history` 新增注释，说明 messages 字段脱离了 checkpoint 快照语义
+6. 测试覆盖：模拟 summarize 后的 checkpoint + 真实 event_store，端到端验证 `/history` 返回包含原始 human 消息
+
+---
+
+## 5. 推荐执行顺序
+
+1. 按本文档 §4 修订 `docs/superpowers/plans/2026-04-10-event-store-history.md`（主要是 Task 1 的 helper 实现 + 分页）
+2. 按修订后的 plan 执行（走 `superpowers:executing-plans`）
+3. 合并后立即建 Standard mode follow-up issue
+
+## 6. Feedback 影响分析（2026-04-11 补充）
+
+### 6.1 数据模型
+
+`feedback` 表（`persistence/feedback/model.py`）：
+
+| 字段 | 说明 |
+|---|---|
+| `feedback_id` PK | - |
+| `run_id` NOT NULL | 反馈目标 run |
+| `thread_id` NOT NULL | - |
+| `user_id` | - |
+| `message_id` nullable | 注释明确写：`optional RunEventStore event identifier` — 已经面向 event_store 设计 |
+| UNIQUE(thread_id, run_id, user_id) | 每 run 每用户至多一条 |
+
+**结论**：feedback **不按 message uuid 存**，按 `run_id` 存，所以 summarize 导致的 checkpoint messages 丢失**不会影响 feedback 存储**。schema 天生与 event_store 兼容，**无需数据迁移**。
+
+### 6.2 前端的 runId 映射：发现隐藏 bug
+
+前端 feedback 目前走两条并行的数据链：
+
+| 用途 | 数据源 | 位置 |
+|---|---|---|
+| 渲染消息体 | `POST /history`（checkpoint） | `useStream` → `thread.messages` |
+| 拿 `runId` 映射 | `GET /api/threads/{id}/messages?limit=200`（**event_store**） | `useThreadFeedback` (`hooks.ts:669-709`) |
+
+两者通过 **"AI 消息的序号"** 对齐：
+
+```ts
+// hooks.ts:691-698
+for (const msg of messages) {
+  if (msg.event_type === "ai_message") {
+    runIdByAiIndex.push(msg.run_id);  // 只按 AI 顺序 push
+  }
+}
+// message-list.tsx:70-71
+runId = feedbackData.runIdByAiIndex[aiMessageIndex]
+```
+
+**Bug**：summarize 过的 thread 里，两条数据链的 AI 消息数量和顺序**不一致**：
+
+| 数据源 | 本 thread 的 AI 消息序列 | 数量 |
+|---|---|---:|
+| `/history`（checkpoint，summarize 后） | seq=19,31,37,45,53 | 5 |
+| `/messages`（event_store，完整） | seq=5,13,19,31,37,45,53 | 7 |
+
+结果：前端渲染的"第 0 条 AI 消息"是 seq=19，但 `runIdByAiIndex[0]` 指向 seq=5 的 run（本例同一 run 里没事，**跨多 run 的 thread 点赞就会打到错的 run 上**）。
+
+**这个 bug 和本次 plan 无关，已经存在了**。只是用户未必注意到。
+
+### 6.3 方案 A 对 feedback 的影响
+
+**负面**：无。feedback 存储不受影响。
+
+**正面（意外收益）**：`/history` 切换到 event_store 后，**两条数据链的 AI 消息序列自动对齐**，§6.2 的隐藏 bug 被顺带修好。
+
+**前提条件**（加入 Top 3 改动之一同等重要）：
+
+- 新 helper 必须和 `/messages` 端点用**同样的消息获取逻辑**（same store, same filter）。否则两条链仍然可能在边界条件下漂移
+- 具体说：**两边都要做完整分页**。目前 `/messages?limit=200` 在前端硬编码 200，如果 thread 有 >200 条消息就会截断；plan 的 `limit=1000` 也一样有上限。两个上限不一致 → 两边顺序不再对齐 → feedback 映射错位
+- **必须修**：`useThreadFeedback` 的 `limit=200` 需要改成分页获取全部，或者 `/messages` 后端改为默认全量
+
+### 6.4 对前端改造顺序的影响
+
+原 plan 声明"零前端改动"，但加入 feedback 考虑后应修正为：
+
+| 改动 | 必须 | 可选 |
+|---|---|---|
+| 后端 `/history` 改读 event_store | ✅ | - |
+| 后端 helper 用分页而非 `limit=1000` | ✅ | - |
+| 前端 `useThreadFeedback` 改用分页或提升 limit | ✅ | - |
+| `runIdByAiIndex` 增加防御：索引越界 fallback `undefined`（已有）| - | ✅ 已经是 |
+| 前端改用 `/messages` 直接做渲染（方案 D） | - | ✅ 长期更干净 |
+
+### 6.5 feedback 相关的新 Top 3 补充
+
+在原来的 Top 3 之外，再加：
+
+4. **前端 `useThreadFeedback` 必须分页或拉全**（`frontend/src/core/threads/hooks.ts:679`），否则和 `/history` 的新全量行为仍然错位
+5. **端到端测试**：一个 thread 跨 >1 个 run + 触发 summarize + 给历史 AI 消息点赞，确认 feedback 打到正确的 run_id
+6. **TanStack Query 缓存协调**：`thread-feedback` 与 history 查询的 `staleTime` / invalidation 需要在新 run 结束时同步刷新，否则新消息写入后 `runIdByAiIndex` 没更新，点赞会打到上一个 run
+
+---
+
+## 8. 未决问题
+
+- `RunEventStore.count_messages()` 与 `list_messages(after_seq=...)` 的实际性能（SQLite 上对于数千消息级别应无问题，但未压测）
+- `MemoryRunEventStore` 与 `DbRunEventStore` 分页语义是否一致（Codex 只核查了 `db.py`，`memory.py` 需确认）
+- 是否应把 `/api/threads/{id}/messages` 提升为前端主用 endpoint，把 `/history` 保留为纯 checkpoint API —— 架构层面更干净但成本更高
diff --git a/docs/superpowers/specs/2026-04-11-summarize-marker-design.md b/docs/superpowers/specs/2026-04-11-summarize-marker-design.md
new file mode 100644
index 000000000..79cd748d4
--- /dev/null
+++ b/docs/superpowers/specs/2026-04-11-summarize-marker-design.md
@@ -0,0 +1,203 @@
+# Summarize Marker in History — Design & Verification
+
+**Date**: 2026-04-11
+**Branch**: `rayhpeng/fix-persistence-new`
+**Status**: Design approved, implementation deferred to a follow-up PR
+**Depends on**: [`2026-04-11-runjournal-history-evaluation.md`](./2026-04-11-runjournal-history-evaluation.md) (the event-store-backed history fix this builds on)
+
+---
+
+## 1. Goal
+
+Display a "summarization happened here" marker in the conversation history UI when `SummarizationMiddleware` ran mid-run, so users understand why earlier messages look condensed or missing. The event-store-backed `/history` fix already recovered the original messages; this spec adds a **visible marker** at the seq position where summarization occurred, optionally showing the generated summary text.
+
+## 2. Investigation findings
+
+### 2.1 Today's state: zero middleware records
+
+Full scan of `backend/.deer-flow/data/deerflow.db` `run_events`:
+
+| category | rows |
+|---|---:|
+| trace | 76 |
+| message | 34 |
+| lifecycle | 8 |
+| **middleware** | **0** |
+
+No row has `event_type` containing `summariz` or `middleware`. The middleware category is dead in production.
+
+### 2.2 Why: two dead code paths in `journal.py`
+
+| Location | Status |
+|---|---|
+| `journal.py:343-362` — `on_custom_event("summarization", ...)` writes one trace event + one `category="middleware"` event. | Dead. Only fires when something calls `adispatch_custom_event("summarization", {...})`. The upstream LangChain `SummarizationMiddleware` (`.venv/.../langchain/agents/middleware/summarization.py:272`) **never emits custom events** — its `before_model`/`abefore_model` just mutate messages in place and return `{'messages': new_messages}`. Callback never triggered. |
+| `journal.py:449` — `record_middleware(tag, *, name, hook, action, changes)` helper | Dead. Grep shows zero callers in the harness. Added speculatively, never wired up. |
+
+### 2.3 Concrete evidence of summarize running unlogged
+
+Thread `3d5dea4a-0983-4727-a4e8-41a64428933a`:
+
+- `run_events` seq=1 → original human `"写一份关于deer-flow的详细技术报告"` ✓ (event store is fine)
+- `run_events` seq=43 → `llm_request` trace whose `messages[0]` literal contains `"Here is a summary of the conversation to date:"` — proof that SummarizationMiddleware did inject a summary mid-run
+- Zero rows with `category='middleware'` for this thread → nothing captured for UI to render
+
+## 3. Approaches considered
+
+### A. Subclass `SummarizationMiddleware` and dispatch a custom event
+
+Wrap the upstream class, override `abefore_model`, call `await adispatch_custom_event("summarization", {...})` after super(). Journal's existing `on_custom_event` path captures it.
+
+### B. Frontend-only diff heuristic
+
+Compare `event_store.count_messages()` vs rendered count, infer summarization happened from the gap. **Rejected**: can't pinpoint position in the stream, can't show summary text. Only yields a vague badge.
+
+### C. Hybrid A + frontend inline card rendered at the middleware event's seq position
+
+Same backend as A, plus frontend renders an inline `[N messages condensed]` card at the correct chronological position. **Recommended terminal state**.
+
+## 4. Subagent's wrong claim and its rebuttal
+
+An independent agent flagged approach A as structurally broken because:
+
+> `RunnableCallable(trace=False)` skips `set_config_context`, therefore `var_child_runnable_config` is never set, therefore `adispatch_custom_event` raises `RuntimeError("Unable to dispatch an adhoc event without a parent run id")`.
+
+**This is wrong.** The user's counter-intuition was correct: `trace=False` does not prevent `adispatch_custom_event` from working, as long as the middleware signature explicitly accepts `config: RunnableConfig`. The mechanism:
+
+1. `RunnableCallable.__init__` (`langgraph/_internal/_runnable.py:293-319`) inspects the function signature. If it accepts `config: RunnableConfig`, that parameter is recorded in `self.func_accepts`.
+2. Both `trace=True` and `trace=False` branches of `ainvoke` run the same kwarg-injection loop (`_runnable.py:349-356`): `if kw == "config": kw_value = config`. The `config` passed to `ainvoke` (from Pregel's `task.proc.ainvoke(task.input, config)` at `pregel/_retry.py:138`) is the task config with callbacks already bound.
+3. Inside the middleware, passing that `config` explicitly to `adispatch_custom_event(..., config=config)` means the function doesn't rely on `var_child_runnable_config.get()` at all. The LangChain docstring at `langchain_core/callbacks/manager.py:2574-2579` even says "If using python 3.10 and async, you MUST specify the config parameter" — which is exactly this path.
+
+`trace=False` only changes whether **this runnable layer creates a new child callback scope**. It does not affect whether the outer-layer config (with callbacks including `RunJournal`) is passed down to the function.
+
+## 5. Verification
+
+Ran `/tmp/verify_summarize_event.py` (standalone minimal reproduction):
+
+- Minimal `AgentMiddleware` subclass with `abefore_model(self, state, runtime, config: RunnableConfig)`
+- Calls `await adispatch_custom_event("summarization", {...}, config=config)` inside
+- `create_agent(model=FakeChatModel, middleware=[probe])`
+- `agent.ainvoke({...}, config={"callbacks": [RecordingHandler()]})`
+
+**Result**:
+
+```
+INFO verify: ProbeMiddleware.abefore_model called
+INFO verify:   config keys: ['callbacks', 'configurable', 'metadata']
+INFO verify:   config.callbacks type: AsyncCallbackManager
+INFO verify:   config.metadata: {'langgraph_step': 1, 'langgraph_node': 'probe.before_model', ...}
+INFO verify: on_custom_event fired: name=summarization
+             run_id=019d7d19-1727-7830-aa33-648ecbee4b95
+             data={'summary': 'fake summary', 'replaced_count': 3}
+SUCCESS: approach A is viable (config injection + adispatch work)
+```
+
+All five predictions held:
+
+1. ✅ `config: RunnableConfig` signature triggers auto-injection despite `trace=False`
+2. ✅ `config.callbacks` is an `AsyncCallbackManager` with `parent_run_id` set
+3. ✅ `adispatch_custom_event(..., config=config)` runs without error
+4. ✅ `RecordingHandler.on_custom_event` receives the event
+5. ✅ The received `run_id` is a valid UUID tied to the running graph
+
+**Bonus finding**: `config.metadata` contains `langgraph_step` and `langgraph_node`. These can be included in the middleware event's metadata to help the frontend position the marker on the timeline.
+
+## 6. Recommended implementation (approach C)
+
+### 6.1 Backend
+
+**New wrapper middleware** in `backend/packages/harness/deerflow/agents/lead_agent/agent.py`:
+
+```python
+from langchain.agents.middleware.summarization import SummarizationMiddleware
+from langchain_core.callbacks import adispatch_custom_event
+from langchain_core.runnables import RunnableConfig
+
+
+class _TrackingSummarizationMiddleware(SummarizationMiddleware):
+    """Wraps upstream SummarizationMiddleware to emit a ``summarization``
+    custom event on every actual summarization, so RunJournal can persist
+    a middleware:summarize row to the event store.
+
+    The upstream class does not emit events of its own. Declaring
+    ``config: RunnableConfig`` in the override lets LangGraph's
+    ``RunnableCallable`` inject the Pregel task config (with callbacks
+    and parent_run_id) regardless of ``trace=False`` on the node.
+    """
+
+    async def abefore_model(self, state, runtime, config: RunnableConfig):
+        before_count = len(state.get("messages") or [])
+        result = await super().abefore_model(state, runtime)
+        if result is None:
+            return None
+
+        new_messages = result.get("messages") or []
+        replaced_count = max(0, before_count - len(new_messages))
+        summary_text = _extract_summary_text(new_messages)
+
+        await adispatch_custom_event(
+            "summarization",
+            {
+                "summary": summary_text,
+                "replaced_count": replaced_count,
+            },
+            config=config,
+        )
+        return result
+
+
+def _extract_summary_text(messages: list) -> str:
+    """Pull the summary string out of the HumanMessage the upstream class
+    injects as ``Here is a summary of the conversation to date:...``."""
+    for msg in messages:
+        if getattr(msg, "type", None) == "human":
+            content = getattr(msg, "content", "")
+            text = content if isinstance(content, str) else ""
+            if text.startswith("Here is a summary of the conversation to date"):
+                return text
+    return ""
+```
+
+Swap the existing `SummarizationMiddleware()` instantiation in `_build_middlewares` for `_TrackingSummarizationMiddleware(...)` with the same args.
+
+**Journal change**: **zero**. `on_custom_event("summarization", ...)` in `journal.py:343-362` already writes both a trace and a `category="middleware"` row.
+
+**History helper change**: extend `_get_event_store_messages` in `backend/app/gateway/routers/threads.py` to surface `category="middleware"` rows as pseudo-messages, e.g.:
+
+```python
+# In the per-event loop, after the existing message branch:
+if evt.get("category") == "middleware" and evt.get("event_type") == "middleware:summarize":
+    meta = evt.get("metadata") or {}
+    messages.append({
+        "id": f"summary-marker-{evt['seq']}",
+        "type": "summary_marker",
+        "replaced_count": meta.get("replaced_count", 0),
+        "summary": (raw or {}).get("content", "") if isinstance(raw, dict) else "",
+        "run_id": evt.get("run_id"),
+    })
+```
+
+The marker uses a sentinel `type` (`summary_marker`) that doesn't collide with any LangChain message type, so downstream consumers that loop over messages can skip or render it explicitly.
+
+### 6.2 Frontend
+
+- `core/messages/utils.ts`: extend the message grouping to recognize `type === "summary_marker"` and yield it as its own group (`"assistant:summary-marker"`)
+- `components/workspace/messages/message-list.tsx`: add a branch in the grouped render switch that renders a distinctive inline card showing `N messages condensed` and a collapsible panel with the summary text
+- No changes to feedback logic: the marker has no `feedback` field so the button naturally doesn't render on it
+
+## 7. Risks
+
+1. **Synchronous path**. The upstream class has both `before_model` and `abefore_model`. Our wrapper only overrides the async variant. If any deer-flow code path ever uses the sync flow, those summarizations won't be captured. Mitigation: also override `before_model` and use `dispatch_custom_event` (sync variant) with the same pattern.
+2. **`_extract_summary_text` fragility**. It depends on the upstream class prefix `"Here is a summary of the conversation to date"` in the injected `HumanMessage`. Any upstream template change breaks detection. Mitigation: pick the first new `HumanMessage` that wasn't in `state["messages"]` before super() — resilient to template wording changes at the cost of a small diff helper.
+3. **`replaced_count` accuracy when concurrent updates**. If another middleware in the chain also modifies `state["messages"]` before super() returns, the naive `before_count - len(new_messages)` arithmetic is wrong. Mitigation: inspect the `RemoveMessage(id=REMOVE_ALL_MESSAGES)` that upstream emits and count from the original input list directly.
+4. **History helper contract change**. Introducing a non-LangChain-typed entry (`type="summary_marker"`) in the `/history` response could break frontend code that blindly casts entries to `Message`. Mitigation: the frontend change above adds an explicit branch; type-check the frontend end-to-end before merging.
+
+## 8. Out of scope / deferred
+
+- Other middleware types (Title, Guardrail, HITL) do not emit custom events either. If we want markers for those too, repeat the wrapper pattern for each. Not in this design.
+- Retroactive markers for old threads (captured before this patch) are impossible without re-running the graph. Legacy threads will show the event-store-recovered messages without a marker.
+- Standard mode (`make dev`) — agent runs inside LangGraph Server, not the Gateway-embedded runtime. `RunJournal` may not be wired there, so the custom event fires but is captured by no one. Tracked as a separate follow-up.
+
+## 9. Next actions
+
+1. Land the current summarize-message-loss fixes (journal `Command` unwrap + event-store-backed `/history` + inline feedback) — implementation verified, being committed now as three commits on `rayhpeng/fix-persistence-new`
+2. Summarize-marker implementation (this spec) → separate follow-up PR based on the above verified design
diff --git a/frontend/package.json b/frontend/package.json
index ed8b0a950..2ce4e2f6d 100644
--- a/frontend/package.json
+++ b/frontend/package.json
@@ -54,7 +54,6 @@
     "@xyflow/react": "^12.10.0",
     "ai": "^6.0.33",
     "best-effort-json-parser": "^1.2.1",
-    "better-auth": "^1.3",
     "canvas-confetti": "^1.9.4",
     "class-variance-authority": "^0.7.1",
     "clsx": "^2.1.1",
diff --git a/frontend/src/app/(auth)/layout.tsx b/frontend/src/app/(auth)/layout.tsx
new file mode 100644
index 000000000..0b35d4ac1
--- /dev/null
+++ b/frontend/src/app/(auth)/layout.tsx
@@ -0,0 +1,46 @@
+import Link from "next/link";
+import { redirect } from "next/navigation";
+import { type ReactNode } from "react";
+
+import { AuthProvider } from "@/core/auth/AuthProvider";
+import { getServerSideUser } from "@/core/auth/server";
+import { assertNever } from "@/core/auth/types";
+
+export const dynamic = "force-dynamic";
+
+export default async function AuthLayout({
+  children,
+}: {
+  children: ReactNode;
+}) {
+  const result = await getServerSideUser();
+
+  switch (result.tag) {
+    case "authenticated":
+      redirect("/workspace");
+    case "needs_setup":
+      // Allow access to setup page
+      return <AuthProvider initialUser={result.user}>{children}</AuthProvider>;
+    case "system_setup_required":
+    case "unauthenticated":
+      return <AuthProvider initialUser={null}>{children}</AuthProvider>;
+    case "gateway_unavailable":
+      return (
+        <div className="flex h-screen flex-col items-center justify-center gap-4">
+          <p className="text-muted-foreground">
+            Service temporarily unavailable.
+          </p>
+          <Link
+            href="/login"
+            className="bg-primary text-primary-foreground hover:bg-primary/90 rounded-md px-4 py-2 text-sm"
+          >
+            Retry
+          </Link>
+        </div>
+      );
+    case "config_error":
+      throw new Error(result.message);
+    default:
+      assertNever(result);
+  }
+}
diff --git a/frontend/src/app/(auth)/login/page.tsx b/frontend/src/app/(auth)/login/page.tsx
new file mode 100644
index 000000000..82fcf8b90
--- /dev/null
+++ b/frontend/src/app/(auth)/login/page.tsx
@@ -0,0 +1,213 @@
+"use client";
+
+import Link from "next/link";
+import { useRouter, useSearchParams } from "next/navigation";
+import { useTheme } from "next-themes";
+import { useEffect, useState } from "react";
+
+import { Button } from "@/components/ui/button";
+import { FlickeringGrid } from "@/components/ui/flickering-grid";
+import { Input } from "@/components/ui/input";
+import { useAuth } from "@/core/auth/AuthProvider";
+import { parseAuthError } from "@/core/auth/types";
+
+/**
+ * Validate next parameter
+ * Prevent open redirect attacks
+ * Per RFC-001: Only allow relative paths starting with /
+ */
+function validateNextParam(next: string | null): string | null {
+  if (!next) {
+    return null;
+  }
+
+  // Need start with / (relative path)
+  if (!next.startsWith("/")) {
+    return null;
+  }
+
+  // Disallow protocol-relative URLs
+  if (
+    next.startsWith("//") ||
+    next.startsWith("http://") ||
+    next.startsWith("https://")
+  ) {
+    return null;
+  }
+
+  // Disallow URLs with different protocols (e.g., javascript:, data:, etc)
+  if (next.includes(":") && !next.startsWith("/")) {
+    return null;
+  }
+
+  // Valid relative path
+  return next;
+}
+
+export default function LoginPage() {
+  const router = useRouter();
+  const searchParams = useSearchParams();
+  const { isAuthenticated } = useAuth();
+  const { theme, resolvedTheme } = useTheme();
+
+  const [email, setEmail] = useState("");
+  const [password, setPassword] = useState("");
+  const [isLogin, setIsLogin] = useState(true);
+  const [error, setError] = useState("");
+  const [loading, setLoading] = useState(false);
+
+  // Get next parameter for validated redirect
+  const nextParam = searchParams.get("next");
+  const redirectPath = validateNextParam(nextParam) ?? "/workspace";
+
+  // Redirect if already authenticated (client-side, post-login)
+  useEffect(() => {
+    if (isAuthenticated) {
+      router.push(redirectPath);
+    }
+  }, [isAuthenticated, redirectPath, router]);
+
+  // Redirect to setup if the system has no users yet
+  useEffect(() => {
+    let cancelled = false;
+
+    void fetch("/api/v1/auth/setup-status")
+      .then((r) => r.json())
+      .then((data: { needs_setup?: boolean }) => {
+        if (!cancelled && data.needs_setup) {
+          router.push("/setup");
+        }
+      })
+      .catch(() => {
+        // Ignore errors; user stays on login page
+      });
+
+    return () => {
+      cancelled = true;
+    };
+  }, [router]);
+
+  const handleSubmit = async (e: React.FormEvent) => {
+    e.preventDefault();
+    setError("");
+    setLoading(true);
+
+    try {
+      const endpoint = isLogin
+        ? "/api/v1/auth/login/local"
+        : "/api/v1/auth/register";
+      const body = isLogin
+        ? `username=${encodeURIComponent(email)}&password=${encodeURIComponent(password)}`
+        : JSON.stringify({ email, password });
+
+      const headers: HeadersInit = isLogin
+        ? { "Content-Type": "application/x-www-form-urlencoded" }
+        : { "Content-Type": "application/json" };
+
+      const res = await fetch(endpoint, {
+        method: "POST",
+        headers,
+        body,
+        credentials: "include", // Important: include HttpOnly cookie
+      });
+
+      if (!res.ok) {
+        const data = await res.json();
+        const authError = parseAuthError(data);
+        setError(authError.message);
+        return;
+      }
+
+      // Both login and register set a cookie — redirect to workspace
+      router.push(redirectPath);
+    } catch {
+      setError("Network error. Please try again.");
+    } finally {
+      setLoading(false);
+    }
+  };
+
+  const actualTheme = theme === "system" ? resolvedTheme : theme;
+
+  return (
+    <div className="bg-background flex min-h-screen items-center justify-center">
+      <FlickeringGrid
+        className="absolute inset-0 z-0 mask-[url(/images/deer.svg)] mask-size-[100vw] mask-center mask-no-repeat md:mask-size-[72vh]"
+        squareSize={4}
+        gridGap={4}
+        color={actualTheme === "dark" ? "white" : "black"}
+        maxOpacity={0.3}
+        flickerChance={0.25}
+      />
+      <div className="border-border/20 bg-background/5 w-full max-w-md space-y-6 rounded-3xl border p-8 backdrop-blur-sm">
+        <div className="text-center">
+          <h1 className="text-foreground font-serif text-3xl">DeerFlow</h1>
+          <p className="text-muted-foreground mt-2">
+            {isLogin ? "Sign in to your account" : "Create a new account"}
+          </p>
+        </div>
+
+        <form onSubmit={handleSubmit} className="space-y-2">
+          <div className="flex flex-col space-y-1">
+            <label htmlFor="email" className="text-sm font-medium">
+              Email
+            </label>
+            <Input
+              id="email"
+              type="email"
+              value={email}
+              onChange={(e) => setEmail(e.target.value)}
+              placeholder="you@example.com"
+              required
+            />
+          </div>
+          <div className="flex flex-col space-y-1">
+            <label htmlFor="password" className="text-sm font-medium">
+              Password
+            </label>
+            <Input
+              id="password"
+              type="password"
+              value={password}
+              onChange={(e) => setPassword(e.target.value)}
+              placeholder="•••••••"
+              required
+              minLength={isLogin ? 6 : 8}
+            />
+          </div>
+
+          {error && <p className="text-sm text-red-500">{error}</p>}
+
+          <Button type="submit" className="w-full" disabled={loading}>
+            {loading
+              ? "Please wait..."
+              : isLogin
+                ? "Sign In"
+                : "Create Account"}
+          </Button>
+        </form>
+
+        <div className="text-center text-sm">
+          <button
+            type="button"
+            onClick={() => {
+              setIsLogin(!isLogin);
+              setError("");
+            }}
+            className="text-blue-500 hover:underline"
+          >
+            {isLogin
+              ? "Don't have an account? Sign up"
+              : "Already have an account? Sign in"}
+          </button>
+        </div>
+
+        <div className="text-muted-foreground text-center text-xs">
+          <Link href="/" className="hover:underline">
+            ← Back to home
+          </Link>
+        </div>
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/app/(auth)/setup/page.tsx b/frontend/src/app/(auth)/setup/page.tsx
new file mode 100644
index 000000000..4f1d21eae
--- /dev/null
+++ b/frontend/src/app/(auth)/setup/page.tsx
@@ -0,0 +1,287 @@
+"use client";
+
+import { useRouter } from "next/navigation";
+import { useTheme } from "next-themes";
+import { useEffect, useState } from "react";
+
+import { Button } from "@/components/ui/button";
+import { FlickeringGrid } from "@/components/ui/flickering-grid";
+import { Input } from "@/components/ui/input";
+import { getCsrfHeaders } from "@/core/api/fetcher";
+import { useAuth } from "@/core/auth/AuthProvider";
+import { parseAuthError } from "@/core/auth/types";
+
+type SetupMode = "loading" | "init_admin" | "change_password";
+
+export default function SetupPage() {
+  const router = useRouter();
+  const { user, isAuthenticated } = useAuth();
+  const { theme, resolvedTheme } = useTheme();
+  const [mode, setMode] = useState<SetupMode>("loading");
+
+  // --- Shared state ---
+  const [email, setEmail] = useState("");
+  const [newPassword, setNewPassword] = useState("");
+  const [confirmPassword, setConfirmPassword] = useState("");
+  const [error, setError] = useState("");
+  const [loading, setLoading] = useState(false);
+
+  // --- Change-password mode only ---
+  const [currentPassword, setCurrentPassword] = useState("");
+
+  useEffect(() => {
+    let cancelled = false;
+
+    if (isAuthenticated && user?.needs_setup) {
+      setMode("change_password");
+    } else if (!isAuthenticated) {
+      // Check if the system has no users yet
+      void fetch("/api/v1/auth/setup-status")
+        .then((r) => r.json())
+        .then((data: { needs_setup?: boolean }) => {
+          if (cancelled) return;
+          if (data.needs_setup) {
+            setMode("init_admin");
+          } else {
+            // System already set up and user is not logged in — go to login
+            router.push("/login");
+          }
+        })
+        .catch(() => {
+          if (!cancelled) router.push("/login");
+        });
+    } else {
+      // Authenticated but needs_setup is false — already set up
+      router.push("/workspace");
+    }
+
+    return () => {
+      cancelled = true;
+    };
+  }, [isAuthenticated, user, router]);
+
+  // ── Init-admin handler ─────────────────────────────────────────────
+  const handleInitAdmin = async (e: React.SubmitEvent) => {
+    e.preventDefault();
+    setError("");
+
+    if (newPassword !== confirmPassword) {
+      setError("Passwords do not match");
+      return;
+    }
+
+    setLoading(true);
+    try {
+      const res = await fetch("/api/v1/auth/initialize", {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        credentials: "include",
+        body: JSON.stringify({
+          email,
+          password: newPassword,
+        }),
+      });
+
+      if (!res.ok) {
+        const data = await res.json();
+        const authError = parseAuthError(data);
+        setError(authError.message);
+        return;
+      }
+
+      router.push("/workspace");
+    } catch {
+      setError("Network error. Please try again.");
+    } finally {
+      setLoading(false);
+    }
+  };
+
+  // ── Change-password handler ────────────────────────────────────────
+  const handleChangePassword = async (e: React.SubmitEvent) => {
+    e.preventDefault();
+    setError("");
+
+    if (newPassword !== confirmPassword) {
+      setError("Passwords do not match");
+      return;
+    }
+    if (newPassword.length < 8) {
+      setError("Password must be at least 8 characters");
+      return;
+    }
+
+    setLoading(true);
+    try {
+      const res = await fetch("/api/v1/auth/change-password", {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          ...getCsrfHeaders(),
+        },
+        credentials: "include",
+        body: JSON.stringify({
+          current_password: currentPassword,
+          new_password: newPassword,
+          new_email: email || undefined,
+        }),
+      });
+
+      if (!res.ok) {
+        const data = await res.json();
+        const authError = parseAuthError(data);
+        setError(authError.message);
+        return;
+      }
+
+      router.push("/workspace");
+    } catch {
+      setError("Network error. Please try again.");
+    } finally {
+      setLoading(false);
+    }
+  };
+
+  const actualTheme = theme === "system" ? resolvedTheme : theme;
+
+  if (mode === "loading") {
+    return (
+      <div className="flex min-h-screen items-center justify-center">
+        <p className="text-muted-foreground text-sm">Loading…</p>
+      </div>
+    );
+  }
+
+  // ── Admin initialization form ──────────────────────────────────────
+  if (mode === "init_admin") {
+    return (
+      <div className="bg-background flex min-h-screen items-center justify-center">
+        <FlickeringGrid
+          className="absolute inset-0 z-0 mask-[url(/images/deer.svg)] mask-size-[100vw] mask-center mask-no-repeat md:mask-size-[72vh]"
+          squareSize={4}
+          gridGap={4}
+          color={actualTheme === "dark" ? "white" : "black"}
+          maxOpacity={0.3}
+          flickerChance={0.25}
+        />
+        <div className="border-border/20 bg-background/5 w-full max-w-md space-y-6 rounded-3xl border p-8 backdrop-blur-sm">
+          <div className="text-center">
+            <h1 className="font-serif text-3xl">DeerFlow</h1>
+            <p className="text-muted-foreground mt-2">Create admin account</p>
+            <p className="text-muted-foreground mt-1 text-xs">
+              Set up the administrator account to get started.
+            </p>
+          </div>
+          <form onSubmit={handleInitAdmin} className="space-y-2">
+            <div className="flex flex-col space-y-1">
+              <label htmlFor="email" className="text-sm font-medium">
+                Email
+              </label>
+              <Input
+                id="email"
+                type="email"
+                placeholder="you@example.com"
+                value={email}
+                onChange={(e) => setEmail(e.target.value)}
+                required
+              />
+            </div>
+            <div className="flex flex-col space-y-1">
+              <label htmlFor="password" className="text-sm font-medium">
+                Password
+              </label>
+              <Input
+                id="password"
+                type="password"
+                placeholder="Password (min. 8 characters)"
+                value={newPassword}
+                onChange={(e) => setNewPassword(e.target.value)}
+                required
+                minLength={8}
+              />
+            </div>
+            <div className="flex flex-col space-y-1">
+              <label htmlFor="confirmPassword" className="text-sm font-medium">
+                Confirm Password
+              </label>
+              <Input
+                id="confirmPassword"
+                type="password"
+                placeholder="Confirm password"
+                value={confirmPassword}
+                onChange={(e) => setConfirmPassword(e.target.value)}
+                required
+                minLength={8}
+              />
+            </div>
+            {error && <p className="ms-1 text-sm text-red-500">{error}</p>}
+            <Button type="submit" className="w-full" disabled={loading}>
+              {loading ? "Creating account…" : "Create Admin Account"}
+            </Button>
+          </form>
+        </div>
+      </div>
+    );
+  }
+
+  // ── Change-password form (needs_setup after login) ─────────────────
+  return (
+    <div className="bg-background flex min-h-screen items-center justify-center">
+      <FlickeringGrid
+        className="absolute inset-0 z-0 mask-[url(/images/deer.svg)] mask-size-[100vw] mask-center mask-no-repeat md:mask-size-[72vh]"
+        squareSize={4}
+        gridGap={4}
+        color={actualTheme === "dark" ? "white" : "black"}
+        maxOpacity={0.3}
+        flickerChance={0.25}
+      />
+      <div className="border-border/20 bg-background/5 w-full max-w-md space-y-6 rounded-3xl border p-8 backdrop-blur-sm">
+        <div className="text-center">
+          <h1 className="font-serif text-3xl">DeerFlow</h1>
+          <p className="text-muted-foreground mt-2">
+            Complete admin account setup
+          </p>
+          <p className="text-muted-foreground mt-1 text-xs">
+            Set your real email and a new password.
+          </p>
+        </div>
+        <form onSubmit={handleChangePassword} className="space-y-4">
+          <Input
+            type="email"
+            placeholder="Your email"
+            value={email}
+            onChange={(e) => setEmail(e.target.value)}
+            required
+          />
+          <Input
+            type="password"
+            placeholder="Current password"
+            value={currentPassword}
+            onChange={(e) => setCurrentPassword(e.target.value)}
+            required
+          />
+          <Input
+            type="password"
+            placeholder="New password"
+            value={newPassword}
+            onChange={(e) => setNewPassword(e.target.value)}
+            required
+            minLength={8}
+          />
+          <Input
+            type="password"
+            placeholder="Confirm new password"
+            value={confirmPassword}
+            onChange={(e) => setConfirmPassword(e.target.value)}
+            required
+            minLength={8}
+          />
+          {error && <p className="text-sm text-red-500">{error}</p>}
+          <Button type="submit" className="w-full" disabled={loading}>
+            {loading ? "Setting up…" : "Complete Setup"}
+          </Button>
+        </form>
+      </div>
+    </div>
+  );
+}
diff --git a/frontend/src/app/[lang]/docs/layout.tsx b/frontend/src/app/[lang]/docs/layout.tsx
index f63d6ae7b..895da1da8 100644
--- a/frontend/src/app/[lang]/docs/layout.tsx
+++ b/frontend/src/app/[lang]/docs/layout.tsx
@@ -34,14 +34,14 @@ export default async function DocLayout({ children, params }) {
     <Layout
       navbar={
         <Header
-          className="relative max-w-full px-10"
+          className="sticky max-w-full px-10"
           homeURL="/"
           locale={locale}
         />
       }
       pageMap={pageMap}
       docsRepositoryBase="https://github.com/bytedance/deerflow/tree/main/frontend/src/content"
-      footer={<Footer />}
+      footer={<Footer className="mt-0" />}
       i18n={i18n}
       // ... Your additional layout options
     >
diff --git a/frontend/src/app/api/auth/[...all]/route.ts b/frontend/src/app/api/auth/[...all]/route.ts
deleted file mode 100644
index cde6018a8..000000000
--- a/frontend/src/app/api/auth/[...all]/route.ts
+++ /dev/null
@@ -1,5 +0,0 @@
-import { toNextJsHandler } from "better-auth/next-js";
-
-import { auth } from "@/server/better-auth";
-
-export const { GET, POST } = toNextJsHandler(auth.handler);
diff --git a/frontend/src/app/workspace/layout.tsx b/frontend/src/app/workspace/layout.tsx
index 4c1dd2036..c2d567339 100644
--- a/frontend/src/app/workspace/layout.tsx
+++ b/frontend/src/app/workspace/layout.tsx
@@ -1,35 +1,60 @@
-import { cookies } from "next/headers";
-import { Toaster } from "sonner";
+import Link from "next/link";
+import { redirect } from "next/navigation";
 
-import { QueryClientProvider } from "@/components/query-client-provider";
-import { SidebarInset, SidebarProvider } from "@/components/ui/sidebar";
-import { CommandPalette } from "@/components/workspace/command-palette";
-import { WorkspaceSidebar } from "@/components/workspace/workspace-sidebar";
+import { AuthProvider } from "@/core/auth/AuthProvider";
+import { getServerSideUser } from "@/core/auth/server";
+import { assertNever } from "@/core/auth/types";
 
-function parseSidebarOpenCookie(
-  value: string | undefined,
-): boolean | undefined {
-  if (value === "true") return true;
-  if (value === "false") return false;
-  return undefined;
-}
+import { WorkspaceContent } from "./workspace-content";
+
+export const dynamic = "force-dynamic";
 
 export default async function WorkspaceLayout({
   children,
 }: Readonly<{ children: React.ReactNode }>) {
-  const cookieStore = await cookies();
-  const initialSidebarOpen = parseSidebarOpenCookie(
-    cookieStore.get("sidebar_state")?.value,
-  );
+  const result = await getServerSideUser();
 
-  return (
-    <QueryClientProvider>
-      <SidebarProvider className="h-screen" defaultOpen={initialSidebarOpen}>
-        <WorkspaceSidebar />
-        <SidebarInset className="min-w-0">{children}</SidebarInset>
-      </SidebarProvider>
-      <CommandPalette />
-      <Toaster position="top-center" />
-    </QueryClientProvider>
-  );
+  switch (result.tag) {
+    case "authenticated":
+      return (
+        <AuthProvider initialUser={result.user}>
+          <WorkspaceContent>{children}</WorkspaceContent>
+        </AuthProvider>
+      );
+    case "needs_setup":
+      redirect("/setup");
+    case "system_setup_required":
+      redirect("/setup");
+    case "unauthenticated":
+      redirect("/login");
+    case "gateway_unavailable":
+      return (
+        <div className="flex h-screen flex-col items-center justify-center gap-4">
+          <p className="text-muted-foreground">
+            Service temporarily unavailable.
+          </p>
+          <p className="text-muted-foreground text-xs">
+            The backend may be restarting. Please wait a moment and try again.
+          </p>
+          <div className="flex gap-3">
+            <Link
+              href="/workspace"
+              className="bg-primary text-primary-foreground hover:bg-primary/90 rounded-md px-4 py-2 text-sm"
+            >
+              Retry
+            </Link>
+            <Link
+              href="/api/v1/auth/logout"
+              className="text-muted-foreground hover:bg-muted rounded-md border px-4 py-2 text-sm"
+            >
+              Logout &amp; Reset
+            </Link>
+          </div>
+        </div>
+      );
+    case "config_error":
+      throw new Error(result.message);
+    default:
+      assertNever(result);
+  }
 }
diff --git a/frontend/src/app/workspace/workspace-content.tsx b/frontend/src/app/workspace/workspace-content.tsx
new file mode 100644
index 000000000..85c20b2ca
--- /dev/null
+++ b/frontend/src/app/workspace/workspace-content.tsx
@@ -0,0 +1,35 @@
+import { cookies } from "next/headers";
+import { Toaster } from "sonner";
+
+import { QueryClientProvider } from "@/components/query-client-provider";
+import { SidebarInset, SidebarProvider } from "@/components/ui/sidebar";
+import { CommandPalette } from "@/components/workspace/command-palette";
+import { WorkspaceSidebar } from "@/components/workspace/workspace-sidebar";
+
+function parseSidebarOpenCookie(
+  value: string | undefined,
+): boolean | undefined {
+  if (value === "true") return true;
+  if (value === "false") return false;
+  return undefined;
+}
+
+export async function WorkspaceContent({
+  children,
+}: Readonly<{ children: React.ReactNode }>) {
+  const cookieStore = await cookies();
+  const initialSidebarOpen = parseSidebarOpenCookie(
+    cookieStore.get("sidebar_state")?.value,
+  );
+
+  return (
+    <QueryClientProvider>
+      <SidebarProvider className="h-screen" defaultOpen={initialSidebarOpen}>
+        <WorkspaceSidebar />
+        <SidebarInset className="min-w-0">{children}</SidebarInset>
+      </SidebarProvider>
+      <CommandPalette />
+      <Toaster position="top-center" />
+    </QueryClientProvider>
+  );
+}
diff --git a/frontend/src/components/landing/footer.tsx b/frontend/src/components/landing/footer.tsx
index 4176fb353..0c86be205 100644
--- a/frontend/src/components/landing/footer.tsx
+++ b/frontend/src/components/landing/footer.tsx
@@ -1,9 +1,20 @@
 import { useMemo } from "react";
 
-export function Footer() {
+import { cn } from "@/lib/utils";
+
+export type FooterProps = {
+  className?: string;
+};
+
+export function Footer({ className }: FooterProps) {
   const year = useMemo(() => new Date().getFullYear(), []);
   return (
-    <footer className="container-md mx-auto mt-32 flex flex-col items-center justify-center">
+    <footer
+      className={cn(
+        "container-md mx-auto mt-32 flex flex-col items-center justify-center",
+        className,
+      )}
+    >
       <hr className="from-border/0 to-border/0 m-0 h-px w-full border-none bg-linear-to-r via-white/20" />
       <div className="text-muted-foreground container flex h-20 flex-col items-center justify-center text-sm">
         <p className="text-center font-serif text-lg md:text-xl">
diff --git a/frontend/src/components/workspace/input-box.tsx b/frontend/src/components/workspace/input-box.tsx
index ddc682744..83bd75952 100644
--- a/frontend/src/components/workspace/input-box.tsx
+++ b/frontend/src/components/workspace/input-box.tsx
@@ -55,6 +55,7 @@ import {
   DropdownMenuLabel,
   DropdownMenuSeparator,
 } from "@/components/ui/dropdown-menu";
+import { fetchWithAuth } from "@/core/api/fetcher";
 import { getBackendBaseURL } from "@/core/config";
 import { useI18n } from "@/core/i18n/hooks";
 import { useModels } from "@/core/models/hooks";
@@ -395,16 +396,19 @@ export function InputBox({
     setFollowupsLoading(true);
     setFollowups([]);
 
-    fetch(`${getBackendBaseURL()}/api/threads/${threadId}/suggestions`, {
-      method: "POST",
-      headers: { "Content-Type": "application/json" },
-      body: JSON.stringify({
-        messages: recent,
-        n: 3,
-        model_name: context.model_name ?? undefined,
-      }),
-      signal: controller.signal,
-    })
+    fetchWithAuth(
+      `${getBackendBaseURL()}/api/threads/${threadId}/suggestions`,
+      {
+        method: "POST",
+        headers: { "Content-Type": "application/json" },
+        body: JSON.stringify({
+          messages: recent,
+          n: 3,
+          model_name: context.model_name ?? undefined,
+        }),
+        signal: controller.signal,
+      },
+    )
       .then(async (res) => {
         if (!res.ok) {
           return { suggestions: [] as string[] };
diff --git a/frontend/src/components/workspace/settings/account-settings-page.tsx b/frontend/src/components/workspace/settings/account-settings-page.tsx
new file mode 100644
index 000000000..c00d6961e
--- /dev/null
+++ b/frontend/src/components/workspace/settings/account-settings-page.tsx
@@ -0,0 +1,133 @@
+"use client";
+
+import { LogOutIcon } from "lucide-react";
+import { useState } from "react";
+
+import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
+import { fetchWithAuth, getCsrfHeaders } from "@/core/api/fetcher";
+import { useAuth } from "@/core/auth/AuthProvider";
+import { parseAuthError } from "@/core/auth/types";
+
+import { SettingsSection } from "./settings-section";
+
+export function AccountSettingsPage() {
+  const { user, logout } = useAuth();
+  const [currentPassword, setCurrentPassword] = useState("");
+  const [newPassword, setNewPassword] = useState("");
+  const [confirmPassword, setConfirmPassword] = useState("");
+  const [message, setMessage] = useState("");
+  const [error, setError] = useState("");
+  const [loading, setLoading] = useState(false);
+
+  const handleChangePassword = async (e: React.FormEvent) => {
+    e.preventDefault();
+    setError("");
+    setMessage("");
+
+    if (newPassword !== confirmPassword) {
+      setError("New passwords do not match");
+      return;
+    }
+    if (newPassword.length < 8) {
+      setError("Password must be at least 8 characters");
+      return;
+    }
+
+    setLoading(true);
+    try {
+      const res = await fetchWithAuth("/api/v1/auth/change-password", {
+        method: "POST",
+        headers: {
+          "Content-Type": "application/json",
+          ...getCsrfHeaders(),
+        },
+        body: JSON.stringify({
+          current_password: currentPassword,
+          new_password: newPassword,
+        }),
+      });
+
+      if (!res.ok) {
+        const data = await res.json();
+        const authError = parseAuthError(data);
+        setError(authError.message);
+        return;
+      }
+
+      setMessage("Password changed successfully");
+      setCurrentPassword("");
+      setNewPassword("");
+      setConfirmPassword("");
+    } catch {
+      setError("Network error. Please try again.");
+    } finally {
+      setLoading(false);
+    }
+  };
+
+  return (
+    <div className="space-y-8">
+      <SettingsSection title="Profile">
+        <div className="space-y-2">
+          <div className="grid grid-cols-[max-content_max-content] items-center gap-4">
+            <span className="text-muted-foreground text-sm">Email</span>
+            <span className="text-sm font-medium">{user?.email ?? "—"}</span>
+            <span className="text-muted-foreground text-sm">Role</span>
+            <span className="text-sm font-medium capitalize">
+              {user?.system_role ?? "—"}
+            </span>
+          </div>
+        </div>
+      </SettingsSection>
+
+      <SettingsSection
+        title="Change Password"
+        description="Update your account password."
+      >
+        <form onSubmit={handleChangePassword} className="max-w-sm space-y-3">
+          <Input
+            type="password"
+            placeholder="Current password"
+            value={currentPassword}
+            onChange={(e) => setCurrentPassword(e.target.value)}
+            required
+          />
+          <Input
+            type="password"
+            placeholder="New password"
+            value={newPassword}
+            onChange={(e) => setNewPassword(e.target.value)}
+            required
+            minLength={8}
+          />
+          <Input
+            type="password"
+            placeholder="Confirm new password"
+            value={confirmPassword}
+            onChange={(e) => setConfirmPassword(e.target.value)}
+            required
+            minLength={8}
+          />
+          {error && <p className="text-sm text-red-500">{error}</p>}
+          {message && <p className="text-sm text-green-500">{message}</p>}
+          <Button type="submit" variant="outline" size="sm" disabled={loading}>
+            {loading ? "Updating..." : "Update Password"}
+          </Button>
+        </form>
+      </SettingsSection>
+
+      <SettingsSection title="" description="">
+        <Button
+          variant="destructive"
+          size="sm"
+          onClick={logout}
+          className="gap-2"
+        >
+          <LogOutIcon className="size-4" />
+          Sign Out
+        </Button>
+      </SettingsSection>
+    </div>
+  );
+}
diff --git a/frontend/src/content/en/_meta.ts b/frontend/src/content/en/_meta.ts
index 49b33a784..2451ce3be 100644
--- a/frontend/src/content/en/_meta.ts
+++ b/frontend/src/content/en/_meta.ts
@@ -25,6 +25,15 @@ const meta: MetaRecord = {
   blog: {
     type: "page",
   },
+  posts: {
+    type: "page",
+  },
+  login: {
+    type: "page",
+  },
+  setup: {
+    type: "page",
+  },
 };
 
 export default meta;
diff --git a/frontend/src/content/en/application/agents-and-threads.mdx b/frontend/src/content/en/application/agents-and-threads.mdx
index 9ad5eb851..bbf3cfc7e 100644
--- a/frontend/src/content/en/application/agents-and-threads.mdx
+++ b/frontend/src/content/en/application/agents-and-threads.mdx
@@ -1,3 +1,141 @@
+---
+title: Agents and Threads
+description: DeerFlow App supports multiple named agents and maintains conversation state across sessions through threads and checkpointing.
+---
+
+import { Callout, Cards, Steps } from "nextra/components";
+
 # Agents and Threads
 
-TBD
+DeerFlow App supports multiple named agents and maintains conversation state across sessions through threads and checkpointing.
+
+## Agents
+
+### The default agent
+
+The default agent is the Lead Agent with no custom configuration. It loads all globally enabled skills, has access to all configured tools, and uses the first model in `config.yaml` as its default.
+
+### Custom agents
+
+Custom agents are named variants of the Lead Agent. Each one can have:
+
+- a **display name** and an auto-derived ASCII slug (the `name` used internally)
+- a specific **model** to use by default
+- a restricted set of **skills** (or all globally enabled skills if unspecified)
+- a restricted set of **tool groups**
+- a custom **system prompt** or agent-specific instructions
+
+Custom agents are created and managed through:
+
+- **The App UI**: open the Agents section in the settings panel.
+- **The Gateway API**: `POST /api/agents` with the agent definition.
+
+The slug (`name`) is automatically derived from the `display_name` and must be unique. The system checks for conflicts and appends a suffix if needed (`/api/agents/check`).
+
+Agent configuration is stored in `agents/{name}/config.yaml` relative to the backend directory.
+
+### Restricting agent capabilities
+
+To restrict a custom agent to specific skills:
+
+```yaml
+# agents/my-researcher/config.yaml
+name: my-researcher
+display_name: My Researcher
+skills:
+  - deep-research
+  - academic-paper-review
+# Omit skills key entirely to inherit all globally enabled skills
+# Set skills: [] to disable all skills for this agent
+```
+
+To restrict to specific tool groups:
+
+```yaml
+tool_groups:
+  - research # only tools in the 'research' group are available
+```
+
+## Threads
+
+A **thread** is a persistent conversation session. Each thread has:
+
+- a unique thread ID
+- a message history
+- accumulated artifacts (output files)
+- a title (auto-generated after the first exchange)
+- a reference to the agent used
+
+Threads are listed in the sidebar. Clicking a thread resumes the conversation from where it left off.
+
+### Thread lifecycle
+
+<Steps>
+
+#### Create
+
+A new thread is created when you start a conversation. The thread ID is generated and the initial configuration (model, agent, skills) is recorded.
+
+#### Run
+
+Each user message triggers an agent run. The run streams tokens and tool calls back to the browser in real time. The thread state (messages, artifacts) is updated after each turn.
+
+#### Checkpoint
+
+If a checkpointer is configured, the thread state is persisted after each turn. This means the conversation survives process restarts.
+
+#### Resume
+
+Opening a thread from the sidebar loads its full history from the checkpointer. The agent picks up from where it left off.
+
+</Steps>
+
+### Checkpointer configuration
+
+The checkpointer controls how thread state is persisted:
+
+```yaml
+# In-memory (default if omitted — state lost on restart)
+# checkpointer:
+#   type: memory
+
+# SQLite (survives restarts, single-process)
+checkpointer:
+  type: sqlite
+  connection_string: checkpoints.db
+
+# PostgreSQL (multi-process, production)
+# checkpointer:
+#   type: postgres
+#   connection_string: postgresql://user:password@localhost:5432/deerflow
+```
+
+<Callout type="info">
+  The LangGraph Server manages its own state separately. The
+  <code>checkpointer</code> setting in <code>config.yaml</code> applies to the
+  embedded <code>DeerFlowClient</code> (used in direct Python integrations), not
+  to the LangGraph Server deployment used by DeerFlow App.
+</Callout>
+
+### Thread data storage
+
+Working files produced during a thread (uploads, intermediate files, output artifacts) are stored under:
+
+```
+backend/.deer-flow/threads/{thread_id}/user-data/
+```
+
+This directory is mounted into the sandbox as `/mnt/user-data/` for the agent to read and write.
+
+In Docker deployments, this path is bind-mounted from the host so that thread data persists across container restarts. Set `DEER_FLOW_ROOT` to the absolute path of your deer-flow repository root to ensure the correct host path is used.
+
+<Cards num={2}>
+  <Cards.Card
+    title="Workspace Usage"
+    href="/docs/application/workspace-usage"
+  />
+  <Cards.Card
+    title="Operations & Troubleshooting"
+    href="/docs/application/operations-and-troubleshooting"
+  />
+</Cards>
diff --git a/frontend/src/content/en/application/configuration.mdx b/frontend/src/content/en/application/configuration.mdx
index 79de38b76..fb0fedc89 100644
--- a/frontend/src/content/en/application/configuration.mdx
+++ b/frontend/src/content/en/application/configuration.mdx
@@ -1,3 +1,273 @@
+---
+title: Configuration
+description: DeerFlow App is configured through two files and a set of environment variables. This page covers the application-level configuration that most operators need to set up before deploying.
+---
+
+import { Callout, Cards, Tabs } from "nextra/components";
+
 # Configuration
 
-TBD
+DeerFlow App is configured through two files and a set of environment variables. This page covers the application-level configuration that most operators need to set up before deploying.
+
+## Configuration files
+
+| File | Purpose |
+|---|---|
+| `config.yaml` | Backend configuration: models, sandbox, tools, skills, memory, and all Harness settings |
+| `extensions_config.json` | MCP servers and skill enable/disable state (managed by the App UI and Gateway API) |
+
+Frontend environment variables control the Next.js build and runtime behavior.
+
+## config.yaml
+
+Start by copying the example:
+
+```bash
+cp config.example.yaml config.yaml
+```
+
+The most important sections for application configuration are:
+
+### Models
+
+Configure the LLM providers the agent can use. At least one model is required.
+
+<Tabs items={["OpenAI", "Anthropic", "DeepSeek", "Ollama", "Gemini"]}>
+  <Tabs.Tab>
+```yaml
+models:
+  - name: gpt-4o
+    use: langchain_openai:ChatOpenAI
+    model: gpt-4o
+    api_key: $OPENAI_API_KEY
+    request_timeout: 600.0
+    max_retries: 2
+    supports_vision: true
+```
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+models:
+  - name: claude-3-5-sonnet
+    use: langchain_anthropic:ChatAnthropic
+    model: claude-3-5-sonnet-20241022
+    api_key: $ANTHROPIC_API_KEY
+    default_request_timeout: 600.0
+    max_retries: 2
+    max_tokens: 8192
+    supports_vision: true
+    supports_thinking: true
+    when_thinking_enabled:
+      thinking:
+        type: enabled
+    when_thinking_disabled:
+      thinking:
+        type: disabled
+```
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+models:
+  - name: deepseek-v3
+    use: deerflow.models.patched_deepseek:PatchedChatDeepSeek
+    model: deepseek-reasoner
+    api_key: $DEEPSEEK_API_KEY
+    timeout: 600.0
+    max_retries: 2
+    supports_thinking: true
+    when_thinking_enabled:
+      extra_body:
+        thinking:
+          type: enabled
+    when_thinking_disabled:
+      extra_body:
+        thinking:
+          type: disabled
+```
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+models:
+  - name: qwen3-local
+    use: langchain_ollama:ChatOllama
+    model: qwen3:32b
+    base_url: http://localhost:11434  # No /v1 suffix — uses native Ollama API
+    num_predict: 8192
+    temperature: 0.7
+    reasoning: true
+    supports_thinking: true
+    supports_vision: false
+```
+
+Install Ollama provider: `cd backend && uv add 'deerflow-harness[ollama]'`
+
+<Callout type="warning">
+  Use <code>langchain_ollama:ChatOllama</code> (not the OpenAI-compatible
+  endpoint) for Ollama models. The native API correctly separates thinking
+  content; the OpenAI-compatible endpoint may flatten or drop it.
+</Callout>
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+models:
+  - name: gemini-2.5-pro
+    use: langchain_google_genai:ChatGoogleGenerativeAI
+    model: gemini-2.5-pro
+    gemini_api_key: $GEMINI_API_KEY
+    timeout: 600.0
+    max_retries: 2
+    max_tokens: 8192
+    supports_vision: true
+```
+  </Tabs.Tab>
+</Tabs>
+
+### Sandbox
+
+Choose the execution environment for agent file and command operations:
+
+<Tabs items={["Local (default)", "Docker container", "K8s Provisioner"]}>
+  <Tabs.Tab>
+```yaml
+sandbox:
+  use: deerflow.sandbox.local:LocalSandboxProvider
+  allow_host_bash: false  # set true only for trusted single-user workflows
+```
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+sandbox:
+  use: deerflow.community.aio_sandbox:AioSandboxProvider
+  image: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest
+  replicas: 3
+  idle_timeout: 600
+```
+
+Install: `cd backend && uv add 'deerflow-harness[aio-sandbox]'`
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+sandbox:
+  use: deerflow.community.aio_sandbox:AioSandboxProvider
+  provisioner_url: http://provisioner:8002
+```
+  </Tabs.Tab>
+</Tabs>
+
+### Tools
+
+Configure which tools the agent has access to. The defaults use DuckDuckGo (no API key) and Jina AI for web operations:
+
+```yaml
+tools:
+  # Web search (choose one)
+  - use: deerflow.community.ddg_search.tools:web_search_tool  # default, no key required
+  # - use: deerflow.community.tavily.tools:web_search_tool
+  #   api_key: $TAVILY_API_KEY
+
+  # Web fetch (choose one)
+  - use: deerflow.community.jina_ai.tools:web_fetch_tool
+
+  # Image search
+  - use: deerflow.community.image_search.tools:image_search_tool
+
+  # File operations
+  - use: deerflow.sandbox.tools:ls_tool
+  - use: deerflow.sandbox.tools:read_file_tool
+  - use: deerflow.sandbox.tools:glob_tool
+  - use: deerflow.sandbox.tools:grep_tool
+  - use: deerflow.sandbox.tools:write_file_tool
+  - use: deerflow.sandbox.tools:str_replace_tool
+  - use: deerflow.sandbox.tools:bash_tool
+```
+
+### Thread state persistence (checkpointer)
+
+By default, DeerFlow uses an SQLite checkpointer for thread state persistence:
+
+```yaml
+checkpointer:
+  type: sqlite
+  connection_string: checkpoints.db  # stored in backend/.deer-flow/
+```
+
+For production deployments with multiple processes:
+
+```yaml
+checkpointer:
+  type: postgres
+  connection_string: postgresql://user:password@localhost:5432/deerflow
+```
+
+Install PostgreSQL support: `cd backend && uv add langgraph-checkpoint-postgres psycopg[binary] psycopg-pool`
+
+For in-memory only (state lost on restart):
+
+```yaml
+checkpointer:
+  type: memory
+```
+
+### Memory
+
+```yaml
+memory:
+  enabled: true
+  storage_path: memory.json
+  debounce_seconds: 30
+  max_facts: 100
+  injection_enabled: true
+  max_injection_tokens: 2000
+```
+
+## Frontend environment variables
+
+Set these before running `pnpm build` or starting the frontend in production:
+
+| Variable | Required | Description |
+|---|---|---|
+| `BETTER_AUTH_SECRET` | **Required** in production | Secret for session signing. Use `openssl rand -base64 32`. |
+| `BETTER_AUTH_URL` | Recommended | Public-facing base URL (e.g., `https://your-domain.com`) |
+| `SKIP_ENV_VALIDATION` | Optional | Set to `1` to skip env validation during build (not recommended) |
+| `NEXT_PUBLIC_API_URL` | Optional | Override the API base URL for the frontend |
+
+In development, set these in a `.env` file at the repo root:
+
+```bash
+BETTER_AUTH_SECRET=your-strong-secret-here-min-32-chars
+```
+
+## extensions_config.json
+
+This file manages MCP server connections and skill enable/disable state. It is created automatically when you first manage extensions through the App UI or Gateway API.
+
+Manual example:
+
+```json
+{
+  "mcpServers": {
+    "my-server": {
+      "command": "npx",
+      "args": ["-y", "@my-org/my-mcp-server"],
+      "enabled": true
+    }
+  },
+  "skills": {
+    "deep-research": { "enabled": true },
+    "data-analysis": { "enabled": true }
+  }
+}
+```
+
+## Config upgrade
+
+When the config schema changes, `config_version` is bumped. To merge new fields into your existing config without losing customizations:
+
+```bash
+make config-upgrade
+```
+
+<Cards num={2}>
+  <Cards.Card title="Deployment Guide" href="/docs/application/deployment-guide" />
+  <Cards.Card title="Harness Configuration" href="/docs/harness/configuration" />
+</Cards>
diff --git a/frontend/src/content/en/application/deployment-guide.mdx b/frontend/src/content/en/application/deployment-guide.mdx
index 05fe72289..8d5542dbb 100644
--- a/frontend/src/content/en/application/deployment-guide.mdx
+++ b/frontend/src/content/en/application/deployment-guide.mdx
@@ -1,3 +1,217 @@
+---
+title: Deployment Guide 
+description: "This guide covers all supported deployment methods for DeerFlow App: local development, Docker Compose, and production with Kubernetes-managed sandboxes."
+---
+
+import { Callout, Cards, Steps, Tabs } from "nextra/components";
+
 # Deployment Guide
 
-TBD
+This guide covers all supported deployment methods for DeerFlow App: local development, Docker Compose, and production with Kubernetes-managed sandboxes.
+
+## Local development deployment
+
+The local workflow is the fastest way to run DeerFlow. All services run as native processes on your machine.
+
+<Tabs items={["Start", "Stop", "Logs"]}>
+  <Tabs.Tab>
+```bash
+make dev
+```
+
+Services started:
+
+| Service     | Port | Description              |
+| ----------- | ---- | ------------------------ |
+| LangGraph   | 2024 | DeerFlow Harness runtime |
+| Gateway API | 8001 | FastAPI backend          |
+| Frontend    | 3000 | Next.js UI               |
+| nginx       | 2026 | Unified reverse proxy    |
+
+Access the app at **http://localhost:2026**.
+
+  </Tabs.Tab>
+  <Tabs.Tab>
+```bash
+make stop
+```
+
+Stops all four services. Safe to run even if a service is not running.
+
+  </Tabs.Tab>
+  <Tabs.Tab>
+```
+logs/langgraph.log   # Agent runtime logs
+logs/gateway.log     # API gateway logs
+logs/frontend.log    # Next.js dev server logs
+logs/nginx.log       # nginx access/error logs
+```
+
+Tail a log in real time:
+
+```bash
+tail -f logs/langgraph.log
+```
+
+  </Tabs.Tab>
+</Tabs>
+
+## Docker Compose deployment
+
+Docker Compose runs all services in containers. Use this for a more production-like local setup or for team environments.
+
+### Prerequisites
+
+- Docker (or Docker Desktop / OrbStack on macOS)
+- A configured `config.yaml` at the repo root
+
+### Development compose
+
+```bash
+# Set the absolute path to your deer-flow repo root
+export DEER_FLOW_ROOT=/path/to/deer-flow
+
+docker compose -f docker/docker-compose-dev.yaml up --build
+```
+
+Services: nginx, frontend, gateway, langgraph, and optionally provisioner (for K8s-managed sandboxes).
+
+Access the app at **http://localhost:2026**.
+
+### Environment variables
+
+Create a `.env` file in the repo root for secrets and runtime configuration:
+
+```bash
+# .env
+OPENAI_API_KEY=sk-...
+DEER_FLOW_ROOT=/absolute/path/to/deer-flow
+BETTER_AUTH_SECRET=your-secret-here-min-32-chars
+```
+
+The `docker-compose*.yaml` files include an `env_file: ../.env` directive that loads this automatically.
+
+<Callout type="warning">
+  Always set <code>BETTER_AUTH_SECRET</code> to a strong random string before
+  deploying. Without it, the frontend build uses a default that is publicly
+  known.
+</Callout>
+
+### Data persistence
+
+Thread data is stored in `backend/.deer-flow/threads/`. In Docker deployments, this directory is bind-mounted into the langgraph container.
+
+To avoid data loss when containers are recreated:
+
+1. Set `DEER_FLOW_ROOT` to the absolute repo root path (or a stable host path).
+2. Verify the `threads/` and `skills/` directories are mounted correctly.
+
+For production, use a named volume or a Persistent Volume Claim (PVC) instead of a host bind-mount.
+
+## Production deployment considerations
+
+### Sandbox mode selection
+
+| Sandbox                                | Use case                                   |
+| -------------------------------------- | ------------------------------------------ |
+| `LocalSandboxProvider`                 | Single-user, trusted local workflows       |
+| `AioSandboxProvider` (Docker)          | Multi-user, moderate isolation requirement |
+| `AioSandboxProvider` + K8s Provisioner | Production, strong isolation, multi-user   |
+
+For any deployment with more than one concurrent user, use a container-based sandbox to prevent users from interfering with each other's execution environments.
+
+### K8s Provisioner setup
+
+The provisioner manages sandbox Pods in a Kubernetes cluster. It is included in `docker/docker-compose-dev.yaml`.
+
+<Steps>
+
+#### Configure the provisioner
+
+Set required environment variables in your `.env` or compose override:
+
+```bash
+K8S_NAMESPACE=deer-flow
+SANDBOX_IMAGE=enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest
+DEER_FLOW_ROOT=/absolute/path/to/deer-flow
+```
+
+#### Configure the sandbox provider
+
+```yaml
+# config.yaml
+sandbox:
+  use: deerflow.community.aio_sandbox:AioSandboxProvider
+  provisioner_url: http://provisioner:8002
+```
+
+#### Configure data persistence
+
+For production, use PVCs instead of hostPath volumes:
+
+```bash
+# In .env or compose environment
+USERDATA_PVC_NAME=deer-flow-userdata-pvc
+SKILLS_PVC_NAME=deer-flow-skills-pvc
+```
+
+When `USERDATA_PVC_NAME` is set, the provisioner automatically uses subPath (`threads/{thread_id}/user-data`) so each thread gets its own directory in the PVC.
+
+</Steps>
+
+### nginx configuration
+
+nginx routes all traffic. Key environment variables that control routing:
+
+| Variable             | Default          | Description                             |
+| -------------------- | ---------------- | --------------------------------------- |
+| `LANGGRAPH_UPSTREAM` | `langgraph:2024` | LangGraph service address               |
+| `LANGGRAPH_REWRITE`  | `/`              | URL rewrite prefix for LangGraph routes |
+
+These are set in the Docker Compose environment and processed by `envsubst` at container startup.
+
+### Authentication
+
+DeerFlow App uses [Better Auth](https://www.better-auth.com/) for session management. In production:
+
+1. Set `BETTER_AUTH_SECRET` to a strong random string (minimum 32 characters).
+2. Set `BETTER_AUTH_URL` to your public-facing URL (e.g., `https://your-domain.com`).
+
+```bash
+# Generate a secret
+openssl rand -base64 32
+```
+
+### Resource recommendations
+
+| Service                         | Minimum          | Recommended      |
+| ------------------------------- | ---------------- | ---------------- |
+| LangGraph (agent runtime)       | 2 vCPU, 4 GB RAM | 4 vCPU, 8 GB RAM |
+| Gateway                         | 0.5 vCPU, 512 MB | 1 vCPU, 1 GB     |
+| Frontend                        | 0.5 vCPU, 512 MB | 1 vCPU, 1 GB     |
+| Sandbox container (per session) | 1 vCPU, 1 GB     | 2 vCPU, 2 GB     |
+
+## Deployment verification
+
+After starting, verify the deployment:
+
+```bash
+# Check Gateway health
+curl http://localhost:8001/health
+
+# Check LangGraph health
+curl http://localhost:2024/ok
+
+# List configured models (through nginx)
+curl http://localhost:2026/api/models
+```
+
+A working deployment returns a `200` response from each endpoint. The `/api/models` call returns the list of models from your `config.yaml`.
+
+<Cards num={2}>
+  <Cards.Card title="Configuration" href="/docs/application/configuration" />
+  <Cards.Card
+    title="Operations & Troubleshooting"
+    href="/docs/application/operations-and-troubleshooting"
+  />
+</Cards>
diff --git a/frontend/src/content/en/application/index.mdx b/frontend/src/content/en/application/index.mdx
index a951ea81b..f61f18775 100644
--- a/frontend/src/content/en/application/index.mdx
+++ b/frontend/src/content/en/application/index.mdx
@@ -1,3 +1,72 @@
+---
+title: DeerFlow App
+description: DeerFlow App is the reference implementation of what a production DeerFlow experience looks like. It assembles the Harness runtime, a web-based conversation workspace, an API gateway, and a reverse proxy into a single deployable system.
+---
+
+import { Callout, Cards } from "nextra/components";
+
 # DeerFlow App
 
-TBD
+<Callout type="info" emoji="🚀">
+  DeerFlow App is a complete Super Agent application built on top of DeerFlow
+  Harness. It packages the runtime capabilities into a ready-to-deploy product
+  with a web UI, API gateway, and operational tooling.
+</Callout>
+
+DeerFlow App is the reference implementation of what a production DeerFlow experience looks like. It assembles the Harness runtime, a web-based conversation workspace, an API gateway, and a reverse proxy into a single deployable system.
+
+## What the App provides
+
+| Capability | Description |
+|---|---|
+| **Web workspace** | Browser-based conversation UI with support for threads, artifacts, file uploads, and skill selection |
+| **Custom agents** | Create and manage named agents with different models, skills, and tool sets |
+| **Thread management** | Persistent conversation threads with checkpointing and history |
+| **Streaming responses** | Real-time token streaming with thinking steps and tool call visibility |
+| **Artifact viewer** | In-browser preview and download of files and outputs produced by the agent |
+| **Extensions UI** | Enable/disable MCP servers and skills without editing config files |
+| **Gateway API** | FastAPI-based REST API that bridges the frontend and the LangGraph runtime |
+
+## Architecture
+
+The DeerFlow App runs as four services behind a single nginx reverse proxy:
+
+```
+                ┌──────────────────┐
+     Browser → │  nginx :2026      │
+                └──────────────────┘
+                  │          │
+         ┌────────┘          └────────┐
+         ▼                            ▼
+┌──────────────────┐       ┌──────────────────────┐
+│  Frontend :3000  │       │  Gateway API :8001    │
+│  (Next.js)       │       │  (FastAPI)            │
+└──────────────────┘       └──────────────────────┘
+                                      │
+                            ┌─────────┘
+                            ▼
+                  ┌──────────────────────┐
+                  │  LangGraph :2024     │
+                  │  (DeerFlow Harness)  │
+                  └──────────────────────┘
+```
+
+- **nginx**: routes requests — `/api/*` to the Gateway, LangGraph streaming endpoints to LangGraph directly, and everything else to the frontend.
+- **Frontend** (Next.js + React): the browser UI. Communicates with both the Gateway and LangGraph.
+- **Gateway** (FastAPI): handles API operations — model listing, agent CRUD, memory, extensions management, file uploads.
+- **LangGraph**: the DeerFlow Harness runtime. Manages thread state, agent execution, and streaming.
+
+## Technology stack
+
+| Layer | Technology |
+|---|---|
+| Frontend | Next.js 16, React 19, TypeScript, pnpm |
+| Gateway | FastAPI, Python 3.12, uvicorn |
+| Agent runtime | LangGraph, LangChain, DeerFlow Harness |
+| Reverse proxy | nginx |
+| State persistence | LangGraph Server (default) + optional SQLite/PostgreSQL checkpointer |
+
+<Cards num={2}>
+  <Cards.Card title="Quick Start" href="/docs/application/quick-start" />
+  <Cards.Card title="Deployment Guide" href="/docs/application/deployment-guide" />
+</Cards>
diff --git a/frontend/src/content/en/application/operations-and-troubleshooting.mdx b/frontend/src/content/en/application/operations-and-troubleshooting.mdx
index 6bf022503..e12f4d4f1 100644
--- a/frontend/src/content/en/application/operations-and-troubleshooting.mdx
+++ b/frontend/src/content/en/application/operations-and-troubleshooting.mdx
@@ -1,3 +1,176 @@
+---
+title: Operations and Troubleshooting
+description: This page covers day-to-day operational tasks and solutions to common problems when running DeerFlow App.
+---
+
+import { Callout, Cards } from "nextra/components";
+
 # Operations and Troubleshooting
 
-TBD
+This page covers day-to-day operational tasks and solutions to common problems when running DeerFlow App.
+
+## Log files
+
+All services write logs to the `logs/` directory when started with `make dev`:
+
+| File | Service |
+|---|---|
+| `logs/langgraph.log` | LangGraph / DeerFlow Harness runtime |
+| `logs/gateway.log` | FastAPI Gateway API |
+| `logs/frontend.log` | Next.js frontend dev server |
+| `logs/nginx.log` | nginx reverse proxy |
+
+Tail logs in real time:
+
+```bash
+tail -f logs/langgraph.log
+tail -f logs/gateway.log
+```
+
+Adjust the runtime log level in `config.yaml`:
+
+```yaml
+log_level: debug  # debug | info | warning | error
+```
+
+## Health checks
+
+Verify each service is responding:
+
+```bash
+# Gateway health
+curl http://localhost:8001/health
+
+# LangGraph health
+curl http://localhost:2024/ok
+
+# Through nginx (verifies full proxy chain)
+curl http://localhost:2026/api/models
+```
+
+## Config upgrade
+
+When you pull a new version of DeerFlow, the `config_version` in `config.example.yaml` may be higher than your `config.yaml`. To merge new fields without losing your customizations:
+
+```bash
+make config-upgrade
+```
+
+Check the current version in your config:
+
+```bash
+grep config_version config.yaml
+```
+
+## Common problems
+
+### The app loads but the agent doesn't respond
+
+1. Check `logs/langgraph.log` for startup errors.
+2. Verify your model is correctly configured in `config.yaml` with a valid API key.
+3. Confirm the API key environment variable is set in the shell that ran `make dev`.
+4. Test the model endpoint directly with `curl` to rule out network issues.
+
+---
+
+### Model API errors in the agent response
+
+The agent reports an error like `"No chat models are configured"` or `"model not found"`:
+
+- Check that the `models:` section in `config.yaml` has at least one entry.
+- Verify the `name:` field matches what you are requesting in the UI.
+- Check that `api_key:` is referencing the correct environment variable and that the variable is set.
+
+---
+
+### Frontend build fails with `BETTER_AUTH_SECRET`
+
+```
+Error: BETTER_AUTH_SECRET is required
+```
+
+Set the environment variable before building:
+
+```bash
+export BETTER_AUTH_SECRET=$(openssl rand -base64 32)
+pnpm build
+```
+
+Or set it in your `.env` file at the repo root.
+
+---
+
+### Sandbox-related tool failures
+
+If file tools (`read_file`, `ls`, `bash`) fail with permission or path errors:
+
+1. For `LocalSandboxProvider`: check that `allow_host_bash` is set correctly and that the agent has read/write access to the thread data directory.
+2. For container sandboxes: verify Docker (or Apple Container) is running and the sandbox image is accessible.
+3. For K8s Provisioner: check that the provisioner service is healthy (`curl http://localhost:8002/health`) and that the K8s cluster is reachable.
+
+---
+
+### K8s Provisioner not connecting
+
+```
+Connection refused: http://provisioner:8002
+```
+
+- Ensure the provisioner service is running: `docker compose ps`.
+- Check that `K8S_API_SERVER` is set correctly (should point to the K8s API from inside the container, not `localhost`).
+- Verify `~/.kube/config` is mounted and the cluster is reachable from the container host.
+
+---
+
+### MCP tools not loading
+
+If MCP tools appear in `extensions_config.json` but are not available in the agent:
+
+1. Check `logs/langgraph.log` for MCP initialization errors.
+2. Verify the MCP server command is installed (`npx`, `uvx`, or the relevant binary).
+3. Test the server command manually to confirm it starts without errors.
+4. Set `log_level: debug` to see detailed MCP loading output.
+
+---
+
+### Memory not persisting across sessions
+
+- Verify `memory.enabled: true` in `config.yaml`.
+- Check that the storage path is writable: `ls -la backend/.deer-flow/`.
+- Look for memory update errors in `logs/langgraph.log` (search for "memory").
+
+## Data backup
+
+Thread data and memory are stored under `backend/.deer-flow/`:
+
+```
+backend/.deer-flow/
+  memory.json           # global agent memory
+  agents/               # per-agent memory
+  threads/              # thread working directories
+    {thread_id}/
+      user-data/
+        uploads/
+        outputs/
+  checkpoints.db        # SQLite checkpoints (if configured)
+```
+
+Back up this entire directory to preserve conversation history, artifacts, and learned memory.
+
+In Docker deployments, the bind-mounted host path (`$DEER_FLOW_ROOT/backend/.deer-flow/`) is the source of truth — back up the host path.
+
+## Restarting services
+
+To restart a single service (local deployment):
+
+```bash
+make stop
+make dev
+```
+
+Individual service restart scripts are in `scripts/`. For targeted restarts, you can kill and relaunch individual processes manually using the PIDs in the log files.
+
+<Cards num={2}>
+  <Cards.Card title="Deployment Guide" href="/docs/application/deployment-guide" />
+  <Cards.Card title="Configuration" href="/docs/application/configuration" />
+</Cards>
diff --git a/frontend/src/content/en/application/quick-start.mdx b/frontend/src/content/en/application/quick-start.mdx
index 81c102836..d9b4a3e7d 100644
--- a/frontend/src/content/en/application/quick-start.mdx
+++ b/frontend/src/content/en/application/quick-start.mdx
@@ -1,3 +1,129 @@
+---
+title: Quick Start
+description: This guide walks you through starting DeerFlow App on your local machine using the `make dev` workflow. All four services (LangGraph, Gateway, Frontend, nginx) start together and are accessible through a single URL.
+---
+
+import { Callout, Cards, Steps } from "nextra/components";
+
 # Quick Start
 
-TBD
+<Callout type="info" emoji="⚡">
+  Get DeerFlow App running locally in about 10 minutes. You need a machine with
+  Python 3.12+, Node.js 22+, and at least one LLM API key.
+</Callout>
+
+This guide walks you through starting DeerFlow App on your local machine using the `make dev` workflow. All four services (LangGraph, Gateway, Frontend, nginx) start together and are accessible through a single URL.
+
+## Prerequisites
+
+Check that all required tools are installed:
+
+```bash
+make check
+```
+
+Required:
+
+| Tool | Minimum version |
+|---|---|
+| Python | 3.12 |
+| uv | latest |
+| Node.js | 22 |
+| pnpm | 10 |
+| nginx | any recent version |
+
+On macOS, install with `brew install python uv node pnpm nginx`. On Linux, use your distribution's package manager.
+
+## Steps
+
+<Steps>
+
+### Clone the repository
+
+```bash
+git clone https://github.com/bytedance/deer-flow.git
+cd deer-flow
+```
+
+### Install dependencies
+
+```bash
+make install
+```
+
+This installs both backend Python dependencies (via `uv`) and frontend Node.js dependencies (via `pnpm`).
+
+### Create your config file
+
+```bash
+cp config.example.yaml config.yaml
+```
+
+Then edit `config.yaml` to add at least one model. The minimum change is adding a model under the `models:` section:
+
+```yaml
+models:
+  - name: gpt-4o
+    use: langchain_openai:ChatOpenAI
+    model: gpt-4o
+    api_key: $OPENAI_API_KEY
+    request_timeout: 600.0
+    max_retries: 2
+    supports_vision: true
+```
+
+Set the corresponding environment variable before starting:
+
+```bash
+export OPENAI_API_KEY=sk-...
+```
+
+See the [Application Configuration](/docs/application/configuration) page for examples with other model providers.
+
+### Start all services
+
+```bash
+make dev
+```
+
+This starts:
+- LangGraph server on port `2024`
+- Gateway API on port `8001`
+- Frontend on port `3000`
+- nginx reverse proxy on port `2026`
+
+Open [http://localhost:2026](http://localhost:2026) in your browser.
+
+### Stop all services
+
+```bash
+make stop
+```
+
+</Steps>
+
+## What happens when you run `make dev`
+
+- Existing service processes are stopped first (safe to run after an interrupted start).
+- Each service is started in the background and writes logs to the `logs/` directory.
+- nginx proxies all traffic through port `2026`, so you only need one URL.
+
+Log files:
+
+| Service | Log file |
+|---|---|
+| LangGraph | `logs/langgraph.log` |
+| Gateway | `logs/gateway.log` |
+| Frontend | `logs/frontend.log` |
+| nginx | `logs/nginx.log` |
+
+<Callout type="tip">
+  If something is not working, check the log files first. Most startup errors
+  (missing API keys, config parsing failures) appear in `logs/langgraph.log` or
+  `logs/gateway.log`.
+</Callout>
+
+<Cards num={2}>
+  <Cards.Card title="Deployment Guide" href="/docs/application/deployment-guide" />
+  <Cards.Card title="Configuration" href="/docs/application/configuration" />
+</Cards>
diff --git a/frontend/src/content/en/application/workspace-usage.mdx b/frontend/src/content/en/application/workspace-usage.mdx
index 567a6fb98..8589c624c 100644
--- a/frontend/src/content/en/application/workspace-usage.mdx
+++ b/frontend/src/content/en/application/workspace-usage.mdx
@@ -1,3 +1,79 @@
+---
+title: Workspace Usage
+description: The DeerFlow App workspace is a browser-based interface for having multi-turn conversations with the agent, tracking task progress, viewing artifacts, and managing files.
+---
+
+import { Callout, Cards } from "nextra/components";
+
 # Workspace Usage
 
-TBD
+The DeerFlow App workspace is a browser-based interface for having multi-turn conversations with the agent, tracking task progress, viewing artifacts, and managing files.
+
+## Starting a conversation
+
+Open the app at `http://localhost:2026` (or your deployment URL). The workspace is split into:
+
+- **Sidebar** (left): thread list, new thread button, and navigation to agents and settings.
+- **Conversation area** (center): the active thread's message history.
+- **Input bar** (bottom): text input, skill selector, model selector, and attachment controls.
+
+To start a new thread, click **New Thread** in the sidebar or use the keyboard shortcut. Each thread is independent — it has its own conversation history, artifacts, and state.
+
+## Selecting a model
+
+Use the model picker in the input bar to choose which configured model to use for the current request. Models listed here correspond to the `models:` entries in your `config.yaml`.
+
+The selected model applies to the next message only. You can switch models between messages in the same thread.
+
+If a model supports **thinking mode**, a toggle appears next to the model selector. When thinking is enabled, the agent's internal reasoning steps are shown inline in the response.
+
+## Selecting a skill
+
+Click the **Skills** button in the input bar to open the skill selector. Enabled skills are listed here. Selecting a skill tells the agent to apply that skill's workflow and instructions for the current message.
+
+<Callout type="tip">
+  Skills are most useful when you want the agent to follow a specific approach,
+  such as deep research methodology or structured data analysis. For general
+  questions, you typically do not need to select a skill.
+</Callout>
+
+## Plan mode
+
+Toggle **Plan Mode** in the input bar to enable the todo list middleware. In plan mode, the agent creates and maintains a visible task list as it works through a complex multi-step objective. Each task shows its status (`pending`, `in_progress`, `completed`) in real time.
+
+Plan mode is most useful for complex tasks with 3 or more distinct steps.
+
+## Uploading files
+
+Click the attachment icon in the input bar to upload files. Supported file types include PDFs, text files, spreadsheets, and images.
+
+Uploaded files are stored under the thread's working directory (`/mnt/user-data/uploads/`) and are accessible to the agent during the conversation. PDFs are automatically converted to Markdown for better model comprehension (the converter can be configured via `uploads.pdf_converter` in `config.yaml`).
+
+## Viewing artifacts
+
+When the agent produces output files (reports, charts, code, etc.), they appear in the **Artifacts** panel. Each artifact shows a preview (for supported types) and a download link.
+
+Artifacts are tracked in the thread state and persist across page reloads.
+
+## Understanding the message stream
+
+Each agent response in the conversation may contain:
+
+- **Text**: the agent's direct reply.
+- **Thinking** (if thinking mode is enabled): the model's internal reasoning, shown in a collapsible block.
+- **Tool calls**: a record of which tools were called and with what arguments.
+- **Tool results**: the output returned by each tool.
+- **Subagent output**: if the agent delegated a task, the subagent's progress appears inline.
+
+Tool calls and thinking steps are collapsed by default. Click to expand them.
+
+## Switching agents
+
+If you have created custom agents, use the **Agent** selector in the input bar to switch to a different agent. The selected agent persists for the duration of the thread.
+
+Custom agents may have different models, skills, tool sets, and system prompts. See [Agents and Threads](/docs/application/agents-and-threads) for how to create and manage custom agents.
+
+<Cards num={2}>
+  <Cards.Card title="Agents and Threads" href="/docs/application/agents-and-threads" />
+  <Cards.Card title="Configuration" href="/docs/application/configuration" />
+</Cards>
diff --git a/frontend/src/content/en/harness/_meta.ts b/frontend/src/content/en/harness/_meta.ts
index f96fd39f9..b682b71cb 100644
--- a/frontend/src/content/en/harness/_meta.ts
+++ b/frontend/src/content/en/harness/_meta.ts
@@ -10,6 +10,12 @@ const meta: MetaRecord = {
   "design-principles": {
     title: "Design Principles",
   },
+  "lead-agent": {
+    title: "Lead Agent",
+  },
+  middlewares: {
+    title: "Middlewares",
+  },
   configuration: {
     title: "Configuration",
   },
@@ -25,6 +31,12 @@ const meta: MetaRecord = {
   sandbox: {
     title: "Sandbox",
   },
+  subagents: {
+    title: "Subagents",
+  },
+  mcp: {
+    title: "MCP Integration",
+  },
   customization: {
     title: "Customization",
   },
diff --git a/frontend/src/content/en/harness/configuration.mdx b/frontend/src/content/en/harness/configuration.mdx
index 79de38b76..8043e2beb 100644
--- a/frontend/src/content/en/harness/configuration.mdx
+++ b/frontend/src/content/en/harness/configuration.mdx
@@ -1,3 +1,162 @@
+---
+title: Configuration
+description: "DeerFlow's configuration system is designed around one goal: every meaningful behavior should be expressible in a config file, not hardcoded in the application. This makes deployments reproducible, auditable, and easy to customize per environment."
+---
+
+import { Callout, Cards } from "nextra/components";
+
 # Configuration
 
-TBD
+<Callout type="info" emoji="⚙️">
+  All DeerFlow Harness behaviors are driven by <code>config.yaml</code>. One
+  file controls which models are available, how the sandbox runs, what tools are
+  loaded, and how each subsystem behaves.
+</Callout>
+
+DeerFlow's configuration system is designed around one goal: every meaningful behavior should be expressible in a config file, not hardcoded in the application. This makes deployments reproducible, auditable, and easy to customize per environment.
+
+## Config file location
+
+DeerFlow resolves `config.yaml` using the following priority order:
+
+1. The path passed to `AppConfig.from_file(config_path)` explicitly.
+2. The `DEER_FLOW_CONFIG_PATH` environment variable.
+3. `backend/config.yaml` (relative to the backend directory).
+4. `config.yaml` in the repository root.
+
+If none of these paths exist, the application raises an error at startup.
+
+To use a custom location:
+
+```bash
+export DEER_FLOW_CONFIG_PATH=/path/to/my-config.yaml
+```
+
+## Environment variable interpolation
+
+Any field value can reference an environment variable using `$VAR_NAME` syntax:
+
+```yaml
+models:
+  - name: gpt-4o
+    api_key: $OPENAI_API_KEY
+```
+
+This keeps secrets out of the config file itself. The variable is resolved at runtime from the process environment.
+
+## The `use` field
+
+Many configuration entries use a `use:` field to specify the Python class or object to instantiate. The format is:
+
+```
+package.subpackage.module:ClassName
+```
+
+or for module-level objects:
+
+```
+package.subpackage.module:variable_name
+```
+
+Examples:
+
+```yaml
+sandbox:
+  use: deerflow.sandbox.local:LocalSandboxProvider
+
+tools:
+  - use: deerflow.community.tavily.tools:web_search_tool
+    api_key: $TAVILY_API_KEY
+```
+
+This pattern is how DeerFlow achieves pluggability without hardcoding class references.
+
+## Extra fields are passed through
+
+For model configuration, `ModelConfig` uses `pydantic ConfigDict(extra="allow")`. This means any extra fields you add under a model entry are passed directly to the model constructor. This allows provider-specific options (like `extra_body`, `reasoning`, or custom timeout keys) to work without modifying the harness:
+
+```yaml
+models:
+  - name: my-model
+    use: langchain_openai:ChatOpenAI
+    model: gpt-4o
+    api_key: $OPENAI_API_KEY
+    some_provider_specific_option: value  # passed through to ChatOpenAI constructor
+```
+
+## Configuration version
+
+`config.yaml` includes a `config_version` field that tracks the schema version:
+
+```yaml
+config_version: 6
+```
+
+When the schema changes (new fields, renamed sections), this number is bumped. If your local `config.yaml` is behind the current version, run:
+
+```bash
+make config-upgrade
+```
+
+This merges new fields from `config.example.yaml` into your existing `config.yaml` without overwriting your customizations.
+
+## Module configuration reference
+
+The following table maps each top-level `config.yaml` section to its documentation page:
+
+| Section | Description | Documentation |
+|---|---|---|
+| `log_level` | Logging level (`debug`/`info`/`warning`/`error`) | — |
+| `models` | Available LLM models | [Lead Agent](/docs/harness/lead-agent) |
+| `token_usage` | Token tracking per model call | [Middlewares](/docs/harness/middlewares) |
+| `tools` | Available agent tools | [Tools](/docs/harness/tools) |
+| `tool_groups` | Named groups of tools | [Tools](/docs/harness/tools) |
+| `tool_search` | Deferred/on-demand tool loading | [Tools](/docs/harness/tools) |
+| `sandbox` | Sandbox provider and options | [Sandbox](/docs/harness/sandbox) |
+| `skills` | Skills directory and container path | [Skills](/docs/harness/skills) |
+| `skill_evolution` | Agent-managed skill creation | [Skills](/docs/harness/skills) |
+| `subagents` | Subagent timeouts and max turns | [Subagents](/docs/harness/subagents) |
+| `acp_agents` | External ACP agent integrations | [Subagents](/docs/harness/subagents) |
+| `memory` | Cross-session memory storage | [Memory](/docs/harness/memory) |
+| `summarization` | Conversation summarization | [Middlewares](/docs/harness/middlewares) |
+| `title` | Automatic thread title generation | [Middlewares](/docs/harness/middlewares) |
+| `checkpointer` | Thread state persistence | [Agents & Threads](/docs/application/agents-and-threads) |
+| `guardrails` | Tool call authorization | — |
+| `stream_bridge` | Streaming configuration | — |
+| `uploads` | File upload settings (PDF converter) | — |
+| `channels` | IM channel integrations (Feishu, Slack, etc.) | — |
+
+## Minimal config to get started
+
+The minimum valid `config.yaml` requires at least one model and a sandbox:
+
+```yaml
+config_version: 6
+
+models:
+  - name: gpt-4o
+    use: langchain_openai:ChatOpenAI
+    model: gpt-4o
+    api_key: $OPENAI_API_KEY
+    request_timeout: 600.0
+    max_retries: 2
+    supports_vision: true
+
+sandbox:
+  use: deerflow.sandbox.local:LocalSandboxProvider
+
+tools:
+  - use: deerflow.community.ddg_search.tools:web_search_tool
+  - use: deerflow.community.jina_ai.tools:web_fetch_tool
+  - use: deerflow.sandbox.tools:ls_tool
+  - use: deerflow.sandbox.tools:read_file_tool
+  - use: deerflow.sandbox.tools:write_file_tool
+  - use: deerflow.sandbox.tools:bash_tool
+```
+
+Start from `config.example.yaml` in the repository root and uncomment the sections you need.
+
+<Cards num={2}>
+  <Cards.Card title="Deployment Guide" href="/docs/application/deployment-guide" />
+  <Cards.Card title="Application Configuration" href="/docs/application/configuration" />
+</Cards>
diff --git a/frontend/src/content/en/harness/customization.mdx b/frontend/src/content/en/harness/customization.mdx
index c900eb2ca..9d8c6b934 100644
--- a/frontend/src/content/en/harness/customization.mdx
+++ b/frontend/src/content/en/harness/customization.mdx
@@ -1,3 +1,175 @@
+---
+title: Customization
+description: DeerFlow's pluggable architecture means most parts of the system can be replaced or extended without forking the core. This page maps the extension points and explains how to use each one.
+---
+
+import { Callout, Cards } from "nextra/components";
+
 # Customization
 
-TBD
+<Callout type="info" emoji="🔧">
+  DeerFlow is designed to be adapted. You can extend agent behavior by writing
+  custom middlewares, adding new tools, building skill packs, and replacing any
+  built-in component through the config.yaml <code>use:</code> field.
+</Callout>
+
+DeerFlow's pluggable architecture means most parts of the system can be replaced or extended without forking the core. This page maps the extension points and explains how to use each one.
+
+## Custom middlewares
+
+Middlewares are the primary extension point for adding behavior to the Lead Agent. They wrap every LLM turn and can read and modify the agent's state before or after the model call.
+
+To add a custom middleware:
+
+1. Implement the `AgentMiddleware` interface from `langchain.agents.middleware`.
+2. Pass your middleware to the `custom_middlewares` parameter when building the agent.
+
+```python
+from langchain.agents.middleware import AgentMiddleware
+from deerflow.agents.thread_state import ThreadState
+
+class AuditMiddleware(AgentMiddleware):
+    async def on_start(self, state: ThreadState, config):
+        # Runs before each model call
+        print(f"[audit] turn starts: {len(state.messages)} messages in context")
+        return state, config
+
+    async def on_end(self, state: ThreadState, config):
+        # Runs after each model call
+        print(f"[audit] turn ends: last message type = {state.messages[-1].type}")
+        return state, config
+```
+
+Custom middlewares are injected into the chain immediately before `ClarificationMiddleware`, which always runs last.
+
+## Custom tools
+
+Add new tools to the agent by registering them in `config.yaml` under `tools:`:
+
+```yaml
+tools:
+  - use: mypackage.tools:my_custom_tool
+    api_key: $MY_TOOL_API_KEY
+```
+
+Your tool must be a LangChain `BaseTool` or a function decorated with `@tool`. It will be instantiated using the `use:` class path and any additional fields from the config entry.
+
+For community-style tools, the pattern is a module-level function or class that returns a `BaseTool`:
+
+```python
+# mypackage/tools.py
+from langchain_core.tools import tool
+
+@tool
+def my_custom_tool(query: str) -> str:
+    """Search my custom data source."""
+    return do_search(query)
+```
+
+## Custom sandbox provider
+
+The sandbox can be replaced by implementing the `SandboxProvider` interface:
+
+```python
+from deerflow.sandbox.sandbox_provider import SandboxProvider
+from deerflow.sandbox.sandbox import Sandbox
+
+class MyCustomSandboxProvider(SandboxProvider):
+    def acquire(self, thread_id: str | None = None) -> str:
+        # Return a sandbox_id
+        ...
+
+    def get(self, sandbox_id: str) -> Sandbox | None:
+        # Return the sandbox instance for this id
+        ...
+
+    def release(self, sandbox_id: str) -> None:
+        # Cleanup
+        ...
+```
+
+Then reference it in `config.yaml`:
+
+```yaml
+sandbox:
+  use: mypackage.sandbox:MyCustomSandboxProvider
+```
+
+## Custom memory storage
+
+Replace the file-based memory with any persistent store by implementing `MemoryStorage`:
+
+```python
+from deerflow.agents.memory.storage import MemoryStorage
+from typing import Any
+
+class RedisMemoryStorage(MemoryStorage):
+    def load(self, agent_name: str | None = None) -> dict[str, Any]:
+        ...
+
+    def reload(self, agent_name: str | None = None) -> dict[str, Any]:
+        ...
+
+    def save(self, memory_data: dict[str, Any], agent_name: str | None = None) -> bool:
+        ...
+```
+
+Configure it in `config.yaml`:
+
+```yaml
+memory:
+  storage_class: mypackage.storage:RedisMemoryStorage
+```
+
+## Custom skills
+
+Skills are the easiest extension point. Create a directory under `skills/custom/your-skill-name/` with a `SKILL.md` file. The skill is discovered automatically on the next `load_skills()` call.
+
+See [Skills](/docs/harness/skills) for the full directory structure and `SKILL.md` format.
+
+## Custom models
+
+Any LangChain-compatible chat model can be used by specifying it in the `use:` field:
+
+```yaml
+models:
+  - name: my-custom-model
+    use: mypackage.models:MyCustomChatModel
+    # Any extra fields are passed as kwargs to the constructor
+    base_url: http://my-model-server:8080
+    api_key: $MY_MODEL_API_KEY
+```
+
+The model class must implement the LangChain `BaseChatModel` interface.
+
+## Custom checkpointer
+
+Thread state persistence can use any LangGraph-compatible checkpointer:
+
+```yaml
+checkpointer:
+  type: sqlite
+  connection_string: ./my-checkpoints.db
+```
+
+For custom backends, implement the LangGraph `BaseCheckpointSaver` interface and configure it programmatically when initializing the `DeerFlowClient`.
+
+## Guardrails
+
+Add pre-execution authorization for tool calls through the `guardrails:` config:
+
+```yaml
+guardrails:
+  enabled: true
+  provider:
+    use: deerflow.guardrails.builtin:AllowlistProvider
+    config:
+      denied_tools: ["bash", "write_file"]
+```
+
+For custom guardrail logic, implement a class with `evaluate()` and `aevaluate()` methods and reference it via `use:`.
+
+<Cards num={2}>
+  <Cards.Card title="Integration Guide" href="/docs/harness/integration-guide" />
+  <Cards.Card title="Configuration" href="/docs/harness/configuration" />
+</Cards>
diff --git a/frontend/src/content/en/harness/design-principles.mdx b/frontend/src/content/en/harness/design-principles.mdx
index bae1cc217..bda534217 100644
--- a/frontend/src/content/en/harness/design-principles.mdx
+++ b/frontend/src/content/en/harness/design-principles.mdx
@@ -1,3 +1,126 @@
+---
+title: Design Principles
+description: Understanding the design principles behind DeerFlow Harness helps you use it effectively, extend it confidently, and reason about how your agents will behave in production.
+---
+
+import { Callout, Cards } from "nextra/components";
+
 # Design Principles
 
-TBD
+<Callout type="info" emoji="🏗️">
+  DeerFlow is built around one central idea: agent behavior should be composed
+  from small, observable, replaceable pieces — not hardcoded into a fixed
+  workflow graph.
+</Callout>
+
+Understanding the design principles behind DeerFlow Harness helps you use it effectively, extend it confidently, and reason about how your agents will behave in production.
+
+## Why a harness, not a framework
+
+A framework gives you abstractions and building blocks. You assemble the parts and write the glue code that connects them.
+
+A **harness** goes further. It packages an opinionated, ready-to-run runtime so that agents can do real work without you rebuilding the same infrastructure every time.
+
+DeerFlow is a harness because it bundles:
+
+- a lead agent with tool routing,
+- a middleware chain that wraps every LLM turn,
+- sandboxed execution for files and commands,
+- skills that load specialized capabilities on demand,
+- subagents for delegated parallel work,
+- memory for cross-session continuity, and
+- a configuration system that controls all of it.
+
+You do not need to design the orchestration layer from scratch. The harness is the orchestration layer.
+
+## Long-horizon tasks are the primary case
+
+DeerFlow is designed for tasks that require more than a single prompt-response exchange. A useful long-horizon agent must:
+
+1. make a plan,
+2. call tools in sequence,
+3. inspect and modify files,
+4. recover when something fails,
+5. delegate work to subagents when the task is too broad, and
+6. return a concrete artifact at the end.
+
+Every architectural decision in DeerFlow is evaluated against this use case. Short, stateless exchanges are easy. Long, multi-step workflows under real-world pressure are the target.
+
+## Middleware chain over inheritance
+
+DeerFlow does not ask you to subclass an agent or override methods to change its behavior. Instead, it uses a **middleware chain** that wraps every LLM turn.
+
+Each middleware is a small, focused plugin that can inspect or modify the agent's state before and after the model call. The lead agent's behavior is entirely determined by which middlewares are active.
+
+This design has several benefits:
+
+- Individual behaviors (memory, summarization, clarification, loop detection) are isolated and testable independently.
+- The chain can be extended without touching the agent's core logic.
+- Each middleware's effect is visible and auditable because it only touches the state it declares.
+
+See the [Middlewares](/docs/harness/middlewares) page for the full list and configuration.
+
+## Skills provide specialization without contamination
+
+A **skill** is a task-oriented capability package. It contains instructions, workflows, best practices, and any tools or resources that make the agent effective at a specific class of work.
+
+The key design decision is that skills are loaded **on demand**. The base agent stays general. When a task requires deep research, the research skill is loaded. When a task requires data analysis, the analysis skill is loaded.
+
+This matters because it keeps the base agent's context clean. A specialized prompt for writing academic papers does not pollute a session focused on coding. Skills inject their content exactly when relevant and no further.
+
+Skills also make the system extensible. Adding a new capability to DeerFlow means writing a new skill pack, not modifying the agent core.
+
+## Sandbox is the execution environment
+
+DeerFlow gives agents a **sandbox**: an isolated workspace where they can read files, write outputs, run commands, and produce artifacts.
+
+This turns the agent from a text generator into a system that can do work. Instead of only describing what code to write, the agent can write it, run it, and verify the result.
+
+Isolation is important because execution should be reproducible and controllable. The sandbox is the reason DeerFlow can support genuine action rather than only conversation.
+
+Two modes are available:
+
+- **LocalSandbox**: commands run directly on the host. Suitable for trusted, single-user local workflows.
+- **Container-based sandbox**: commands run in an isolated container (Docker or Apple Container). Suitable for multi-user environments and production deployments.
+
+## Context engineering keeps long tasks tractable
+
+Context pressure is the primary challenge for long-horizon agents. If everything accumulates in the context window indefinitely, the agent becomes slower, noisier, and less reliable.
+
+DeerFlow addresses this through **context engineering** — deliberate control of what the agent sees, remembers, and ignores at each step:
+
+- **Summarization**: when the conversation grows too long, older turns are summarized and replaced. The agent retains the meaning without the bulk.
+- **Scoped subagent context**: when work is delegated to a subagent, that subagent receives only the information it needs for its piece of the task, not the full parent history.
+- **External working memory**: files and artifacts produced during a task live on disk, not in the context window. The agent references them when needed.
+- **Memory injection**: cross-session facts are injected into the system prompt at a controlled token budget.
+
+This is one of the most important ideas in DeerFlow. Good agent behavior is not only about a stronger model. It is also about giving the model the right working set at the right time.
+
+## Configuration drives behavior
+
+All meaningful behaviors in DeerFlow are controlled through `config.yaml`. The system is designed so that operators can change how the agent behaves — which models to use, whether summarization is active, how subagents are limited, what tools are available — without touching code.
+
+This design principle has three implications:
+
+1. **Reproducibility**: a config file is a complete description of the agent's behavior at a point in time.
+2. **Deployability**: the same code runs differently in different environments because the config is different.
+3. **Auditability**: what the agent can and cannot do is visible in one place.
+
+Environment variable interpolation (`api_key: $OPENAI_API_KEY`) keeps secrets out of committed config files while preserving the same structure.
+
+## Summary
+
+| Principle | What it means in practice |
+|---|---|
+| Harness, not framework | Ready-to-run runtime with all the infrastructure already wired |
+| Long-horizon first | Architecture assumes multi-step, multi-tool, multi-turn tasks |
+| Middleware over inheritance | Behavior is composed from small, isolated plugins |
+| Skills for specialization | Domain capability injected on demand, keeping the base clean |
+| Sandbox for execution | Isolated workspace for real file and command work |
+| Context engineering | Active management of what the agent sees to stay effective |
+| Config-driven | All key behaviors are controlled through `config.yaml` |
+
+<Cards num={2}>
+  <Cards.Card title="Lead Agent" href="/docs/harness/lead-agent" />
+  <Cards.Card title="Middlewares" href="/docs/harness/middlewares" />
+</Cards>
diff --git a/frontend/src/content/en/harness/index.mdx b/frontend/src/content/en/harness/index.mdx
index 02c9e46f3..ea0c098ec 100644
--- a/frontend/src/content/en/harness/index.mdx
+++ b/frontend/src/content/en/harness/index.mdx
@@ -1,3 +1,8 @@
+---
+title: Install DeerFlow Harness
+description: The DeerFlow Harness is the Python SDK and runtime foundation for building your own Super Agent systems.
+---
+
 import { Callout, Cards } from "nextra/components";
 
 # Install DeerFlow Harness
diff --git a/frontend/src/content/en/harness/integration-guide.mdx b/frontend/src/content/en/harness/integration-guide.mdx
index c81251a8b..61fffc9af 100644
--- a/frontend/src/content/en/harness/integration-guide.mdx
+++ b/frontend/src/content/en/harness/integration-guide.mdx
@@ -1,3 +1,180 @@
+---
+title: Integration Guide
+description: DeerFlow Harness is not only a standalone application. It is a Python library you can import and use inside your own backend, API server, automation system, or multi-agent orchestrator.
+---
+
+import { Callout, Cards } from "nextra/components";
+
 # Integration Guide
 
-TBD
+<Callout type="info" emoji="🔌">
+  DeerFlow Harness can be embedded into any Python application. This guide
+  covers the integration patterns for using DeerFlow as a library inside your
+  own system.
+</Callout>
+
+DeerFlow Harness is not only a standalone application. It is a Python library you can import and use inside your own backend, API server, automation system, or multi-agent orchestrator.
+
+## Embedding DeerFlowClient
+
+The primary integration point is `DeerFlowClient`. It wraps the LangGraph runtime and exposes a clean API for sending messages and streaming responses from any Python application.
+
+```python
+from deerflow.client import DeerFlowClient
+from deerflow.config import load_config
+
+# Load configuration (reads config.yaml or DEER_FLOW_CONFIG_PATH)
+load_config()
+
+client = DeerFlowClient()
+```
+
+The client is thread-safe and designed to be instantiated once and reused across requests.
+
+## Async streaming
+
+The recommended integration pattern is async streaming. This gives you real-time access to each token and event as the agent produces it:
+
+```python
+import asyncio
+
+async def run_agent(thread_id: str, user_message: str):
+    async for event in client.astream(
+        thread_id=thread_id,
+        message=user_message,
+        config={
+            "configurable": {
+                "model_name": "gpt-4o",
+                "subagent_enabled": True,
+            }
+        },
+    ):
+        # Process each streaming event
+        yield event
+
+# In a FastAPI handler:
+# from fastapi.responses import StreamingResponse
+# return StreamingResponse(run_agent(thread_id, message), media_type="text/event-stream")
+```
+
+## Non-streaming invocation
+
+For batch processing or when you only need the final result:
+
+```python
+async def run_agent_sync(thread_id: str, user_message: str) -> dict:
+    result = await client.ainvoke(
+        thread_id=thread_id,
+        message=user_message,
+    )
+    return result
+```
+
+## Thread management
+
+Threads represent persistent conversations. Use unique thread IDs to isolate different user sessions:
+
+```python
+import uuid
+
+# New conversation
+thread_id = str(uuid.uuid4())
+
+# Continuing an existing conversation (same thread_id)
+# The agent will see the full history if a checkpointer is configured
+await client.ainvoke(thread_id=existing_thread_id, message="Follow up question")
+```
+
+## Custom per-agent configuration
+
+Build domain-specific agents by creating named agent configs and passing the `agent_name` at runtime:
+
+```python
+# agents/research-assistant/config.yaml must exist with skills and tool config
+
+result = await client.ainvoke(
+    thread_id=thread_id,
+    message=user_message,
+    config={
+        "configurable": {
+            "agent_name": "research-assistant",
+            "model_name": "gpt-4o",
+        }
+    },
+)
+```
+
+## Integrating with FastAPI
+
+DeerFlow Gateway is itself a FastAPI application. You can mount it as a sub-application or router:
+
+```python
+from fastapi import FastAPI
+from deerflow.config import load_config
+
+load_config()
+
+app = FastAPI()
+
+# Mount the DeerFlow gateway router
+from deerflow.app.gateway.main import app as gateway_app
+app.mount("/deerflow", gateway_app)
+```
+
+Or use `DeerFlowClient` directly in your own FastAPI routes with streaming:
+
+```python
+from fastapi import FastAPI
+from fastapi.responses import StreamingResponse
+from deerflow.client import DeerFlowClient
+
+app = FastAPI()
+client = DeerFlowClient()
+
+@app.post("/chat/{thread_id}")
+async def chat(thread_id: str, body: dict):
+    async def generate():
+        async for event in client.astream(thread_id=thread_id, message=body["message"]):
+            yield f"data: {event}\n\n"
+    return StreamingResponse(generate(), media_type="text/event-stream")
+```
+
+## Integrating with LangGraph
+
+DeerFlow Harness is built on LangGraph. The Lead Agent is a standard LangGraph graph. You can compose it with your own LangGraph nodes and graphs:
+
+```python
+from deerflow.agents.lead_agent.agent import make_lead_agent
+from langgraph.graph import StateGraph
+
+# Access the underlying LangGraph agent factory
+agent = make_lead_agent(config)
+```
+
+## Configuration in embedded mode
+
+When embedded in another application, set the config path explicitly to avoid ambiguity:
+
+```python
+import os
+os.environ["DEER_FLOW_CONFIG_PATH"] = "/path/to/my-deerflow-config.yaml"
+
+from deerflow.config import load_config
+load_config()
+```
+
+Or pass the path directly:
+
+```python
+from deerflow.config import load_config
+load_config(config_path="/path/to/my-deerflow-config.yaml")
+```
+
+## MCP server integration
+
+DeerFlow can expose its agent as an MCP server, allowing other MCP-compatible systems to call it as a tool. Refer to the DeerFlow repository for MCP server integration examples.
+
+<Cards num={2}>
+  <Cards.Card title="Customization" href="/docs/harness/customization" />
+  <Cards.Card title="Configuration" href="/docs/harness/configuration" />
+</Cards>
diff --git a/frontend/src/content/en/harness/lead-agent.mdx b/frontend/src/content/en/harness/lead-agent.mdx
new file mode 100644
index 000000000..4aafa80a1
--- /dev/null
+++ b/frontend/src/content/en/harness/lead-agent.mdx
@@ -0,0 +1,156 @@
+---
+title: Lead Agent
+description: The Lead Agent is the central executor in a DeerFlow thread. Every conversation, task, and workflow flows through it. Understanding how it works helps you configure it effectively and extend it when needed.
+---
+
+import { Callout, Cards, Steps } from "nextra/components";
+
+# Lead Agent
+
+<Callout type="info" emoji="🧠">
+  The Lead Agent is the primary reasoning and orchestration unit in every
+  DeerFlow thread. It decides what to do, calls tools, delegates to subagents,
+  and returns artifacts.
+</Callout>
+
+The Lead Agent is the central executor in a DeerFlow thread. Every conversation, task, and workflow flows through it. Understanding how it works helps you configure it effectively and extend it when needed.
+
+## What the Lead Agent does
+
+The Lead Agent is responsible for:
+
+- receiving user messages and maintaining conversation state,
+- reasoning about what to do next (planning, tool selection, delegation),
+- calling tools — built-in, community, MCP, or skill tools,
+- delegating subtasks to subagents via the `task` tool,
+- managing artifacts (files, outputs, deliverables),
+- updating the todo list in plan mode, and
+- returning final responses or artifacts to the user.
+
+The Lead Agent does not hardcode a specific workflow. It uses the model's reasoning to adapt to whatever task the user provides, guided by the system prompt and the skills currently in scope.
+
+## Runtime foundation
+
+The Lead Agent is built on **LangGraph** and **LangChain Agent** primitives. Specifically:
+
+- [`create_agent`](https://python.langchain.com/docs/concepts/agents/) from `langchain.agents` wraps the LLM into a tool-calling agent loop.
+- LangGraph manages the `ThreadState` and provides the checkpointing, streaming, and graph execution model.
+- A **middleware chain** wraps every turn of the agent loop, providing cross-cutting capabilities like memory, summarization, and clarification.
+
+## Execution flow
+
+<Steps>
+
+### Receive message
+
+The user message arrives and is added to `ThreadState.messages`. The `ThreadState` holds the full conversation history, any active todo list, accumulated artifacts, and runtime metadata.
+
+### Middleware pre-processing
+
+Before the model is called, each active middleware has a chance to modify the state. For example, the `MemoryMiddleware` injects persisted memory facts into the system prompt, and the `SummarizationMiddleware` may condense old messages if the token budget is exceeded.
+
+### LLM reasoning
+
+The model receives the current messages (including system prompt with active skill instructions) and produces either a direct reply or one or more tool call requests.
+
+### Tool execution
+
+If tool calls are requested, they are dispatched to the appropriate handlers — sandbox tools for file and command work, community tools for web access, or the `task` tool for subagent delegation.
+
+### Middleware post-processing
+
+After tool results are returned and before the next model call, middlewares run again. The `TitleMiddleware` may generate a thread title on the first exchange, and the `TodoMiddleware` may update the task list.
+
+### Loop or respond
+
+If the model needs more information (e.g., a tool returned partial results), the loop continues. When the model decides the task is complete, it produces a final message and the loop ends.
+
+### State update
+
+`ThreadState` is updated with new messages, artifacts, and memory queues. If a checkpointer is configured, the state is persisted.
+
+</Steps>
+
+## Model selection
+
+The Lead Agent resolves which model to use at runtime using the following priority order:
+
+1. `model_name` (or `model`) from the per-request configuration, if provided and valid.
+2. The `model` field of the active custom agent's config, if an agent is specified.
+3. The first model in the `models:` list in `config.yaml` (the global default).
+
+If the requested model name is not found in the config, the system falls back to the default model and logs a warning.
+
+```yaml
+models:
+  - name: my-primary-model
+    use: langchain_openai:ChatOpenAI
+    model: gpt-4o
+    api_key: $OPENAI_API_KEY
+    request_timeout: 600.0
+    max_retries: 2
+    supports_vision: true
+
+  - name: my-fast-model
+    use: langchain_openai:ChatOpenAI
+    model: gpt-4o-mini
+    api_key: $OPENAI_API_KEY
+```
+
+The first entry (`my-primary-model`) becomes the default. Any request that does not specify a model, or specifies an unknown model name, will use it.
+
+## Thinking mode
+
+If the model supports extended thinking (e.g., DeepSeek Reasoner, Doubao with thinking enabled, Anthropic Claude with thinking), the Lead Agent can run in **thinking mode**. In this mode, the model's internal reasoning steps are visible in the response stream.
+
+Thinking mode is controlled per-request through the `thinking_enabled` flag. If thinking is enabled but the configured model does not support it, the system falls back gracefully and logs a warning.
+
+```yaml
+models:
+  - name: deepseek-v3
+    use: deerflow.models.patched_deepseek:PatchedChatDeepSeek
+    model: deepseek-reasoner
+    api_key: $DEEPSEEK_API_KEY
+    supports_thinking: true
+    when_thinking_enabled:
+      extra_body:
+        thinking:
+          type: enabled
+    when_thinking_disabled:
+      extra_body:
+        thinking:
+          type: disabled
+```
+
+## Plan mode
+
+When `is_plan_mode` is set to `true` in the request configuration, the `TodoMiddleware` is activated. The agent then maintains a structured task list, marking items as `in_progress`, `completed`, or `pending` as it works through a complex task. This provides visibility into the agent's progress for the user.
+
+Plan mode is appropriate for complex, multi-step tasks where showing incremental progress is valuable. For simple requests, it is better left disabled to avoid unnecessary overhead.
+
+## Custom agents
+
+The same Lead Agent runtime powers both the default agent and any custom agents you create. A custom agent differs only in:
+
+- its **name** (ASCII slug, auto-derived from `display_name`),
+- its **system prompt** or agent-specific instructions,
+- which **skills** it has access to,
+- which **tool groups** it can use, and
+- which **model** it defaults to.
+
+Custom agents are created through the DeerFlow App UI or via the `/api/agents` endpoint. Their configuration is stored in `agents/{name}/config.yaml` relative to the backend directory.
+
+<Callout type="tip">
+  When a custom agent is selected in a thread, the Lead Agent loads that
+  agent's config at runtime. Switching models or skills for a specific agent
+  does not require restarting the server.
+</Callout>
+
+## Bootstrap mode
+
+DeerFlow includes a special **bootstrap mode** for the initial setup of custom agents. When `is_bootstrap: true` is passed in the request config, the Lead Agent runs with a minimal system prompt and only the core setup tools exposed. This is used internally to guide the first-run agent configuration flow.
+
+<Cards num={2}>
+  <Cards.Card title="Middlewares" href="/docs/harness/middlewares" />
+  <Cards.Card title="Tools" href="/docs/harness/tools" />
+</Cards>
diff --git a/frontend/src/content/en/harness/mcp.mdx b/frontend/src/content/en/harness/mcp.mdx
new file mode 100644
index 000000000..0ea0f186d
--- /dev/null
+++ b/frontend/src/content/en/harness/mcp.mdx
@@ -0,0 +1,114 @@
+---
+title: MCP Integration
+description: The **Model Context Protocol (MCP)** is an open standard for connecting language models to external tools and data sources. DeerFlow's MCP integration allows you to extend the agent with any tool server that implements the MCP protocol — without modifying the harness itself.
+---
+
+import { Callout, Cards, Steps } from "nextra/components";
+
+# MCP Integration
+
+<Callout type="info" emoji="🔌">
+  Model Context Protocol (MCP) lets DeerFlow connect to any external tool
+  server. Once connected, MCP tools are available to the Lead Agent exactly like
+  built-in tools.
+</Callout>
+
+The **Model Context Protocol (MCP)** is an open standard for connecting language models to external tools and data sources. DeerFlow's MCP integration allows you to extend the agent with any tool server that implements the MCP protocol — without modifying the harness itself.
+
+## Configuration
+
+MCP servers are configured in `extensions_config.json`, a file separate from `config.yaml`. This separation allows MCP and skill configurations to be managed independently and updated at runtime through the Gateway API.
+
+The default location is the project root (same directory as `config.yaml`). The path is determined by `ExtensionsConfig.resolve_config_path()`.
+
+```json
+{
+  "mcpServers": {
+    "my-server": {
+      "command": "npx",
+      "args": ["-y", "@my-org/my-mcp-server"],
+      "enabled": true
+    },
+    "filesystem": {
+      "command": "npx",
+      "args": ["-y", "@modelcontextprotocol/server-filesystem", "/path/to/dir"],
+      "enabled": true
+    },
+    "sqlite": {
+      "command": "uvx",
+      "args": ["mcp-server-sqlite", "--db-path", "/path/to/db.sqlite"],
+      "enabled": false
+    }
+  }
+}
+```
+
+Each server entry supports:
+- `command`: the executable to run (e.g., `npx`, `uvx`, `python`)
+- `args`: command arguments as an array
+- `enabled`: whether the server is active (can be toggled without removing the entry)
+- `env`: optional environment variables injected into the server process
+
+## How tools are loaded
+
+<Steps>
+
+### Startup initialization
+
+When the DeerFlow server starts, `initialize_mcp_tools()` is called. This connects to all enabled MCP servers, retrieves their tool schemas, and caches the results.
+
+### Lazy initialization fallback
+
+If the server starts before MCP tools are initialized (e.g., in LangGraph Studio), `get_cached_mcp_tools()` performs lazy initialization on the first tool call.
+
+### Cache invalidation
+
+The MCP tools cache tracks the modification time (`mtime`) of `extensions_config.json`. When the file changes — for example, when a server is enabled or disabled through the Gateway API — the cache is marked stale and tools are reloaded on the next request.
+
+This means MCP server changes take effect without restarting the DeerFlow server.
+
+### Tool availability
+
+Once loaded, MCP tools appear in the Lead Agent's tool list alongside built-in and community tools. The agent selects and calls them using the same mechanism as any other tool.
+
+</Steps>
+
+## Tool search integration
+
+When many MCP servers expose a large number of tools, loading all of them into the agent's context at once can increase token usage and reduce tool selection accuracy.
+
+Enable **tool search** to load MCP tools on demand instead:
+
+```yaml
+# config.yaml
+tool_search:
+  enabled: true
+```
+
+With tool search enabled, MCP tools are listed by name in the system prompt but not included in the full tool schema. The agent discovers them using the `tool_search` built-in tool and loads only the ones it needs for a given task.
+
+## OAuth support
+
+Some MCP servers require OAuth authentication. DeerFlow's `mcp/oauth.py` handles the OAuth flow for servers that declare OAuth requirements in their capability headers.
+
+When an OAuth-protected MCP server is connected, DeerFlow will:
+1. Detect the OAuth requirement from the server's capability headers
+2. Build the appropriate authorization headers using `get_initial_oauth_headers()`
+3. Wrap tool calls with an OAuth interceptor via `build_oauth_tool_interceptor()`
+
+The OAuth flow is transparent to the Lead Agent — it simply calls the tool, and DeerFlow handles the authentication.
+
+## Managing MCP servers
+
+MCP servers can be managed in several ways:
+
+- **Through the DeerFlow App UI**: the extensions panel shows connected MCP servers and lets you enable/disable them.
+- **Through the Gateway API**: `POST /api/extensions/mcp/{name}/enable` and `/disable`.
+- **By editing `extensions_config.json` directly**: useful for scripted or programmatic configuration.
+
+Changes are picked up automatically due to the file mtime-based cache invalidation.
+
+<Cards num={2}>
+  <Cards.Card title="Tools" href="/docs/harness/tools" />
+  <Cards.Card title="Configuration" href="/docs/harness/configuration" />
+</Cards>
diff --git a/frontend/src/content/en/harness/memory.mdx b/frontend/src/content/en/harness/memory.mdx
index 30fd1060f..4904c54bb 100644
--- a/frontend/src/content/en/harness/memory.mdx
+++ b/frontend/src/content/en/harness/memory.mdx
@@ -1,3 +1,124 @@
+---
+title: Memory
+description: Memory is a runtime feature of the DeerFlow Harness. It is not a simple conversation log — it is a structured store of facts and context summaries that persist across separate sessions and inform the agent's behavior in future conversations.
+---
+
+import { Callout, Cards } from "nextra/components";
+
 # Memory
 
-TBD
+<Callout type="info" emoji="💾">
+  Memory lets DeerFlow carry useful information across sessions. The agent
+  remembers user preferences, project context, and recurring facts so it can
+  give better responses without starting from zero every time.
+</Callout>
+
+Memory is a runtime feature of the DeerFlow Harness. It is not a simple conversation log — it is a structured store of facts and context summaries that persist across separate sessions and inform the agent's behavior in future conversations.
+
+## What memory stores
+
+The memory store holds several categories of information:
+
+- **Work context**: summaries of ongoing projects, goals, and recurring topics the user works on.
+- **Personal context**: preferences, communication style, and other user-specific details the agent has learned.
+- **Top of mind**: the most recent focus areas and active tasks.
+- **History**: recent months' context, earlier background, and long-term facts.
+- **Facts**: discrete, specific facts the agent has extracted from conversations (e.g., preferred tools, team names, project constraints).
+
+Each category is updated over time as the agent learns from ongoing conversations.
+
+## How it works
+
+Memory is managed by `MemoryMiddleware`, which runs on every Lead Agent turn:
+
+1. **Injection**: at the start of each conversation, the agent's current memory is injected into the system prompt at a controlled token budget (`max_injection_tokens`).
+2. **Learning**: after a conversation, a background job extracts new facts and updates the relevant memory categories. Updates are debounced by `debounce_seconds` to batch rapid changes.
+3. **Per-agent memory**: when a custom agent is active, its memory is stored separately from the global memory. This keeps different agents' knowledge isolated.
+
+## Configuration
+
+```yaml
+memory:
+  enabled: true
+
+  # Storage path for the global memory file.
+  # Default: {base_dir}/memory.json  (resolves to backend/.deer-flow/memory.json)
+  # Absolute paths are used as-is.
+  # Relative paths are resolved against base_dir (not the backend working directory).
+  storage_path: memory.json
+
+  # Storage class (default: file-based JSON storage)
+  storage_class: deerflow.agents.memory.storage.FileMemoryStorage
+
+  # Seconds to wait before processing queued memory updates (debounce)
+  debounce_seconds: 30
+
+  # Model for memory update extraction (null = use default model)
+  model_name: null
+
+  # Maximum number of facts to store
+  max_facts: 100
+
+  # Minimum confidence score required to store a fact (0.0–1.0)
+  fact_confidence_threshold: 0.7
+
+  # Whether to inject memory into the system prompt
+  injection_enabled: true
+
+  # Maximum tokens to use for memory injection into system prompt
+  max_injection_tokens: 2000
+```
+
+## Global vs per-agent memory
+
+DeerFlow supports two levels of memory:
+
+- **Global memory**: stored at `{base_dir}/memory.json`. Used when no specific agent is active or when the agent has no per-agent memory file.
+- **Per-agent memory**: stored at `{base_dir}/agents/{agent_name}/memory.json`. Used when a custom agent is active, keeping that agent's learned knowledge separate.
+
+The `MemoryMiddleware` automatically selects the correct memory file based on the active `agent_name` in the request configuration.
+
+Agent names used for memory storage are validated against `AGENT_NAME_PATTERN` to ensure filesystem safety.
+
+## Storage location
+
+By default, memory files are stored under the backend base directory:
+
+- Base directory: `backend/.deer-flow/`
+- Global memory: `backend/.deer-flow/memory.json`
+- Per-agent memory: `backend/.deer-flow/agents/{agent_name}/memory.json`
+
+You can change the storage path with the `storage_path` field. Relative paths are resolved against the base directory. Use an absolute path to store memory in a custom location.
+
+## Custom storage backend
+
+The `storage_class` field allows you to replace the default file-based storage with a custom implementation. Any class that extends `MemoryStorage` and implements `load()`, `reload()`, and `save()` methods can be used:
+
+```yaml
+memory:
+  storage_class: mypackage.storage.RedisMemoryStorage
+```
+
+If the configured class cannot be loaded, the system falls back to the default `FileMemoryStorage` and logs an error.
+
+## Disabling memory
+
+To disable memory entirely:
+
+```yaml
+memory:
+  enabled: false
+```
+
+To keep memory storage but prevent injection into the system prompt:
+
+```yaml
+memory:
+  enabled: true
+  injection_enabled: false
+```
+
+<Cards num={2}>
+  <Cards.Card title="Configuration" href="/docs/harness/configuration" />
+  <Cards.Card title="Middlewares" href="/docs/harness/middlewares" />
+</Cards>
diff --git a/frontend/src/content/en/harness/middlewares.mdx b/frontend/src/content/en/harness/middlewares.mdx
new file mode 100644
index 000000000..fca391cf3
--- /dev/null
+++ b/frontend/src/content/en/harness/middlewares.mdx
@@ -0,0 +1,217 @@
+---
+title: Middlewares
+description: Every time the Lead Agent calls the LLM, it runs through a **middleware chain** before and after the model call. Middlewares can read and modify the agent's state, inject content into the system prompt, intercept tool calls, and react to model outputs.
+---
+
+import { Callout } from "nextra/components";
+
+# Middlewares
+
+<Callout type="info" emoji="🔌">
+  Middlewares wrap every LLM turn in the Lead Agent. They are the primary
+  extension point for adding cross-cutting behaviors like memory, summarization,
+  clarification, and token tracking.
+</Callout>
+
+Every time the Lead Agent calls the LLM, it runs through a **middleware chain** before and after the model call. Middlewares can read and modify the agent's state, inject content into the system prompt, intercept tool calls, and react to model outputs.
+
+This design keeps the agent core simple and stable while allowing rich, composable behaviors to be layered in.
+
+## How the chain works
+
+The middleware chain is built once per agent invocation, based on the current configuration and request parameters. The middlewares run in a defined order:
+
+1. Runtime middlewares (error handling, thread data, uploads, dangling tool call patching)
+2. `SummarizationMiddleware` — context compression (if enabled)
+3. `TodoMiddleware` — task list management (plan mode only)
+4. `TokenUsageMiddleware` — token tracking (if enabled)
+5. `TitleMiddleware` — automatic thread title generation
+6. `MemoryMiddleware` — cross-session memory injection and queuing
+7. `ViewImageMiddleware` — image details injection (if model supports vision)
+8. `DeferredToolFilterMiddleware` — hides deferred tool schemas (if tool search enabled)
+9. `SubagentLimitMiddleware` — limits parallel subagent calls (if subagents enabled)
+10. `LoopDetectionMiddleware` — breaks repetitive tool call loops
+11. Custom middlewares (if any)
+12. `ClarificationMiddleware` — intercepts clarification requests (always last)
+
+The ordering is significant. Summarization runs early to reduce context before other processing. Clarification always runs last so it can intercept after all other middlewares have had their turn.
+
+## Middleware reference
+
+### ClarificationMiddleware
+
+Intercepts clarification tool calls and converts them into proper user-facing requests for additional information. When the model decides it needs to ask the user something before proceeding, this middleware surfaces that request.
+
+**Configuration**: controlled by `guardrails.clarification` settings.
+
+---
+
+### LoopDetectionMiddleware
+
+Detects when the agent is making the same tool call repeatedly without making progress. When a loop is detected, the middleware intervenes to break the cycle and prevents the agent from burning turns indefinitely.
+
+**Configuration**: built-in, no user configuration.
+
+---
+
+### MemoryMiddleware
+
+Reads persisted memory facts at the start of each conversation and injects them into the system prompt. After a conversation ends, queues a background update to incorporate any new information into the memory store.
+
+**Configuration**: see the [Memory](/docs/harness/memory) page and the `memory:` section in `config.yaml`.
+
+```yaml
+memory:
+  enabled: true
+  injection_enabled: true
+  max_injection_tokens: 2000
+  debounce_seconds: 30
+```
+
+---
+
+### SubagentLimitMiddleware
+
+Limits the number of parallel subagent task calls the agent can make in a single turn. This prevents the agent from spawning an unbounded number of concurrent subagents.
+
+**Configuration**: `subagent_enabled` and `max_concurrent_subagents` in the per-request config.
+
+---
+
+### TitleMiddleware
+
+Automatically generates a title for the thread after the first exchange. The title is derived from the user's first message and the agent's response.
+
+**Configuration**: `title:` section in `config.yaml`.
+
+```yaml
+title:
+  enabled: true
+  max_words: 6
+  max_chars: 60
+  model_name: null  # use default model
+```
+
+---
+
+### TodoMiddleware
+
+When plan mode is active, maintains a structured task list visible to the user. The agent uses the `write_todos` tool to mark tasks as `pending`, `in_progress`, or `completed` as it works through a complex objective.
+
+**Activation**: enabled automatically when `is_plan_mode: true` is set in the request configuration. No `config.yaml` entry required.
+
+---
+
+### TokenUsageMiddleware
+
+Tracks LLM token consumption per model call and logs it at the `info` level. Useful for monitoring costs and understanding where tokens are going in long tasks.
+
+**Configuration**: `token_usage:` section in `config.yaml`.
+
+```yaml
+token_usage:
+  enabled: false
+```
+
+---
+
+### SandboxAuditMiddleware
+
+Audits sandbox operations performed during the agent's execution. Provides a record of what files were read, written, and what commands were run.
+
+**Configuration**: built-in runtime middleware, always active when a sandbox is available.
+
+---
+
+### SummarizationMiddleware
+
+When the conversation grows long, summarizes older messages to reduce context size. The summary is injected back into the conversation in place of the original messages, preserving meaning without the full token cost.
+
+**Configuration**: `summarization:` section in `config.yaml`. See detailed configuration below.
+
+---
+
+### ViewImageMiddleware
+
+When the current model supports vision (`supports_vision: true`), this middleware intercepts `view_image` tool calls and injects the image content directly into the model's context so it can be analyzed.
+
+**Activation**: automatically enabled when the resolved model has `supports_vision: true`.
+
+---
+
+### DeferredToolFilterMiddleware
+
+When tool search is enabled, this middleware hides deferred tool schemas from the model's context. Tools are discovered lazily via the `tool_search` tool instead of being listed upfront, reducing context usage.
+
+**Configuration**: `tool_search.enabled: true` in `config.yaml`.
+
+## Summarization configuration
+
+The `SummarizationMiddleware` is one of the most impactful middlewares for long-horizon tasks. Here is the full configuration reference:
+
+```yaml
+summarization:
+  enabled: true
+
+  # Model to use for summarization (null = use default model)
+  # A lightweight model like gpt-4o-mini is recommended to reduce cost.
+  model_name: null
+
+  # Trigger conditions — summarization runs when ANY threshold is met
+  trigger:
+    - type: tokens      # trigger when context exceeds N tokens
+      value: 15564
+    # - type: messages  # trigger when there are more than N messages
+    #   value: 50
+    # - type: fraction  # trigger when context exceeds X% of model max
+    #   value: 0.8
+
+  # How much recent history to keep after summarization
+  keep:
+    type: messages
+    value: 10           # keep the 10 most recent messages
+    # Alternative: keep by tokens
+    # type: tokens
+    # value: 3000
+
+  # Maximum tokens to trim when preparing messages for the summarizer
+  trim_tokens_to_summarize: 15564
+
+  # Custom summary prompt (null = use default LangChain prompt)
+  summary_prompt: null
+```
+
+**Trigger types**:
+- `tokens`: triggers when the total token count in the conversation exceeds `value`.
+- `messages`: triggers when the number of messages exceeds `value`.
+- `fraction`: triggers when the context reaches `value` fraction of the model's maximum input token limit.
+
+Multiple triggers can be listed; summarization runs when **any** of them fires.
+
+**Keep types**:
+- `messages`: keep the last `value` messages after summarization.
+- `tokens`: keep up to `value` tokens of recent history.
+- `fraction`: keep up to `value` fraction of the model's max input token limit.
+
+## Writing a custom middleware
+
+Custom middlewares can be injected into the chain for specialized use cases. A middleware must implement the `AgentMiddleware` interface from `langchain.agents.middleware`.
+
+The basic structure is:
+
+```python
+from langchain.agents.middleware import AgentMiddleware
+
+class MyMiddleware(AgentMiddleware):
+    async def on_start(self, state, config):
+        # Runs before the model call
+        # Modify state or config here
+        return state, config
+
+    async def on_end(self, state, config):
+        # Runs after the model call
+        # Inspect or modify the result
+        return state, config
+```
+
+Custom middlewares are passed to `make_lead_agent` via the `custom_middlewares` parameter in `_build_middlewares`. They are injected immediately before `ClarificationMiddleware` at the end of the chain.
diff --git a/frontend/src/content/en/harness/quick-start.mdx b/frontend/src/content/en/harness/quick-start.mdx
index 81c102836..5327e6773 100644
--- a/frontend/src/content/en/harness/quick-start.mdx
+++ b/frontend/src/content/en/harness/quick-start.mdx
@@ -1,3 +1,115 @@
+---
+title: Quick Start
+description: Learn how to create and run a DeerFlow agent with create_deerflow_agent, from model setup to streaming responses.
+---
+
+import { Callout, Cards, Steps } from "nextra/components";
+
 # Quick Start
 
-TBD
+<Callout type="info" emoji="🚀">
+  This guide shows you how to build and run a DeerFlow agent in Python with
+  <code>create_deerflow_agent</code>.
+</Callout>
+
+The fastest way to understand DeerFlow Harness is to create an agent directly in code. This quick start walks through model setup, agent creation, and streaming a response.
+
+## Prerequisites
+
+DeerFlow Harness requires Python 3.12 or later. The package is part of the `deerflow` repository under `backend/packages/harness`.
+
+If you are working from the repository clone:
+
+```bash
+cd backend
+uv sync
+```
+
+You will also need a chat model instance from the LangChain provider package you want to use.
+
+## Create your first agent
+
+<Steps>
+
+### Import the factory and model
+
+```python
+from deerflow.agents import create_deerflow_agent
+from langchain_openai import ChatOpenAI
+```
+
+### Create a model
+
+```python
+model = ChatOpenAI(
+    model="gpt-4o",
+    api_key="YOUR_OPENAI_API_KEY",
+)
+```
+
+### Create an agent
+
+```python
+agent = create_deerflow_agent(model)
+```
+
+This returns a compiled LangGraph agent with DeerFlow's default middleware chain.
+
+### Stream a response
+
+```python
+for event in agent.stream(
+    {"messages": [{"role": "user", "content": "Explain what DeerFlow Harness is."}]},
+    stream_mode=["messages", "values"],
+):
+    print(event)
+```
+
+</Steps>
+
+## Add tools or behavior
+
+You can customize the agent by passing tools, a system prompt, runtime features, middleware, or a checkpointer.
+
+```python
+from deerflow.agents import RuntimeFeatures, create_deerflow_agent
+
+agent = create_deerflow_agent(
+    model,
+    system_prompt="You are a concise research assistant.",
+    features=RuntimeFeatures(subagent=True, memory=False),
+    plan_mode=True,
+    name="research-agent",
+)
+```
+
+Common parameters:
+
+| Parameter | Description |
+|---|---|
+| `tools` | Additional tools available to the agent |
+| `system_prompt` | Custom system prompt |
+| `features` | Enable or replace built-in runtime features |
+| `extra_middleware` | Insert custom middleware into the default chain |
+| `plan_mode` | Enable Todo-style task tracking |
+| `checkpointer` | Persist agent state across runs |
+| `name` | Logical agent name |
+
+## When to use DeerFlowClient instead
+
+`create_deerflow_agent()` is the low-level SDK factory when you want to work directly with the compiled agent graph.
+
+Use `DeerFlowClient` when you want the higher-level embedded app interface, such as:
+
+- thread-oriented chat helpers,
+- model / skills / memory management APIs,
+- file uploads and artifacts,
+- Gateway-like response formats.
+
+## Next steps
+
+<Cards num={3}>
+  <Cards.Card title="Design Principles" href="/docs/harness/design-principles" />
+  <Cards.Card title="Lead Agent" href="/docs/harness/lead-agent" />
+  <Cards.Card title="Configuration" href="/docs/harness/configuration" />
+</Cards>
diff --git a/frontend/src/content/en/harness/sandbox.mdx b/frontend/src/content/en/harness/sandbox.mdx
index 318d5475f..203e5c44d 100644
--- a/frontend/src/content/en/harness/sandbox.mdx
+++ b/frontend/src/content/en/harness/sandbox.mdx
@@ -1,3 +1,155 @@
+---
+title: Sandbox
+description: The sandbox gives the Lead Agent a controlled environment where it can read files, write outputs, run shell commands, and produce artifacts. Without a sandbox, the agent can only generate text. With a sandbox, it can write and execute code, process data files, generate charts, and build deliverables.
+---
+
+import { Callout, Cards, Tabs } from "nextra/components";
+
 # Sandbox
 
-TBD
+<Callout type="info" emoji="📦">
+  The sandbox is the isolated workspace where the agent does file and
+  command-based work. It is what makes DeerFlow capable of real action, not
+  just conversation.
+</Callout>
+
+The sandbox gives the Lead Agent a controlled environment where it can read files, write outputs, run shell commands, and produce artifacts. Without a sandbox, the agent can only generate text. With a sandbox, it can write and execute code, process data files, generate charts, and build deliverables.
+
+## Sandbox modes
+
+DeerFlow supports three sandbox modes. Choose the one that fits your deployment:
+
+### LocalSandbox (default)
+
+Commands run directly on the host machine's filesystem. There is no container isolation.
+
+- **Best for**: trusted, single-user local development workflows.
+- **Risk**: the agent has access to the host filesystem. Use `allow_host_bash: false` (default) to prevent arbitrary command execution.
+
+```yaml
+sandbox:
+  use: deerflow.sandbox.local:LocalSandboxProvider
+  allow_host_bash: false  # default; set to true only for fully trusted workflows
+```
+
+### Container-based AIO Sandbox
+
+Commands run in an isolated container (Docker on Linux/Windows, or Apple Container on macOS). Each sandbox session gets a fresh container environment.
+
+- **Best for**: multi-user environments, production deployments, or any case where you want execution isolation.
+
+```yaml
+sandbox:
+  use: deerflow.community.aio_sandbox:AioSandboxProvider
+
+  # Optional: container image (default shown below)
+  image: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest
+
+  # Optional: max concurrent containers (default: 3, LRU eviction when exceeded)
+  replicas: 3
+
+  # Optional: container name prefix (default: deer-flow-sandbox)
+  container_prefix: deer-flow-sandbox
+
+  # Optional: idle timeout in seconds (default: 600)
+  idle_timeout: 600
+
+  # Optional: custom mounts
+  mounts:
+    - host_path: /path/on/host
+      container_path: /home/user/shared
+      read_only: false
+
+  # Optional: environment variables injected into the container
+  environment:
+    API_KEY: $MY_API_KEY
+```
+
+Install: `cd backend && uv add 'deerflow-harness[aio-sandbox]'`
+
+### Provisioner-managed Sandbox (Kubernetes)
+
+Each sandbox gets a dedicated Pod in a Kubernetes cluster, managed by the provisioner service. This provides the strongest isolation and is recommended for production environments with multiple concurrent users.
+
+```yaml
+sandbox:
+  use: deerflow.community.aio_sandbox:AioSandboxProvider
+  provisioner_url: http://provisioner:8002
+```
+
+The provisioner service is included in `docker/docker-compose-dev.yaml` and manages the Pod and Service lifecycle for each sandbox ID.
+
+## Path mappings
+
+The sandbox uses path mappings to bridge the host filesystem and the container's virtual filesystem. Two key mappings are always configured:
+
+| Host path | Container path | Access |
+|---|---|---|
+| `skills/` (from `skills.path`) | `/mnt/skills` (from `skills.container_path`) | Read-only |
+| `.deer-flow/threads/{thread_id}/user-data/` | `/mnt/user-data/` | Read-write |
+
+The skills directory is always mounted read-only. Threads write their working data (uploads, outputs, intermediate files) to `/mnt/user-data/`.
+
+### Custom mounts
+
+You can add additional mounts for the local sandbox using the `mounts:` configuration:
+
+```yaml
+sandbox:
+  use: deerflow.sandbox.local:LocalSandboxProvider
+  mounts:
+    - host_path: /home/user/my-project
+      container_path: /mnt/my-project
+      read_only: true
+```
+
+<Callout type="warning">
+  Custom mount `container_path` values must not conflict with reserved prefixes:
+  `/mnt/skills`, `/mnt/acp-workspace`, or `/mnt/user-data`.
+</Callout>
+
+## Output truncation
+
+The sandbox tools limit output size to keep the agent's context manageable. These limits are configurable:
+
+```yaml
+sandbox:
+  use: deerflow.sandbox.local:LocalSandboxProvider
+
+  # bash uses middle-truncation (head + tail)
+  bash_output_max_chars: 20000
+
+  # read_file uses head-truncation
+  read_file_output_max_chars: 50000
+
+  # ls uses head-truncation
+  ls_output_max_chars: 20000
+```
+
+Set to `0` to disable truncation.
+
+## Security
+
+### LocalSandbox
+
+The `LocalSandbox` runs commands directly on the host. By default, the `bash` tool is **disabled** to prevent arbitrary host command execution. Enable it only for fully trusted, single-user workflows:
+
+```yaml
+sandbox:
+  allow_host_bash: true  # Dangerous: grants the agent shell access to your machine
+```
+
+Even without `bash`, the agent can still read and write files through the dedicated file tools.
+
+### Container sandbox
+
+Container-based sandboxes provide filesystem and process isolation. The agent cannot see or modify the host filesystem except through explicit mounts. The provisioner-managed mode adds a further layer: each thread gets its own isolated Pod.
+
+### Audit middleware
+
+`SandboxAuditMiddleware` runs on every agent turn and records all sandbox operations. This provides an audit trail of what files were accessed and what commands were run during a session.
+
+<Cards num={2}>
+  <Cards.Card title="Tools" href="/docs/harness/tools" />
+  <Cards.Card title="Subagents" href="/docs/harness/subagents" />
+</Cards>
diff --git a/frontend/src/content/en/harness/skills.mdx b/frontend/src/content/en/harness/skills.mdx
index 375984c3f..45f232b37 100644
--- a/frontend/src/content/en/harness/skills.mdx
+++ b/frontend/src/content/en/harness/skills.mdx
@@ -1,3 +1,165 @@
+---
+title: Skills
+description: A skill is more than a prompt. It is a self-contained capability package that can include structured instructions, step-by-step workflows, domain-specific best practices, supporting resources, and tool configurations. Skills are loaded on demand — they inject their content when a task calls for them and stay out of the context otherwise.
+---
+
+import { Callout, Cards, FileTree, Steps } from "nextra/components";
+
 # Skills
 
-TBD
+<Callout type="info" emoji="🎯">
+  Skills are task-oriented capability packages that teach the agent how to do a
+  specific class of work. The base agent stays general; skills provide
+  specialization only when needed.
+</Callout>
+
+A skill is more than a prompt. It is a self-contained capability package that can include structured instructions, step-by-step workflows, domain-specific best practices, supporting resources, and tool configurations. Skills are loaded on demand — they inject their content when a task calls for them and stay out of the context otherwise.
+
+## What a skill contains
+
+Each skill lives in its own subdirectory under `skills/public/` (or `skills/custom/` for user-created skills). The directory contains a `SKILL.md` file that defines the skill's metadata, instructions, and workflow.
+
+<FileTree>
+  <FileTree.Folder name="skills/" defaultOpen>
+    <FileTree.Folder name="public/" defaultOpen>
+      <FileTree.Folder name="deep-research/" defaultOpen>
+        <FileTree.File name="SKILL.md" />
+      </FileTree.Folder>
+      <FileTree.Folder name="data-analysis/">
+        <FileTree.File name="SKILL.md" />
+      </FileTree.Folder>
+      <FileTree.Folder name="chart-visualization/">
+        <FileTree.File name="SKILL.md" />
+      </FileTree.Folder>
+    </FileTree.Folder>
+    <FileTree.Folder name="custom/">
+      <FileTree.File name="(your custom skills go here)" />
+    </FileTree.Folder>
+  </FileTree.Folder>
+</FileTree>
+
+The `SKILL.md` file is the authoritative definition of the skill. It is parsed by `skills/parser.py` to extract the skill name, description, category, instructions, and any dependencies or tool requirements.
+
+## Built-in skills
+
+DeerFlow ships with the following public skills:
+
+| Skill | Description |
+|---|---|
+| `deep-research` | Multi-step research with source gathering, cross-checking, and structured output |
+| `data-analysis` | Data exploration, statistical analysis, and insight generation |
+| `chart-visualization` | Chart and graph creation from data |
+| `ppt-generation` | Presentation slide generation |
+| `image-generation` | AI image generation workflows |
+| `code-documentation` | Automated code documentation generation |
+| `newsletter-generation` | Newsletter content creation |
+| `podcast-generation` | Podcast script and outline generation |
+| `academic-paper-review` | Structured academic paper analysis |
+| `consulting-analysis` | Business consulting frameworks and analysis |
+| `systematic-literature-review` | Literature review methodology and synthesis |
+| `github-deep-research` | Repository and code deep-dive research |
+| `frontend-design` | Frontend design and UI workflow |
+| `web-design-guidelines` | Web design standards and review |
+| `video-generation` | Video content planning and generation |
+
+## Skill lifecycle
+
+<Steps>
+
+### Discovery and loading
+
+`load_skills()` in `skills/loader.py` scans both `public/` and `custom/` directories under the configured skills path. It re-reads `ExtensionsConfig.from_file()` on every call, which means enabling or disabling a skill through the Gateway API takes effect immediately in the running LangGraph server without a restart.
+
+### Parsing
+
+`parser.py` reads each `SKILL.md` file and extracts structured metadata: name, description, category, instructions, and any tool or resource requirements.
+
+### Security scanning
+
+`security_scanner.py` checks skill content for potentially dangerous patterns before it is loaded into the agent's context. This step runs during skill loading to prevent malicious skill content from being injected.
+
+### Dependency installation
+
+`installer.py` handles any Python or system dependencies declared by the skill. Dependencies are installed into the runtime environment when the skill is first loaded.
+
+### Context injection
+
+When the agent is invoked with a specific skill in scope, the skill's instructions are injected into the system prompt. The agent then has access to the skill's workflow, best practices, and domain knowledge for the duration of that conversation.
+
+</Steps>
+
+## Configuration
+
+The skills system is configured under `skills:` in `config.yaml`:
+
+```yaml
+skills:
+  # Path to skills directory on the host.
+  # Default: ../skills relative to the backend directory.
+  # Uncomment to customize:
+  # path: /absolute/path/to/custom/skills
+
+  # Path where skills are mounted in the sandbox container.
+  # The agent uses this path to access skill files during execution.
+  container_path: /mnt/skills
+```
+
+The `container_path` is important: it tells the agent where to find skill resources inside the sandbox. The harness automatically mounts the host skills directory to this container path.
+
+## Enabling and disabling skills
+
+Skill availability is tracked in `extensions_config.json` (separate from `config.yaml`). You can manage skill state:
+
+- **Through the DeerFlow App UI**: the skills panel lets you toggle skills on and off.
+- **Through the Gateway API**: `POST /api/extensions/skills/{name}/enable` and `/disable`.
+- **By editing `extensions_config.json` directly**.
+
+Because `load_skills()` re-reads the extensions config on every call, changes take effect immediately — no server restart required.
+
+## Restricting skills per custom agent
+
+A custom agent can be restricted to a specific subset of skills. In the agent's config (stored in `agents/{name}/config.yaml`), set a `skills` list:
+
+```yaml
+# agents/my-researcher/config.yaml
+name: my-researcher
+skills:
+  - deep-research
+  - academic-paper-review
+  # Omit all other skills
+```
+
+- **Omitted or null**: the agent loads all globally enabled skills.
+- **Empty list `[]`**: the agent has no skills.
+- **Named list**: the agent loads only those specific skills.
+
+## Skill evolution
+
+DeerFlow includes an optional **skill evolution** feature that allows the agent to autonomously create and improve skills in the `skills/custom/` directory:
+
+```yaml
+skill_evolution:
+  enabled: false  # Set to true to allow agent-managed skill creation
+  moderation_model_name: null  # Model for security scanning (null = use default)
+```
+
+<Callout type="warning">
+  Enable skill evolution only in environments where you trust the agent's
+  outputs. Newly created skills are security-scanned before being loaded, but
+  the feature gives the agent write access to the skills directory.
+</Callout>
+
+## Writing a custom skill
+
+To create a custom skill:
+
+1. Create a new directory under `skills/custom/your-skill-name/`
+2. Add a `SKILL.md` file that defines the skill's metadata and instructions
+3. The skill will be discovered automatically on the next `load_skills()` call
+
+The `SKILL.md` format follows the same structure as the built-in skills. Use one of the existing public skills as a reference for the expected format.
+
+<Cards num={2}>
+  <Cards.Card title="Sandbox" href="/docs/harness/sandbox" />
+  <Cards.Card title="Tools" href="/docs/harness/tools" />
+</Cards>
diff --git a/frontend/src/content/en/harness/subagents.mdx b/frontend/src/content/en/harness/subagents.mdx
new file mode 100644
index 000000000..193f1aa0c
--- /dev/null
+++ b/frontend/src/content/en/harness/subagents.mdx
@@ -0,0 +1,136 @@
+---
+title: Subagents
+description: When a task is too broad for a single reasoning thread, or when parts of it can be done in parallel, the Lead Agent delegates work to **subagents**. A subagent is a self-contained agent invocation that receives a specific task, executes it, and returns the result.
+---
+
+import { Callout, Cards } from "nextra/components";
+
+# Subagents
+
+<Callout type="info" emoji="👥">
+  Subagents are focused workers that the Lead Agent delegates subtasks to. They
+  run with isolated context, keeping the main conversation clean while handling
+  parallel or specialized work.
+</Callout>
+
+When a task is too broad for a single reasoning thread, or when parts of it can be done in parallel, the Lead Agent delegates work to **subagents**. A subagent is a self-contained agent invocation that receives a specific task, executes it, and returns the result.
+
+## Why subagents matter
+
+Subagents solve two key problems in long-horizon workflows:
+
+1. **Context isolation**: a subagent only sees the information it needs for its piece of the task, not the entire parent conversation. This keeps each agent's working context focused and tractable.
+2. **Parallelism**: multiple subagents can run concurrently, allowing independent parts of a task (e.g., researching multiple topics simultaneously) to be processed in parallel.
+
+## Built-in subagents
+
+DeerFlow ships with two built-in subagents:
+
+### general-purpose
+
+A general-purpose reasoning and execution agent. Suitable for delegating complex subtasks that require multi-step reasoning, web search, file operations, and artifact production.
+
+- **Default timeout**: 900 seconds (15 minutes)
+- **Default max turns**: 160
+
+### bash
+
+A subagent specialized for command-line task execution inside the sandbox. Suitable for scripting, data processing, file transformation, and environment setup tasks.
+
+- **Default timeout**: 900 seconds (15 minutes)
+- **Default max turns**: 80
+- **Availability**: only exposed when the sandbox's `bash` tool is available (either `allow_host_bash: true` or a container sandbox is configured)
+
+## Delegation flow
+
+The Lead Agent delegates work to a subagent using the built-in `task` tool:
+
+```
+task(
+  agent="general-purpose",
+  task="Research the top 5 competitors of Acme Corp and summarize their pricing",
+  context="Focus on B2B SaaS pricing models"
+)
+```
+
+The runtime then:
+
+1. Looks up the subagent configuration from the registry, applying any `config.yaml` overrides.
+2. Creates a new agent invocation with the subagent's own prompt and tools.
+3. Runs the subagent to completion (or until timeout / max turns).
+4. Returns the subagent's final output to the Lead Agent as the tool result.
+
+## Configuration
+
+Subagent timeouts and max turns are controlled through the `subagents:` section in `config.yaml`:
+
+```yaml
+subagents:
+  # Default timeout in seconds for all subagents (default: 900 = 15 minutes)
+  timeout_seconds: 900
+
+  # Optional: override max turns for all subagents
+  # max_turns: 120
+
+  # Optional: per-agent overrides
+  agents:
+    general-purpose:
+      timeout_seconds: 1800  # 30 minutes for complex tasks
+      max_turns: 160
+    bash:
+      timeout_seconds: 300   # 5 minutes for quick commands
+      max_turns: 80
+```
+
+Per-agent overrides take priority over the global `timeout_seconds` and `max_turns` settings.
+
+## Concurrency limits
+
+The `SubagentLimitMiddleware` controls how many subagents the Lead Agent can invoke in parallel in a single turn. This is controlled through the per-request configuration:
+
+- `subagent_enabled`: whether subagent delegation is active for this session
+- `max_concurrent_subagents`: maximum parallel task calls in one turn (default: 3)
+
+If the agent tries to call more subagents than the limit allows, the middleware trims the excess calls.
+
+## ACP agents (external agents)
+
+In addition to the built-in subagents, DeerFlow supports delegating to external agents through the **Agent Connect Protocol (ACP)**. ACP allows DeerFlow to invoke agents running as separate processes (including third-party CLI tools wrapped with an ACP adapter).
+
+Configure ACP agents in `config.yaml`:
+
+```yaml
+acp_agents:
+  claude_code:
+    command: npx
+    args: ["-y", "@zed-industries/claude-agent-acp"]
+    description: Claude Code for implementation, refactoring, and debugging
+    model: null
+    # auto_approve_permissions: false
+    # env:
+    #   ANTHROPIC_API_KEY: $ANTHROPIC_API_KEY
+
+  codex:
+    command: npx
+    args: ["-y", "@zed-industries/codex-acp"]
+    description: Codex CLI for repository tasks and code generation
+    model: null
+```
+
+The Lead Agent invokes ACP agents through the `invoke_acp_agent` built-in tool.
+
+<Callout type="tip">
+  ACP agents run as child processes managed by DeerFlow. They communicate over
+  the ACP wire protocol. The standard CLI tools (like the plain `claude` or
+  `codex` commands) are not ACP-compatible by default — use the adapter
+  packages listed above or a compatible ACP wrapper.
+</Callout>
+
+## Custom agents as subagents
+
+Custom agents created through the DeerFlow App UI can also be invoked as subagents using the `task` tool. When you specify `agent="my-custom-agent"`, the runtime loads that agent's configuration (skills, tool groups, model) and runs it as a subagent for the delegated task.
+
+<Cards num={2}>
+  <Cards.Card title="Sandbox" href="/docs/harness/sandbox" />
+  <Cards.Card title="MCP Integration" href="/docs/harness/mcp" />
+</Cards>
diff --git a/frontend/src/content/en/harness/tools.mdx b/frontend/src/content/en/harness/tools.mdx
index ad9493e40..005fa3ed3 100644
--- a/frontend/src/content/en/harness/tools.mdx
+++ b/frontend/src/content/en/harness/tools.mdx
@@ -1,3 +1,241 @@
+---
+title: Tools
+description: "The Lead Agent is a tool-calling agent. Tools are how it interacts with the world: searching the web, reading and writing files, running commands, delegating tasks, and presenting outputs to the user."
+---
+
+import { Callout, Cards, Tabs } from "nextra/components";
+
 # Tools
 
-TBD
+<Callout type="info" emoji="🔧">
+  Tools are the actions the Lead Agent can take. DeerFlow provides built-in
+  tools, community integrations, MCP tools, and skill tools — all controlled
+  through <code>config.yaml</code>.
+</Callout>
+
+The Lead Agent is a tool-calling agent. Tools are how it interacts with the world: searching the web, reading and writing files, running commands, delegating tasks, and presenting outputs to the user.
+
+DeerFlow organizes tools into four categories:
+
+1. **Built-in tools** — core runtime capabilities always available to the agent
+2. **Community tools** — integrations with external search, fetch, and image services
+3. **MCP tools** — tools provided by external Model Context Protocol servers
+4. **Skill tools** — tools bundled with specific skill packs
+
+## Built-in tools
+
+Built-in tools are part of the harness and do not require configuration to be available.
+
+### task
+
+Delegates a subtask to a subagent. The Lead Agent uses this tool when a task is too broad for a single reasoning thread or when parallel work would be beneficial.
+
+```
+task(agent="general-purpose", task="...", context="...")
+```
+
+See the [Subagents](/docs/harness/subagents) page for how subagents are configured.
+
+---
+
+### present_files
+
+Presents output files to the user as artifacts. The agent calls this tool after producing a file (report, chart, code, etc.) to surface it in the conversation.
+
+Files at `/mnt/user-data/uploads/*` are copied into `/mnt/user-data/outputs/*` before being presented. The artifact paths are tracked in `ThreadState.artifacts`.
+
+---
+
+### view_image
+
+Reads an image file and injects its content into the model's context for visual analysis. Only available when the active model has `supports_vision: true`.
+
+---
+
+### clarification
+
+Asks the user a clarifying question before proceeding. This is triggered by the `ClarificationMiddleware` when the model decides it does not have enough information to act.
+
+---
+
+### setup_agent
+
+Dynamically configures the current agent session. Used during the bootstrap flow when setting up a new custom agent.
+
+---
+
+### invoke_acp_agent
+
+Invokes an external agent using the [Agent Connect Protocol (ACP)](https://agentconnectprotocol.org/). Requires `acp_agents:` configuration in `config.yaml`. See the [Subagents](/docs/harness/subagents) page for ACP configuration.
+
+---
+
+### tool_search
+
+Searches for tools by name or description and loads them into the agent's context on demand. Only active when `tool_search.enabled: true` in `config.yaml`. Useful when MCP or other tool sets expose many tools and you want to reduce context usage.
+
+## Sandbox file tools
+
+The following tools interact with the sandbox filesystem. They require a sandbox to be configured and active.
+
+| Tool | Description |
+|---|---|
+| `ls` | List files in a directory |
+| `read_file` | Read file contents |
+| `glob` | Find files matching a pattern |
+| `grep` | Search file contents |
+| `write_file` | Write content to a file |
+| `str_replace` | Replace a string in a file |
+| `bash` | Execute a shell command (requires `allow_host_bash: true` or a container sandbox) |
+
+These are configured in `config.yaml` under `tools:`:
+
+```yaml
+tools:
+  - use: deerflow.sandbox.tools:ls_tool
+  - use: deerflow.sandbox.tools:read_file_tool
+  - use: deerflow.sandbox.tools:glob_tool
+  - use: deerflow.sandbox.tools:grep_tool
+  - use: deerflow.sandbox.tools:write_file_tool
+  - use: deerflow.sandbox.tools:str_replace_tool
+  - use: deerflow.sandbox.tools:bash_tool  # requires host bash or container sandbox
+```
+
+## Community tools
+
+Community tools connect the agent to external services. They are configured in `config.yaml` under `tools:` using the `use:` field to specify the implementation.
+
+### Web search
+
+<Tabs items={["DuckDuckGo (default)", "Tavily", "Exa", "InfoQuest", "Firecrawl"]}>
+  <Tabs.Tab>
+```yaml
+tools:
+  - use: deerflow.community.ddg_search.tools:web_search_tool
+```
+No API key required. Default configuration. Suitable for development and general use.
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+tools:
+  - use: deerflow.community.tavily.tools:web_search_tool
+    api_key: $TAVILY_API_KEY
+```
+High-quality search with structured results. Requires a [Tavily](https://tavily.com) API key.
+
+Install: `cd backend && uv add 'deerflow-harness[tavily]'`
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+tools:
+  - use: deerflow.community.exa.tools:web_search_tool
+    api_key: $EXA_API_KEY
+```
+Semantic search with neural retrieval. Requires an [Exa](https://exa.ai) API key.
+
+Install: `cd backend && uv add 'deerflow-harness[exa]'`
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+tools:
+  - use: deerflow.community.infoquest.tools:web_search_tool
+    api_key: $INFOQUEST_API_KEY
+```
+InfoQuest search integration.
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+tools:
+  - use: deerflow.community.firecrawl.tools:web_search_tool
+    api_key: $FIRECRAWL_API_KEY
+```
+Firecrawl-powered search and crawl. Requires a [Firecrawl](https://firecrawl.dev) API key.
+
+Install: `cd backend && uv add 'deerflow-harness[firecrawl]'`
+  </Tabs.Tab>
+</Tabs>
+
+### Web fetch (page content extraction)
+
+<Tabs items={["Jina AI (default)", "Exa", "InfoQuest", "Firecrawl"]}>
+  <Tabs.Tab>
+```yaml
+tools:
+  - use: deerflow.community.jina_ai.tools:web_fetch_tool
+    api_key: $JINA_API_KEY  # optional; anonymous usage has rate limits
+```
+Converts web pages to clean Markdown. Works without an API key at reduced rate limits.
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+tools:
+  - use: deerflow.community.exa.tools:web_fetch_tool
+    api_key: $EXA_API_KEY
+```
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+tools:
+  - use: deerflow.community.infoquest.tools:web_fetch_tool
+    api_key: $INFOQUEST_API_KEY
+```
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+tools:
+  - use: deerflow.community.firecrawl.tools:web_fetch_tool
+    api_key: $FIRECRAWL_API_KEY
+```
+  </Tabs.Tab>
+</Tabs>
+
+### Image search
+
+```yaml
+tools:
+  - use: deerflow.community.image_search.tools:image_search_tool
+  # Or use InfoQuest:
+  # - use: deerflow.community.infoquest.tools:image_search_tool
+  #   api_key: $INFOQUEST_API_KEY
+```
+
+## Tool groups
+
+Tool groups let you organize tools into named sets and restrict which groups a custom agent can access.
+
+```yaml
+tool_groups:
+  - name: research
+    tools:
+      - web_search
+      - web_fetch
+      - image_search
+  - name: coding
+    tools:
+      - bash
+      - read_file
+      - write_file
+      - str_replace
+      - glob
+      - grep
+```
+
+Custom agents can then reference a group by name in their configuration, restricting their tool access to only the relevant set.
+
+## Tool search (deferred loading)
+
+When you have many tools (especially from multiple MCP servers), loading all of them upfront increases context usage and can confuse the model. The tool search feature addresses this:
+
+```yaml
+tool_search:
+  enabled: true
+```
+
+When enabled, tools are not listed in the model's context directly. Instead, they are discoverable at runtime via the `tool_search` built-in tool. The agent searches by name or description and the matching tools are loaded into context on demand.
+
+This is particularly useful when MCP servers expose dozens of tools.
+
+<Cards num={2}>
+  <Cards.Card title="MCP Integration" href="/docs/harness/mcp" />
+  <Cards.Card title="Skills" href="/docs/harness/skills" />
+</Cards>
diff --git a/frontend/src/content/en/introduction/core-concepts.mdx b/frontend/src/content/en/introduction/core-concepts.mdx
index dfed365cb..f7203ae1e 100644
--- a/frontend/src/content/en/introduction/core-concepts.mdx
+++ b/frontend/src/content/en/introduction/core-concepts.mdx
@@ -1,3 +1,8 @@
+---
+title: Core Concepts
+description: Before you go deeper into DeerFlow, it helps to anchor on a few concepts that appear throughout the system. These concepts explain what DeerFlow is optimizing for and why its architecture looks the way it does.
+---
+
 import { Callout, Cards } from "nextra/components";
 
 # Core Concepts
diff --git a/frontend/src/content/en/introduction/harness-vs-app.mdx b/frontend/src/content/en/introduction/harness-vs-app.mdx
index e0bbb6d55..9e6b8ece2 100644
--- a/frontend/src/content/en/introduction/harness-vs-app.mdx
+++ b/frontend/src/content/en/introduction/harness-vs-app.mdx
@@ -1,3 +1,8 @@
+---
+title: Harness vs App
+description: "DeerFlow has two layers that are closely related but serve different purposes."
+---
+
 import { Callout, Cards } from "nextra/components";
 
 # Harness vs App
diff --git a/frontend/src/content/en/introduction/why-deerflow.mdx b/frontend/src/content/en/introduction/why-deerflow.mdx
index f7fad9628..66ca91f31 100644
--- a/frontend/src/content/en/introduction/why-deerflow.mdx
+++ b/frontend/src/content/en/introduction/why-deerflow.mdx
@@ -1,3 +1,8 @@
+---
+title: Why DeerFlow
+description: DeerFlow exists because modern agent systems need more than a chat loop. A useful agent must plan over long horizons, break work into sub-tasks, use tools, manipulate files, run code safely, and preserve enough context to stay coherent across a complex task. DeerFlow was built to provide that runtime foundation.
+---
+
 import { Callout, Cards } from "nextra/components";
 
 # Why DeerFlow
diff --git a/frontend/src/content/en/reference/_meta.ts b/frontend/src/content/en/reference/_meta.ts
index cf53ad9a8..835fd6ce0 100644
--- a/frontend/src/content/en/reference/_meta.ts
+++ b/frontend/src/content/en/reference/_meta.ts
@@ -1,20 +1,8 @@
 import type { MetaRecord } from "nextra";
 
 const meta: MetaRecord = {
-  "concepts-glossary": {
-    title: "Concepts Glossary",
-  },
-  "configuration-reference": {
-    title: "Configuration Reference",
-  },
-  "api-gateway-reference": {
-    title: "API / Gateway Reference",
-  },
-  "runtime-flags-and-modes": {
-    title: "Runtime Flags and Modes",
-  },
-  "source-map": {
-    title: "Source Map",
+  "model-providers": {
+    title: "Model providers",
   },
 };
 
diff --git a/frontend/src/content/en/reference/api-gateway-reference.mdx b/frontend/src/content/en/reference/api-gateway-reference.mdx
deleted file mode 100644
index 2a5922d2b..000000000
--- a/frontend/src/content/en/reference/api-gateway-reference.mdx
+++ /dev/null
@@ -1,3 +0,0 @@
-# API / Gateway Reference
-
-TBD
diff --git a/frontend/src/content/en/reference/concepts-glossary.mdx b/frontend/src/content/en/reference/concepts-glossary.mdx
deleted file mode 100644
index 32f000a7e..000000000
--- a/frontend/src/content/en/reference/concepts-glossary.mdx
+++ /dev/null
@@ -1,3 +0,0 @@
-# Concepts Glossary
-
-TBD
diff --git a/frontend/src/content/en/reference/configuration-reference.mdx b/frontend/src/content/en/reference/configuration-reference.mdx
deleted file mode 100644
index cbc914dda..000000000
--- a/frontend/src/content/en/reference/configuration-reference.mdx
+++ /dev/null
@@ -1,3 +0,0 @@
-# Configuration Reference
-
-TBD
diff --git a/frontend/src/content/en/reference/model-providers/_meta.ts b/frontend/src/content/en/reference/model-providers/_meta.ts
new file mode 100644
index 000000000..81fd1b56f
--- /dev/null
+++ b/frontend/src/content/en/reference/model-providers/_meta.ts
@@ -0,0 +1,9 @@
+import type { MetaRecord } from "nextra";
+
+const meta: MetaRecord = {
+  ark: {
+    title: "火山方舟",
+  },
+};
+
+export default meta;
diff --git a/frontend/src/content/en/reference/model-providers/ark.mdx b/frontend/src/content/en/reference/model-providers/ark.mdx
new file mode 100644
index 000000000..5956d691b
--- /dev/null
+++ b/frontend/src/content/en/reference/model-providers/ark.mdx
@@ -0,0 +1,8 @@
+---
+title: Volcano Ark
+description: Integration guide for the Volcano Ark model provider.
+---
+
+# Volcano Ark
+
+## Coding Plan
diff --git a/frontend/src/content/en/reference/model-providers/index.mdx b/frontend/src/content/en/reference/model-providers/index.mdx
new file mode 100644
index 000000000..f9b506a6d
--- /dev/null
+++ b/frontend/src/content/en/reference/model-providers/index.mdx
@@ -0,0 +1,7 @@
+---
+title: Model providers
+description: Integration references for supported model provider services.
+asIndexPage: true
+---
+
+# Model providers
diff --git a/frontend/src/content/en/reference/runtime-flags-and-modes.mdx b/frontend/src/content/en/reference/runtime-flags-and-modes.mdx
deleted file mode 100644
index 76ce6f9ab..000000000
--- a/frontend/src/content/en/reference/runtime-flags-and-modes.mdx
+++ /dev/null
@@ -1,3 +0,0 @@
-# Runtime Flags and Modes
-
-TBD
diff --git a/frontend/src/content/en/reference/source-map.mdx b/frontend/src/content/en/reference/source-map.mdx
deleted file mode 100644
index f5a4823da..000000000
--- a/frontend/src/content/en/reference/source-map.mdx
+++ /dev/null
@@ -1,3 +0,0 @@
-# Source Map
-
-TBD
diff --git a/frontend/src/content/en/tutorials/create-your-first-harness.mdx b/frontend/src/content/en/tutorials/create-your-first-harness.mdx
index b692e09e0..3e94f984c 100644
--- a/frontend/src/content/en/tutorials/create-your-first-harness.mdx
+++ b/frontend/src/content/en/tutorials/create-your-first-harness.mdx
@@ -1,3 +1,104 @@
+---
+title: Create Your First Harness
+description: This tutorial shows you how to use the DeerFlow Harness programmatically — importing and using DeerFlow directly in your Python code rather than through the web interface.
+---
+
+import { Callout, Steps } from "nextra/components";
+
 # Create Your First Harness
 
-TBD
+This tutorial shows you how to use the DeerFlow Harness programmatically — importing and using DeerFlow directly in your Python code rather than through the web interface.
+
+## Prerequisites
+
+- Python 3.12+
+- `uv` installed
+- DeerFlow repository cloned
+
+## Install
+
+```bash
+cd deer-flow/backend
+uv sync
+```
+
+## Create configuration
+
+Create a minimal `config.yaml`:
+
+```yaml
+config_version: 6
+
+models:
+  - name: gpt-4o
+    use: langchain_openai:ChatOpenAI
+    model: gpt-4o
+    api_key: $OPENAI_API_KEY
+
+sandbox:
+  use: deerflow.sandbox.local:LocalSandboxProvider
+
+tools:
+  - use: deerflow.community.ddg_search.tools:web_search_tool
+  - use: deerflow.sandbox.tools:read_file_tool
+  - use: deerflow.sandbox.tools:write_file_tool
+```
+
+## Write the code
+
+<Steps>
+
+### Create a Python file
+
+Create `my_agent.py` in the `backend/` directory:
+
+```python
+import asyncio
+import os
+from deerflow.client import DeerFlowClient
+from deerflow.config import load_config
+
+os.environ["OPENAI_API_KEY"] = "sk-..."
+
+# Load config.yaml
+load_config()
+
+client = DeerFlowClient()
+
+async def main():
+    async for event in client.astream(
+        thread_id="my-first-thread",
+        message="Write a Python fibonacci function with a docstring",
+        config={
+            "configurable": {
+                "model_name": "gpt-4o",
+            }
+        },
+    ):
+        print(event)
+
+asyncio.run(main())
+```
+
+### Run it
+
+```bash
+cd backend
+uv run python my_agent.py
+```
+
+</Steps>
+
+## What the events look like
+
+The stream yields events like:
+
+```python
+{"type": "messages", "data": {"content": "def fibonacci..."}}
+{"type": "thread_state", "data": {"title": "Python Fibonacci Function"}}
+```
+
+## Next steps
+
+- [Use Tools and Skills](/docs/tutorials/use-tools-and-skills)
+- [Harness Quick Start](/docs/harness/quick-start)
diff --git a/frontend/src/content/en/tutorials/deploy-your-own-deerflow.mdx b/frontend/src/content/en/tutorials/deploy-your-own-deerflow.mdx
index 4ecfbaec1..bd62bbb51 100644
--- a/frontend/src/content/en/tutorials/deploy-your-own-deerflow.mdx
+++ b/frontend/src/content/en/tutorials/deploy-your-own-deerflow.mdx
@@ -1,3 +1,79 @@
+---
+title: Deploy Your Own DeerFlow
+description: This tutorial guides you through deploying DeerFlow to a production environment using Docker Compose for multi-user access.
+---
+
+import { Callout, Steps } from "nextra/components";
+
 # Deploy Your Own DeerFlow
 
-TBD
+This tutorial guides you through deploying DeerFlow to a production environment using Docker Compose for multi-user access.
+
+## Prerequisites
+
+- Docker and Docker Compose installed
+- A server or VM (Linux recommended)
+- LLM API key
+
+## Steps
+
+<Steps>
+
+### Clone the repository
+
+```bash
+git clone https://github.com/bytedance/deer-flow.git
+cd deer-flow
+```
+
+### Create the configuration file
+
+```bash
+cp config.example.yaml config.yaml
+```
+
+Edit `config.yaml` to add your model configuration.
+
+### Create the environment variables file
+
+```bash
+cat > .env << EOF
+OPENAI_API_KEY=sk-your-key-here
+DEER_FLOW_ROOT=$(pwd)
+BETTER_AUTH_SECRET=$(openssl rand -base64 32)
+BETTER_AUTH_URL=https://your-domain.com
+EOF
+```
+
+### Start the services
+
+```bash
+docker compose -f docker/docker-compose-dev.yaml up -d
+```
+
+### Verify the deployment
+
+```bash
+# Check all services are healthy
+curl http://localhost:2026/api/models
+
+# Follow logs
+docker compose -f docker/docker-compose-dev.yaml logs -f
+```
+
+Open `http://your-server:2026` in your browser.
+
+</Steps>
+
+## Production checklist
+
+- Configure HTTPS/TLS for nginx
+- Set `BETTER_AUTH_SECRET` to a strong random string (minimum 32 characters)
+- Configure firewall rules to allow only necessary ports
+- Back up `backend/.deer-flow/` directory regularly
+- Consider using a container-based sandbox (`AioSandboxProvider`) for multi-user isolation
+
+## Next steps
+
+- [Full Deployment Guide](/docs/application/deployment-guide)
+- [Operations and Troubleshooting](/docs/application/operations-and-troubleshooting)
diff --git a/frontend/src/content/en/tutorials/first-conversation.mdx b/frontend/src/content/en/tutorials/first-conversation.mdx
index 71ff683cf..271e806d2 100644
--- a/frontend/src/content/en/tutorials/first-conversation.mdx
+++ b/frontend/src/content/en/tutorials/first-conversation.mdx
@@ -1,3 +1,64 @@
+---
+title: First Conversation
+description: This tutorial walks you through your first complete agent conversation in DeerFlow — from launching the app to getting meaningful work done with the agent.
+---
+
+import { Callout, Steps } from "nextra/components";
+
 # First Conversation
 
-TBD
+This tutorial walks you through your first complete agent conversation in DeerFlow — from launching the app to getting meaningful work done with the agent.
+
+## Prerequisites
+
+- DeerFlow app is running (see [Quick Start](/docs/application/quick-start))
+- At least one model is configured in `config.yaml`
+
+## Steps
+
+<Steps>
+
+### Open the workspace
+
+Open [http://localhost:2026](http://localhost:2026) in your browser. You will see the conversation workspace.
+
+### Send your first message
+
+Type a question in the input box, for example:
+
+```
+Research the top 3 most popular open-source LLM frameworks in 2024 and compare their strengths and weaknesses.
+```
+
+Press Enter to send.
+
+### Watch the agent work
+
+You will see the agent start working:
+- Expand the **thinking steps** to see which tools it is calling
+- Watch search results stream in
+- Wait for the final report to be generated
+
+### Interact with the result
+
+Once the report is generated, you can:
+- Ask for more detail on a specific section
+- Ask to export the report as a file (the agent will use the `present_files` tool)
+- Ask to create a chart based on the research findings
+
+</Steps>
+
+## What just happened
+
+The agent used the DeerFlow Harness to:
+1. Receive your message and add it to the thread state
+2. Run the middleware chain (memory injection, title generation)
+3. Call the LLM, which decided to search the web
+4. Execute web search tool calls
+5. Synthesize results into a structured response
+6. Update the thread state with any artifacts produced
+
+## Next steps
+
+- [Use Tools and Skills](/docs/tutorials/use-tools-and-skills)
+- [Workspace Usage](/docs/application/workspace-usage)
diff --git a/frontend/src/content/en/tutorials/use-tools-and-skills.mdx b/frontend/src/content/en/tutorials/use-tools-and-skills.mdx
index 0d1af3a81..dc84a6317 100644
--- a/frontend/src/content/en/tutorials/use-tools-and-skills.mdx
+++ b/frontend/src/content/en/tutorials/use-tools-and-skills.mdx
@@ -1,3 +1,64 @@
+---
+title: Use Tools and Skills
+description: This tutorial shows you how to configure and use tools and skills in DeerFlow to give the agent access to web search, file operations, and domain-specific capabilities.
+---
+
+import { Callout } from "nextra/components";
+
 # Use Tools and Skills
 
-TBD
+This tutorial shows you how to configure and use tools and skills in DeerFlow to give the agent access to web search, file operations, and domain-specific capabilities.
+
+## Configuring tools
+
+Add tools to `config.yaml`:
+
+```yaml
+tools:
+  # Web search
+  - use: deerflow.community.ddg_search.tools:web_search_tool
+
+  # Web content fetching
+  - use: deerflow.community.jina_ai.tools:web_fetch_tool
+
+  # Sandbox file operations
+  - use: deerflow.sandbox.tools:ls_tool
+  - use: deerflow.sandbox.tools:read_file_tool
+  - use: deerflow.sandbox.tools:write_file_tool
+  - use: deerflow.sandbox.tools:bash_tool
+```
+
+## Enabling skills
+
+Enable skills through the DeerFlow app's extensions panel, or edit `extensions_config.json` directly.
+
+**Via the app UI:**
+1. Open the DeerFlow app
+2. Click the Extensions/Skills icon in the sidebar
+3. Find `deep-research` and toggle it on
+
+## Using a skill for research
+
+With the `deep-research` skill enabled, select it in the conversation input, then send a research request:
+
+```
+Do a deep research on the latest advances in quantum computing, focusing on practical applications.
+```
+
+The agent will run a multi-step research workflow including web search, information synthesis, and report generation.
+
+## Using the data analysis skill
+
+Enable `data-analysis`, then upload a CSV or data file and ask the agent to analyze it:
+
+```
+Analyze this CSV file and identify the top trends.
+```
+
+The agent will use the sandbox tools to read the file, run analysis, and produce charts.
+
+## Next steps
+
+- [Work with Memory](/docs/tutorials/work-with-memory)
+- [Tools Reference](/docs/harness/tools)
+- [Skills Reference](/docs/harness/skills)
diff --git a/frontend/src/content/en/tutorials/work-with-memory.mdx b/frontend/src/content/en/tutorials/work-with-memory.mdx
index f32d0f9ee..8b101aaf4 100644
--- a/frontend/src/content/en/tutorials/work-with-memory.mdx
+++ b/frontend/src/content/en/tutorials/work-with-memory.mdx
@@ -1,3 +1,69 @@
+---
+title: Work with Memory
+description: This tutorial shows you how to enable and use DeerFlow's memory system so the agent remembers important information about you across multiple sessions.
+---
+
+import { Callout } from "nextra/components";
+
 # Work with Memory
 
-TBD
+This tutorial shows you how to enable and use DeerFlow's memory system so the agent remembers important information about you across multiple sessions.
+
+## Enable memory
+
+In `config.yaml`:
+
+```yaml
+memory:
+  enabled: true
+  injection_enabled: true
+  max_injection_tokens: 2000
+  debounce_seconds: 30
+```
+
+## How memory works
+
+Memory works automatically through `MemoryMiddleware`:
+
+1. **First conversation**: tell the agent about your preferences, project, or background.
+2. **Automatic learning**: the agent extracts and saves important facts in the background.
+3. **Future conversations**: memory facts are automatically injected into the system prompt — the agent does not need you to repeat context.
+
+## Example
+
+**First conversation:**
+```
+I am a Python backend developer primarily using FastAPI and PostgreSQL.
+My team follows PEP 8 and prefers type annotations everywhere.
+Please remember this for future code suggestions.
+```
+
+**Later conversation** (no need to repeat background):
+```
+Help me write a user authentication module
+```
+
+The agent will automatically produce FastAPI-style code with type annotations.
+
+## Inspect memory
+
+Memory is stored in `backend/.deer-flow/memory.json`:
+
+```bash
+cat backend/.deer-flow/memory.json
+```
+
+## Per-agent memory
+
+When a custom agent is active, it maintains its own memory file at:
+
+```
+backend/.deer-flow/agents/{agent_name}/memory.json
+```
+
+This keeps each agent's learned knowledge separate.
+
+## Next steps
+
+- [Deploy Your Own DeerFlow](/docs/tutorials/deploy-your-own-deerflow)
+- [Memory System Reference](/docs/harness/memory)
diff --git a/frontend/src/content/zh/_meta.ts b/frontend/src/content/zh/_meta.ts
index 8bd4c5575..9540c441e 100644
--- a/frontend/src/content/zh/_meta.ts
+++ b/frontend/src/content/zh/_meta.ts
@@ -4,12 +4,36 @@ const meta: MetaRecord = {
   index: {
     title: "概览",
   },
+  introduction: {
+    title: "简介",
+  },
+  harness: {
+    title: "DeerFlow Harness",
+  },
+  application: {
+    title: "DeerFlow 应用",
+  },
+  tutorials: {
+    title: "教程",
+  },
+  reference: {
+    title: "参考",
+  },
   workspace: {
     type: "page",
   },
   blog: {
     type: "page",
   },
+  posts: {
+    type: "page",
+  },
+  login: {
+    type: "page",
+  },
+  setup: {
+    type: "page",
+  },
 };
 
 export default meta;
diff --git a/frontend/src/content/zh/application/_meta.ts b/frontend/src/content/zh/application/_meta.ts
new file mode 100644
index 000000000..8cbba95d5
--- /dev/null
+++ b/frontend/src/content/zh/application/_meta.ts
@@ -0,0 +1,27 @@
+import type { MetaRecord } from "nextra";
+
+const meta: MetaRecord = {
+  index: {
+    title: "概览",
+  },
+  "quick-start": {
+    title: "快速上手",
+  },
+  "deployment-guide": {
+    title: "部署指南",
+  },
+  configuration: {
+    title: "配置",
+  },
+  "workspace-usage": {
+    title: "工作区使用",
+  },
+  "agents-and-threads": {
+    title: "Agent 与线程",
+  },
+  "operations-and-troubleshooting": {
+    title: "运维与排障",
+  },
+};
+
+export default meta;
diff --git a/frontend/src/content/zh/application/agents-and-threads.mdx b/frontend/src/content/zh/application/agents-and-threads.mdx
new file mode 100644
index 000000000..0900c733a
--- /dev/null
+++ b/frontend/src/content/zh/application/agents-and-threads.mdx
@@ -0,0 +1,119 @@
+---
+title: Agent 与线程
+description: 了解 DeerFlow 中 Agent 与线程的关系，以及如何管理自定义 Agent 和对话线程。
+---
+
+import { Callout, Cards, Steps } from "nextra/components";
+
+# Agent 与线程
+
+<Callout type="info" emoji="🤖">
+  Agent 是配置单元——它们定义了一组能力。线程是对话实例，带有持久化状态和历史记录。
+</Callout>
+
+## 自定义 Agent
+
+DeerFlow 允许你创建多个具有不同专业领域的自定义 Agent。每个 Agent 使用 DeerFlow Harness 相同的 Lead Agent 运行时，但具有不同的：
+
+- 模型（例如为一个 Agent 使用 GPT-4o，为另一个使用 Claude）
+- 系统提示和指令
+- 技能（例如专注于数据分析的 Agent 只加载数据分析技能）
+- 工具访问（通过工具组）
+
+### 创建自定义 Agent
+
+<Steps>
+
+#### 通过界面
+
+1. 打开 DeerFlow 应用。
+2. 点击侧边栏中的 **Agents**（Agent 管理）。
+3. 点击 **New Agent**（新建 Agent）。
+4. 填写名称、描述和配置。
+5. 保存——Agent 立即可用，无需重启。
+
+#### 通过 API
+
+```bash
+curl -X POST http://localhost:8001/api/agents \
+  -H "Content-Type: application/json" \
+  -d '{
+    "display_name": "数据分析师",
+    "description": "专业的数据分析和可视化",
+    "skills": ["data-analysis", "chart-visualization"]
+  }'
+```
+
+</Steps>
+
+### Agent 名称和 Slug
+
+创建 Agent 时，`display_name` 对用户显示，系统内部使用自动派生的 ASCII `slug`（`name` 字段）来标识 Agent。
+
+- `display_name`：对用户显示的任意字符串（例如 "数据分析师"）
+- `name`（slug）：用于 API 和文件路径的 ASCII 标识符（例如 `data-analyst`）
+
+如果派生的 slug 与现有 Agent 冲突，`/api/agents/check` 端点会建议一个唯一的替代名称。
+
+### Agent 存储
+
+自定义 Agent 配置存储在 `backend/agents/{name}/config.yaml` 中。你可以直接编辑这些文件——更改在下次 Agent 调用时自动加载。
+
+## 线程生命周期
+
+线程是对话及其所有相关状态的完整封装：消息历史、产出物、待办列表和检查点数据。
+
+### 创建
+
+线程在你发送第一条消息时创建。线程 ID 是自动生成的 UUID（由前端生成），用于标识所有后续请求中的对话。
+
+### 执行
+
+每次你发送消息，LangGraph 从最新检查点恢复线程状态，运行 Lead Agent，并更新状态。流式事件在 Agent 工作时发送到浏览器。
+
+### 检查点和持久化
+
+DeerFlow 在每次 Agent 轮次后自动保存线程状态（如果配置了检查点器）。这允许：
+
+- 在服务器重启后恢复线程。
+- 在长时间间隔后继续对话。
+- 在出现问题时重放或从特定时间点恢复。
+
+配置检查点器：
+
+```yaml
+checkpointer:
+  type: sqlite
+  connection_string: .deer-flow/checkpoints.db
+```
+
+对于生产高负载环境使用 Redis：
+
+```yaml
+checkpointer:
+  type: redis
+  connection_string: redis://localhost:6379/0
+```
+
+### 线程数据目录
+
+每个线程在 `.deer-flow/threads/{thread_id}/` 下有一个专用目录：
+
+```
+.deer-flow/threads/{thread_id}/
+  user-data/
+    uploads/     ← 用户上传的文件
+    outputs/     ← Agent 生成的产出物
+    workspace/   ← 会话工作文件
+```
+
+删除线程会移除其检查点数据，但用户数据目录会单独管理（取决于删除配置）。
+
+### 恢复线程
+
+线程以其持久化状态自动恢复。从侧边栏选择任何过往线程即可从停止处继续——Agent 会记住所有消息和产出物。
+
+<Cards num={2}>
+  <Cards.Card title="工作区使用" href="/docs/application/workspace-usage" />
+  <Cards.Card title="运维与排障" href="/docs/application/operations-and-troubleshooting" />
+</Cards>
diff --git a/frontend/src/content/zh/application/configuration.mdx b/frontend/src/content/zh/application/configuration.mdx
new file mode 100644
index 000000000..a0cb9c7cc
--- /dev/null
+++ b/frontend/src/content/zh/application/configuration.mdx
@@ -0,0 +1,220 @@
+---
+title: 配置
+description: 本页面涵盖 DeerFlow 应用的所有配置层——`config.yaml`、前端环境变量、`extensions_config.json` 和运行时环境变量。
+---
+
+import { Callout, Cards, Tabs } from "nextra/components";
+
+# 配置
+
+本页面涵盖 DeerFlow 应用的所有配置层——`config.yaml`、前端环境变量、`extensions_config.json` 和运行时环境变量。
+
+## config.yaml
+
+`config.yaml` 是 DeerFlow 的主要配置文件。所有 Agent 行为、模型选择、工具加载和运行时功能都由它控制。
+
+参见[配置](/docs/harness/configuration)参考页面了解文件格式和每个章节的详细说明。
+
+### 模型提供商
+
+<Tabs items={["OpenAI", "Claude", "Gemini", "DeepSeek", "Ollama（本地）"]}>
+  <Tabs.Tab>
+```yaml
+models:
+  - name: gpt-4o
+    use: langchain_openai:ChatOpenAI
+    model: gpt-4o
+    api_key: $OPENAI_API_KEY
+    request_timeout: 600.0
+    max_retries: 2
+    supports_vision: true
+    thinking_enabled: false
+```
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+models:
+  - name: claude-sonnet
+    use: langchain_anthropic:ChatAnthropic
+    model: claude-sonnet-4-5
+    api_key: $ANTHROPIC_API_KEY
+    max_tokens: 16000
+    supports_vision: true
+    thinking_enabled: false
+```
+
+启用扩展思考：
+
+```yaml
+  - name: claude-extended-thinking
+    use: langchain_anthropic:ChatAnthropic
+    model: claude-sonnet-4-5
+    api_key: $ANTHROPIC_API_KEY
+    max_tokens: 16000
+    thinking_enabled: true
+    extra_body:
+      thinking:
+        type: enabled
+        budget_tokens: 10000
+```
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+models:
+  - name: gemini
+    use: langchain_google_genai:ChatGoogleGenerativeAI
+    model: gemini-2.5-pro-preview-03-25
+    api_key: $GEMINI_API_KEY
+    request_timeout: 600.0
+    max_retries: 2
+    supports_vision: true
+    thinking_enabled: false
+```
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+models:
+  - name: deepseek
+    use: langchain_openai:ChatOpenAI
+    model: deepseek-reasoner
+    api_key: $DEEPSEEK_API_KEY
+    base_url: https://api.deepseek.com
+    request_timeout: 600.0
+    max_retries: 2
+    supports_vision: false
+    thinking_enabled: true
+```
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+models:
+  - name: ollama-llama
+    use: langchain_ollama:ChatOllama
+    model: llama3.3
+    base_url: http://localhost:11434
+    request_timeout: 600.0
+    max_retries: 2
+    supports_vision: false
+    thinking_enabled: false
+```
+
+确保 Ollama 已运行并已拉取所需模型：
+
+```bash
+ollama pull llama3.3
+```
+  </Tabs.Tab>
+</Tabs>
+
+### 沙箱
+
+```yaml
+# 本地开发（默认）
+sandbox:
+  use: deerflow.sandbox.local:LocalSandboxProvider
+  allow_host_bash: false
+
+# 基于容器（推荐用于多用户）
+sandbox:
+  use: deerflow.community.aio_sandbox:AioSandboxProvider
+  replicas: 3
+  idle_timeout: 600
+```
+
+### 工具
+
+参见[工具](/docs/harness/tools)页面了解完整的工具配置参考。快速参考：
+
+```yaml
+tools:
+  - use: deerflow.community.ddg_search.tools:web_search_tool
+  - use: deerflow.community.jina_ai.tools:web_fetch_tool
+  - use: deerflow.sandbox.tools:ls_tool
+  - use: deerflow.sandbox.tools:read_file_tool
+  - use: deerflow.sandbox.tools:write_file_tool
+  - use: deerflow.sandbox.tools:bash_tool
+```
+
+### 技能
+
+```yaml
+skills:
+  container_path: /mnt/skills
+  # path: /custom/path/to/skills  # 可选；默认为仓库 skills/ 目录
+```
+
+技能可用性在 `extensions_config.json` 中管理（参见下方）。
+
+### 检查点（线程持久化）
+
+```yaml
+checkpointer:
+  type: sqlite
+  connection_string: .deer-flow/checkpoints.db
+
+# 或使用 Redis（高负载生产环境）：
+# checkpointer:
+#   type: redis
+#   connection_string: redis://localhost:6379/0
+```
+
+## 前端环境变量
+
+前端通过 `.env.local`（本地开发）或 Docker Compose 环境中的环境变量配置。
+
+| 变量 | 必需 | 描述 |
+|---|---|---|
+| `BETTER_AUTH_SECRET` | 是（生产） | 会话管理的密钥（最少 32 个字符） |
+| `BETTER_AUTH_URL` | 推荐 | 你的应用公开 URL（例如 `https://your-domain.com`） |
+| `SKIP_ENV_VALIDATION` | 否 | 设为 `1` 跳过构建时环境变量验证 |
+
+本地开发：
+
+```bash
+# frontend/.env.local
+BETTER_AUTH_SECRET=local-dev-secret-at-least-32-chars
+```
+
+<Callout type="warning">
+  不要在生产中使用 <code>SKIP_ENV_VALIDATION=1</code>。为 <code>BETTER_AUTH_SECRET</code> 设置一个真实值。
+</Callout>
+
+## extensions_config.json
+
+`extensions_config.json` 控制 MCP 服务器和技能的运行时启用状态，独立于 `config.yaml`。
+
+默认位置：项目根目录（与 `config.yaml` 同一目录）。
+
+### MCP 服务器
+
+```json
+{
+  "mcpServers": {
+    "filesystem": {
+      "command": "npx",
+      "args": ["-y", "@modelcontextprotocol/server-filesystem", "/path/to/dir"],
+      "enabled": true
+    }
+  }
+}
+```
+
+### 技能启用状态
+
+技能启用状态会反映在 `extensions_config.json` 中。你可以直接编辑它，或通过 DeerFlow 应用界面进行管理。
+
+## 运行时环境变量
+
+这些变量在 DeerFlow 进程中设置（通过 `.env`、Docker 环境或 shell）：
+
+| 变量 | 默认值 | 描述 |
+|---|---|---|
+| `DEER_FLOW_CONFIG_PATH` | 自动发现 | `config.yaml` 的绝对路径 |
+| `LOG_LEVEL` | `info` | 日志详细程度（`debug`/`info`/`warning`/`error`） |
+| `DEER_FLOW_ROOT` | 仓库根目录 | 用于 Docker 中的技能和线程挂载 |
+| `LANGGRAPH_UPSTREAM` | `langgraph:2024` | nginx 代理的 LangGraph 地址 |
+
+<Cards num={2}>
+  <Cards.Card title="Harness 配置" href="/docs/harness/configuration" />
+  <Cards.Card title="部署指南" href="/docs/application/deployment-guide" />
+</Cards>
diff --git a/frontend/src/content/zh/application/deployment-guide.mdx b/frontend/src/content/zh/application/deployment-guide.mdx
new file mode 100644
index 000000000..ff75c21b0
--- /dev/null
+++ b/frontend/src/content/zh/application/deployment-guide.mdx
@@ -0,0 +1,204 @@
+---
+title: 部署指南
+description: 本指南涵盖 DeerFlow 应用所有支持的部署方式：本地开发、Docker Compose 以及使用 Kubernetes 管理沙箱的生产环境。
+---
+
+import { Callout, Cards, Steps, Tabs } from "nextra/components";
+
+# 部署指南
+
+本指南涵盖 DeerFlow 应用所有支持的部署方式：本地开发、Docker Compose 以及使用 Kubernetes 管理沙箱的生产环境。
+
+## 本地开发部署
+
+本地工作流是运行 DeerFlow 最快的方式，所有服务作为原生进程在你的机器上运行。
+
+<Tabs items={["启动", "停止", "日志"]}>
+  <Tabs.Tab>
+```bash
+make dev
+```
+
+启动的服务：
+
+| 服务 | 端口 | 描述 |
+|---|---|---|
+| LangGraph | 2024 | DeerFlow Harness 运行时 |
+| Gateway API | 8001 | FastAPI 后端 |
+| 前端 | 3000 | Next.js 界面 |
+| nginx | 2026 | 统一反向代理 |
+
+访问地址：**http://localhost:2026**
+  </Tabs.Tab>
+  <Tabs.Tab>
+```bash
+make stop
+```
+
+停止所有四个服务。即使某个服务没有运行也可以安全执行。
+  </Tabs.Tab>
+  <Tabs.Tab>
+```
+logs/langgraph.log   # Agent 运行时日志
+logs/gateway.log     # API Gateway 日志
+logs/frontend.log    # Next.js 开发服务器日志
+logs/nginx.log       # nginx 访问/错误日志
+```
+
+实时追踪日志：
+```bash
+tail -f logs/langgraph.log
+```
+  </Tabs.Tab>
+</Tabs>
+
+## Docker Compose 部署
+
+Docker Compose 在容器中运行所有服务。适用于更接近生产的本地设置或团队环境。
+
+### 前置条件
+
+- Docker（macOS 上的 Docker Desktop 或 OrbStack）
+- 在仓库根目录中已配置的 `config.yaml`
+
+### 开发 Compose
+
+```bash
+# 设置 deer-flow 仓库根目录的绝对路径
+export DEER_FLOW_ROOT=/path/to/deer-flow
+
+docker compose -f docker/docker-compose-dev.yaml up --build
+```
+
+访问地址：**http://localhost:2026**
+
+### 环境变量
+
+在仓库根目录创建 `.env` 文件用于存放密钥和运行时配置：
+
+```bash
+# .env
+OPENAI_API_KEY=sk-...
+DEER_FLOW_ROOT=/absolute/path/to/deer-flow
+BETTER_AUTH_SECRET=your-secret-here-min-32-chars
+```
+
+`docker-compose*.yaml` 文件包含 `env_file: ../.env` 指令，会自动加载该文件。
+
+<Callout type="warning">
+  在部署前始终将 <code>BETTER_AUTH_SECRET</code> 设置为强随机字符串。没有它，前端构建会使用一个公开已知的默认值。
+</Callout>
+
+### 数据持久化
+
+线程数据存储在 `backend/.deer-flow/threads/`。在 Docker 部署中，此目录被绑定挂载到 langgraph 容器中。
+
+为避免容器重建时数据丢失：
+
+1. 将 `DEER_FLOW_ROOT` 设置为仓库根目录的绝对路径。
+2. 验证 `threads/` 和 `skills/` 目录挂载正确。
+
+对于生产环境，使用命名卷或持久化卷声明（PVC）代替主机绑定挂载。
+
+## 生产部署注意事项
+
+### 沙箱模式选择
+
+| 沙箱 | 使用场景 |
+|---|---|
+| `LocalSandboxProvider` | 单用户、受信任的本地工作流 |
+| `AioSandboxProvider`（Docker） | 多用户、中等隔离需求 |
+| `AioSandboxProvider` + K8s Provisioner | 生产环境、强隔离、多用户 |
+
+对于有多个并发用户的任何部署，使用基于容器的沙箱，防止用户之间的执行环境相互干扰。
+
+### K8s Provisioner 配置
+
+<Steps>
+
+#### 配置 Provisioner
+
+在你的 `.env` 或 compose 覆盖文件中设置必需的环境变量：
+
+```bash
+K8S_NAMESPACE=deer-flow
+SANDBOX_IMAGE=enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest
+DEER_FLOW_ROOT=/absolute/path/to/deer-flow
+```
+
+#### 配置沙箱提供者
+
+```yaml
+# config.yaml
+sandbox:
+  use: deerflow.community.aio_sandbox:AioSandboxProvider
+  provisioner_url: http://provisioner:8002
+```
+
+#### 配置数据持久化
+
+对于生产环境，使用 PVC 代替 hostPath 卷：
+
+```bash
+# 在 .env 或 compose 环境中
+USERDATA_PVC_NAME=deer-flow-userdata-pvc
+SKILLS_PVC_NAME=deer-flow-skills-pvc
+```
+
+当设置 `USERDATA_PVC_NAME` 时，Provisioner 自动使用子路径（`threads/{thread_id}/user-data`），每个线程在 PVC 中获得自己的目录。
+
+</Steps>
+
+### nginx 配置
+
+nginx 路由所有流量，控制路由的关键环境变量：
+
+| 变量 | 默认值 | 描述 |
+|---|---|---|
+| `LANGGRAPH_UPSTREAM` | `langgraph:2024` | LangGraph 服务地址 |
+| `LANGGRAPH_REWRITE` | `/` | LangGraph 路由的 URL 重写前缀 |
+
+这些在 Docker Compose 环境中设置，并在容器启动时由 `envsubst` 处理。
+
+### 认证配置
+
+DeerFlow 应用使用 [Better Auth](https://www.better-auth.com/) 进行会话管理。在生产环境中：
+
+1. 将 `BETTER_AUTH_SECRET` 设置为强随机字符串（最少 32 个字符）。
+2. 将 `BETTER_AUTH_URL` 设置为你的公开 URL（例如 `https://your-domain.com`）。
+
+```bash
+# 生成密钥
+openssl rand -base64 32
+```
+
+### 资源建议
+
+| 服务 | 最低配置 | 推荐配置 |
+|---|---|---|
+| LangGraph（Agent 运行时） | 2 vCPU、4 GB RAM | 4 vCPU、8 GB RAM |
+| Gateway | 0.5 vCPU、512 MB | 1 vCPU、1 GB |
+| 前端 | 0.5 vCPU、512 MB | 1 vCPU、1 GB |
+| 沙箱容器（每会话） | 1 vCPU、1 GB | 2 vCPU、2 GB |
+
+## 部署后验证
+
+启动后验证部署：
+
+```bash
+# 检查 Gateway 健康状态
+curl http://localhost:8001/health
+
+# 检查 LangGraph 健康状态
+curl http://localhost:2024/ok
+
+# 通过 nginx 列出配置的模型（验证完整代理链）
+curl http://localhost:2026/api/models
+```
+
+正常工作的部署会从每个端点返回 `200` 响应。`/api/models` 调用会返回你 `config.yaml` 中的模型列表。
+
+<Cards num={2}>
+  <Cards.Card title="配置" href="/docs/application/configuration" />
+  <Cards.Card title="运维与排障" href="/docs/application/operations-and-troubleshooting" />
+</Cards>
diff --git a/frontend/src/content/zh/application/index.mdx b/frontend/src/content/zh/application/index.mdx
new file mode 100644
index 000000000..d8c06d0c4
--- /dev/null
+++ b/frontend/src/content/zh/application/index.mdx
@@ -0,0 +1,70 @@
+---
+title: DeerFlow 应用
+description: DeerFlow 应用是 DeerFlow 生产体验的参考实现。它将 Harness 运行时、基于 Web 的对话工作区、API Gateway 和反向代理组合成一个可部署的完整系统。
+---
+
+import { Callout, Cards } from "nextra/components";
+
+# DeerFlow 应用
+
+<Callout type="info" emoji="🚀">
+  DeerFlow 应用是构建在 DeerFlow Harness 之上的完整 Super Agent 应用。它将运行时能力打包成一个可部署的产品，包含 Web 界面、API Gateway 和运维工具。
+</Callout>
+
+DeerFlow 应用是 DeerFlow 生产体验的参考实现。它将 Harness 运行时、基于 Web 的对话工作区、API Gateway 和反向代理组合成一个可部署的完整系统。
+
+## 应用提供什么
+
+| 能力 | 描述 |
+|---|---|
+| **Web 工作区** | 浏览器对话界面，支持线程、产出物、文件上传和技能选择 |
+| **自定义 Agent** | 创建和管理具有不同模型、技能和工具集的命名 Agent |
+| **线程管理** | 带检查点和历史记录的持久化对话线程 |
+| **流式响应** | 实时 token 流式传输，带思考步骤和工具调用可见性 |
+| **产出物查看器** | Agent 生成文件和输出的浏览器内预览和下载 |
+| **扩展界面** | 无需编辑配置文件即可启用/禁用 MCP 服务器和技能 |
+| **Gateway API** | 桥接前端和 LangGraph 运行时的基于 FastAPI 的 REST API |
+
+## 架构
+
+DeerFlow 应用以四个服务的形式运行，通过单个 nginx 反向代理提供：
+
+```
+                ┌──────────────────┐
+     浏览器 → │  nginx :2026      │
+                └──────────────────┘
+                  │          │
+         ┌────────┘          └────────┐
+         ▼                            ▼
+┌──────────────────┐       ┌──────────────────────┐
+│  前端 :3000      │       │  Gateway API :8001    │
+│  (Next.js)       │       │  (FastAPI)            │
+└──────────────────┘       └──────────────────────┘
+                                      │
+                            ┌─────────┘
+                            ▼
+                  ┌──────────────────────┐
+                  │  LangGraph :2024     │
+                  │  (DeerFlow Harness)  │
+                  └──────────────────────┘
+```
+
+- **nginx**：路由请求——`/api/*` 到 Gateway，LangGraph 流式端点到 LangGraph，其余到前端。
+- **前端**（Next.js + React）：浏览器界面，与 Gateway 和 LangGraph 通信。
+- **Gateway**（FastAPI）：处理 API 操作——模型列表、Agent CRUD、记忆、扩展管理、文件上传。
+- **LangGraph**：DeerFlow Harness 运行时，管理线程状态、Agent 执行和流式传输。
+
+## 技术栈
+
+| 层次 | 技术 |
+|---|---|
+| 前端 | Next.js 16、React 19、TypeScript、pnpm |
+| Gateway | FastAPI、Python 3.12、uvicorn |
+| Agent 运行时 | LangGraph、LangChain、DeerFlow Harness |
+| 反向代理 | nginx |
+| 状态持久化 | LangGraph Server（默认）+ 可选 SQLite/PostgreSQL 检查点 |
+
+<Cards num={2}>
+  <Cards.Card title="快速上手" href="/docs/application/quick-start" />
+  <Cards.Card title="部署指南" href="/docs/application/deployment-guide" />
+</Cards>
diff --git a/frontend/src/content/zh/application/operations-and-troubleshooting.mdx b/frontend/src/content/zh/application/operations-and-troubleshooting.mdx
new file mode 100644
index 000000000..3b4157d4f
--- /dev/null
+++ b/frontend/src/content/zh/application/operations-and-troubleshooting.mdx
@@ -0,0 +1,170 @@
+---
+title: 运维与排障
+description: 本页面涵盖运行 DeerFlow 应用的操作信息：日志记录、常见问题和维护任务。
+---
+
+import { Callout, Cards } from "nextra/components";
+
+# 运维与排障
+
+本页面涵盖运行 DeerFlow 应用的操作信息：日志记录、常见问题和维护任务。
+
+## 日志
+
+DeerFlow 应用在 `logs/` 目录中写入每个服务的日志：
+
+| 文件 | 内容 |
+|---|---|
+| `logs/langgraph.log` | Agent 运行时、工具调用、LangGraph 错误 |
+| `logs/gateway.log` | API 请求/响应、Gateway 错误 |
+| `logs/frontend.log` | Next.js 服务器日志 |
+| `logs/nginx.log` | 代理访问和错误日志 |
+
+**实时追踪日志**：
+
+```bash
+tail -f logs/langgraph.log   # 查看 Agent 活动
+tail -f logs/gateway.log     # 查看 API 请求
+```
+
+**调整日志级别**：
+
+```yaml
+# config.yaml
+log_level: debug  # debug | info | warning | error
+```
+
+## 健康检查
+
+DeerFlow 暴露健康检查端点：
+
+```bash
+# Gateway 健康状态
+curl http://localhost:8001/health
+
+# LangGraph 健康状态
+curl http://localhost:2024/ok
+
+# 通过 nginx 完整代理链验证
+curl http://localhost:2026/api/models
+```
+
+## 配置升级
+
+当 `config.yaml` schema 更新时（由 `config_version` 字段标识），运行：
+
+```bash
+make config-upgrade
+```
+
+这将 `config.example.yaml` 中的新字段合并到你现有的 `config.yaml` 中，而不覆盖你的自定义内容。
+
+## 常见问题
+
+### 模型配置错误
+
+**症状**：Agent 在响应第一条消息时报错，日志中有 API 认证错误。
+
+**诊断**：
+```bash
+# 检查 LangGraph 日志中的模型错误
+grep -i "error\|apikey\|unauthorized" logs/langgraph.log | tail -20
+```
+
+**解决**：
+1. 验证 `config.yaml` 中 API key 字段名称正确（例如 `$OPENAI_API_KEY`）。
+2. 确认对应的环境变量已设置（`echo $OPENAI_API_KEY`）。
+3. 检查 `base_url`（如有）是否与提供商的实际端点匹配。
+
+---
+
+### 沙箱权限问题
+
+**症状**：工具报"文件未找到"或权限错误，即使 Agent 声称已创建文件。
+
+**诊断**：
+```bash
+# 检查线程用户数据目录是否存在且可写
+ls -la backend/.deer-flow/threads/
+```
+
+**解决**：
+1. 确保 `backend/.deer-flow/` 对运行 DeerFlow 的进程可写。
+2. 在 Docker 部署中，验证卷挂载路径正确（`DEER_FLOW_ROOT` 设置为绝对路径）。
+3. 如果使用基于容器的沙箱，确认 Docker 已运行并且容器镜像已拉取。
+
+---
+
+### 前端构建失败
+
+**症状**：`make install` 或前端构建步骤失败，提示 `BETTER_AUTH_SECRET` 错误。
+
+**解决**：
+```bash
+# 选项 1：设置环境变量（推荐）
+export BETTER_AUTH_SECRET=your-secret-here-at-least-32-chars
+cd frontend && pnpm build
+
+# 选项 2：跳过构建时验证（仅限开发）
+SKIP_ENV_VALIDATION=1 pnpm build
+```
+
+---
+
+### MCP 服务器连接失败
+
+**症状**：MCP 工具未出现，`logs/langgraph.log` 中有超时错误。
+
+**诊断**：
+```bash
+# 检查 MCP 相关错误
+grep -i "mcp\|timeout" logs/langgraph.log | tail -20
+```
+
+**解决**：
+1. 验证 `extensions_config.json` 中 MCP 服务器的 `command` 和 `args` 在服务器外部正常工作（手动运行命令）。
+2. 确认 MCP 服务器的依赖（如 `npx`、`uvx`）已安装并在 PATH 中。
+3. 检查 MCP 服务器是否需要网络访问或特定环境变量。
+
+---
+
+### K8s Provisioner 连接失败
+
+**症状**：沙箱工具请求挂起，日志中有连接拒绝错误。
+
+**解决**：
+1. 验证 `config.yaml` 中 `provisioner_url` 正确且 Provisioner Pod 运行正常。
+2. 检查 `K8S_NAMESPACE` 和 RBAC 配置是否允许 Provisioner 创建 Pod。
+3. 验证 `SANDBOX_IMAGE` 可以从 K8s 节点拉取。
+
+## 数据备份
+
+DeerFlow 将持久化数据存储在：
+- **线程数据**：`backend/.deer-flow/threads/` — 每个线程的上传文件、输出和工作区文件
+- **检查点**：取决于检查点器配置（SQLite：`backend/.deer-flow/checkpoints.db`，Redis：外部存储）
+- **记忆**：`backend/.deer-flow/memory.json`（以及 `agents/*/memory.json`）
+- **自定义 Agent 配置**：`backend/agents/*/config.yaml`
+
+对于生产部署，定期备份这些目录。Docker 部署中，确保这些目录绑定挂载到持久卷，而不是容器内部。
+
+## 停止和重启服务
+
+```bash
+# 停止所有本地服务
+make stop
+
+# 重启（停止后重新启动）
+make stop && make dev
+```
+
+在 Docker 中：
+
+```bash
+docker compose -f docker/docker-compose-dev.yaml down
+docker compose -f docker/docker-compose-dev.yaml up
+```
+
+<Cards num={2}>
+  <Cards.Card title="部署指南" href="/docs/application/deployment-guide" />
+  <Cards.Card title="配置" href="/docs/application/configuration" />
+</Cards>
diff --git a/frontend/src/content/zh/application/quick-start.mdx b/frontend/src/content/zh/application/quick-start.mdx
new file mode 100644
index 000000000..a531a6430
--- /dev/null
+++ b/frontend/src/content/zh/application/quick-start.mdx
@@ -0,0 +1,126 @@
+---
+title: 快速上手
+description: 本指南引导你使用 `make dev` 工作流在本地机器上启动 DeerFlow 应用。所有四个服务（LangGraph、Gateway、前端、nginx）一起启动，通过单个 URL 访问。
+---
+
+import { Callout, Cards, Steps } from "nextra/components";
+
+# 快速上手
+
+<Callout type="info" emoji="⚡">
+  大约 10 分钟即可在本地运行 DeerFlow 应用。你需要一台安装了 Python 3.12+、Node.js 22+ 的机器，以及至少一个 LLM API Key。
+</Callout>
+
+本指南引导你使用 `make dev` 工作流在本地机器上启动 DeerFlow 应用。所有四个服务（LangGraph、Gateway、前端、nginx）一起启动，通过单个 URL 访问。
+
+## 前置条件
+
+检查所有必需工具是否已安装：
+
+```bash
+make check
+```
+
+必需工具：
+
+| 工具 | 最低版本 |
+|---|---|
+| Python | 3.12 |
+| uv | 最新版 |
+| Node.js | 22 |
+| pnpm | 10 |
+| nginx | 任何近期版本 |
+
+在 macOS 上，使用 `brew install python uv node pnpm nginx` 安装。在 Linux 上，使用你的发行版包管理器。
+
+## 步骤
+
+<Steps>
+
+### 克隆仓库
+
+```bash
+git clone https://github.com/bytedance/deer-flow.git
+cd deer-flow
+```
+
+### 安装依赖
+
+```bash
+make install
+```
+
+这会安装后端 Python 依赖（通过 `uv`）和前端 Node.js 依赖（通过 `pnpm`）。
+
+### 创建配置文件
+
+```bash
+cp config.example.yaml config.yaml
+```
+
+然后编辑 `config.yaml` 至少添加一个模型：
+
+```yaml
+models:
+  - name: gpt-4o
+    use: langchain_openai:ChatOpenAI
+    model: gpt-4o
+    api_key: $OPENAI_API_KEY
+    request_timeout: 600.0
+    max_retries: 2
+    supports_vision: true
+```
+
+启动前设置对应的环境变量：
+
+```bash
+export OPENAI_API_KEY=sk-...
+```
+
+查看[配置](/docs/application/configuration)页面了解其他模型提供商的示例。
+
+### 启动所有服务
+
+```bash
+make dev
+```
+
+这会启动：
+- LangGraph 服务，端口 `2024`
+- Gateway API，端口 `8001`
+- 前端，端口 `3000`
+- nginx 反向代理，端口 `2026`
+
+在浏览器中打开 [http://localhost:2026](http://localhost:2026)。
+
+### 停止所有服务
+
+```bash
+make stop
+```
+
+</Steps>
+
+## `make dev` 做了什么
+
+- 首先停止已有的服务进程（在启动被中断后安全运行）。
+- 每个服务在后台启动，日志写入 `logs/` 目录。
+- nginx 通过端口 `2026` 代理所有流量，所以你只需要一个 URL。
+
+日志文件：
+
+| 服务 | 日志文件 |
+|---|---|
+| LangGraph | `logs/langgraph.log` |
+| Gateway | `logs/gateway.log` |
+| 前端 | `logs/frontend.log` |
+| nginx | `logs/nginx.log` |
+
+<Callout type="tip">
+  如果有问题，先检查日志文件。大多数启动错误（缺失 API Key、配置解析失败）会出现在 <code>logs/langgraph.log</code> 或 <code>logs/gateway.log</code> 中。
+</Callout>
+
+<Cards num={2}>
+  <Cards.Card title="部署指南" href="/docs/application/deployment-guide" />
+  <Cards.Card title="配置" href="/docs/application/configuration" />
+</Cards>
diff --git a/frontend/src/content/zh/application/workspace-usage.mdx b/frontend/src/content/zh/application/workspace-usage.mdx
new file mode 100644
index 000000000..5ade5cc13
--- /dev/null
+++ b/frontend/src/content/zh/application/workspace-usage.mdx
@@ -0,0 +1,92 @@
+---
+title: 工作区使用
+description: DeerFlow 工作区是一个基于浏览器的对话界面，你可以在其中向 Agent 发送消息、上传文件、查看中间步骤，以及下载生成的产出物。
+---
+
+import { Callout, Cards } from "nextra/components";
+
+# 工作区使用
+
+<Callout type="info" emoji="💬">
+  DeerFlow 工作区是你与 Agent 交互的地方。本页面涵盖主要用户界面工作流——创建对话、上传文件、查看产出物和使用技能。
+</Callout>
+
+DeerFlow 工作区是一个基于浏览器的对话界面，你可以在其中向 Agent 发送消息、上传文件、查看中间步骤，以及下载生成的产出物。
+
+## 新建对话
+
+1. 打开 [http://localhost:2026](http://localhost:2026)。
+2. 在主界面点击 **New Thread**（新建线程）按钮，或直接在输入框中输入消息。
+3. 输入你的第一条消息并发送。
+
+每个对话是一个**线程**，有独立的历史记录、产出物和检查点。
+
+## 选择模型
+
+在消息输入区域，点击**模型选择器**从 `config.yaml` 中配置的模型中选择。默认选中第一个配置的模型。
+
+你可以在对话中途切换模型——每次新的发送都会使用所选的模型。
+
+## 选择 Agent
+
+打开 **Agent 选择器**从可用 Agent 中选择：
+
+- **默认 Agent**：使用所有全局工具和技能的通用 Agent。
+- **自定义 Agent**：具有专门技能组合、工具访问和提示词的命名 Agent。
+
+参见 [Agent 与线程](/docs/application/agents-and-threads)页面了解如何创建自定义 Agent。
+
+## 上传文件
+
+拖放文件到消息输入区域，或点击附件图标上传。上传的文件挂载在沙箱的 `/mnt/user-data/uploads/` 路径下，Agent 可以直接读取和处理。
+
+**支持的操作**：
+- 读取和分析文件内容
+- 处理数据文件（CSV、JSON、Excel）
+- 提取 PDF 内容
+- 分析图像（需要支持视觉的模型）
+
+<Callout type="tip">
+  上传大文件时，告诉 Agent 文件的具体内容，以便获得更好的结果（例如"分析这个包含季度销售数据的 CSV"）。
+</Callout>
+
+## 使用技能
+
+某些 Agent 配置暴露了**技能选择器**。技能告诉 Agent 要执行哪类工作（深度研究、数据分析、图表生成等）。
+
+在输入框中点击技能选择器选择一个技能。选定的技能会将专业指令和工作流注入到当前对话中。
+
+## 查看 Agent 思考过程
+
+当 Agent 调用工具或进行推理时，你可以展开**思考步骤**：
+
+- **工具调用**：Agent 正在调用哪个工具、使用什么参数。
+- **工具结果**：工具返回了什么。
+- **思考内容**（如果模型支持）：模型的内部推理过程。
+- **待办列表**（计划模式）：当前任务列表及其完成状态。
+
+点击消息旁边的展开箭头查看完整的推理链。
+
+## 查看产出物
+
+当 Agent 生成文件（报告、图表、代码文件、演示文稿）时，它们会以**产出物**的形式出现在对话中。
+
+点击产出物卡片：
+- 在浏览器中预览文件。
+- 下载文件到本地机器。
+- 复制文件内容。
+
+产出物会持久保存在线程的用户数据目录中（`.deer-flow/threads/{thread_id}/user-data/outputs/`）。
+
+## 管理线程
+
+**查看过往线程**：使用侧边栏浏览之前的对话。
+
+**重命名线程**：`TitleMiddleware` 会在第一次交互后自动生成标题。你也可以手动重命名线程。
+
+**删除线程**：从线程侧边栏菜单中选择删除。这会移除线程状态和所有相关的用户数据文件。
+
+<Cards num={2}>
+  <Cards.Card title="Agent 与线程" href="/docs/application/agents-and-threads" />
+  <Cards.Card title="运维与排障" href="/docs/application/operations-and-troubleshooting" />
+</Cards>
diff --git a/frontend/src/content/zh/harness/_meta.ts b/frontend/src/content/zh/harness/_meta.ts
new file mode 100644
index 000000000..e1934f5ad
--- /dev/null
+++ b/frontend/src/content/zh/harness/_meta.ts
@@ -0,0 +1,48 @@
+import type { MetaRecord } from "nextra";
+
+const meta: MetaRecord = {
+  index: {
+    title: "安装",
+  },
+  "quick-start": {
+    title: "快速上手",
+  },
+  "design-principles": {
+    title: "设计理念",
+  },
+  "lead-agent": {
+    title: "Lead Agent",
+  },
+  middlewares: {
+    title: "中间件",
+  },
+  configuration: {
+    title: "配置",
+  },
+  memory: {
+    title: "记忆系统",
+  },
+  tools: {
+    title: "工具",
+  },
+  skills: {
+    title: "技能",
+  },
+  sandbox: {
+    title: "沙箱",
+  },
+  subagents: {
+    title: "子 Agent",
+  },
+  mcp: {
+    title: "MCP 集成",
+  },
+  customization: {
+    title: "自定义与扩展",
+  },
+  "integration-guide": {
+    title: "集成指南",
+  },
+};
+
+export default meta;
diff --git a/frontend/src/content/zh/harness/configuration.mdx b/frontend/src/content/zh/harness/configuration.mdx
new file mode 100644
index 000000000..eda2370dc
--- /dev/null
+++ b/frontend/src/content/zh/harness/configuration.mdx
@@ -0,0 +1,159 @@
+---
+title: 配置
+description: DeerFlow 的配置系统围绕一个目标设计：每一个有意义的行为都应该可以在配置文件中表达，而不是硬编码在应用程序中。这使部署可重现、可审计，并且易于按环境定制。
+---
+
+import { Callout, Cards } from "nextra/components";
+
+# 配置
+
+<Callout type="info" emoji="⚙️">
+  所有 DeerFlow Harness 行为都由 <code>config.yaml</code> 驱动。一个文件控制哪些模型可用、沙箱如何运行、加载哪些工具，以及每个子系统的行为。
+</Callout>
+
+DeerFlow 的配置系统围绕一个目标设计：每一个有意义的行为都应该可以在配置文件中表达，而不是硬编码在应用程序中。这使部署可重现、可审计，并且易于按环境定制。
+
+## 配置文件位置
+
+DeerFlow 使用以下优先级顺序解析 `config.yaml`：
+
+1. 显式传递给 `AppConfig.from_file(config_path)` 的路径。
+2. `DEER_FLOW_CONFIG_PATH` 环境变量。
+3. `backend/config.yaml`（相对于后端目录）。
+4. 仓库根目录中的 `config.yaml`。
+
+如果这些路径都不存在，应用程序在启动时会报错。
+
+要使用自定义位置：
+
+```bash
+export DEER_FLOW_CONFIG_PATH=/path/to/my-config.yaml
+```
+
+## 环境变量插值
+
+任何字段值都可以使用 `$VAR_NAME` 语法引用环境变量：
+
+```yaml
+models:
+  - name: gpt-4o
+    api_key: $OPENAI_API_KEY
+```
+
+这使密钥不出现在配置文件本身中，变量在运行时从进程环境解析。
+
+## `use` 字段
+
+许多配置条目使用 `use:` 字段来指定要实例化的 Python 类或对象，格式为：
+
+```
+package.subpackage.module:ClassName
+```
+
+或对于模块级对象：
+
+```
+package.subpackage.module:variable_name
+```
+
+示例：
+
+```yaml
+sandbox:
+  use: deerflow.sandbox.local:LocalSandboxProvider
+
+tools:
+  - use: deerflow.community.tavily.tools:web_search_tool
+    api_key: $TAVILY_API_KEY
+```
+
+这是 DeerFlow 实现可插拔性而不硬编码类引用的方式。
+
+## 额外字段透传
+
+对于模型配置，`ModelConfig` 使用 `pydantic ConfigDict(extra="allow")`。这意味着你在模型条目下添加的任何额外字段都直接传递给模型构造函数。这允许提供商特定选项（如 `extra_body`、`reasoning` 或自定义超时键）无需修改 Harness 即可工作：
+
+```yaml
+models:
+  - name: my-model
+    use: langchain_openai:ChatOpenAI
+    model: gpt-4o
+    api_key: $OPENAI_API_KEY
+    some_provider_specific_option: value  # 传递给 ChatOpenAI 构造函数
+```
+
+## 配置版本
+
+`config.yaml` 包含追踪 schema 版本的 `config_version` 字段：
+
+```yaml
+config_version: 6
+```
+
+当 schema 更改（新字段、重命名章节）时，此数字增加。如果你本地的 `config.yaml` 落后于当前版本，运行：
+
+```bash
+make config-upgrade
+```
+
+这将 `config.example.yaml` 中的新字段合并到你现有的 `config.yaml` 中，而不覆盖你的自定义内容。
+
+## 模块配置速查表
+
+下表将 `config.yaml` 中的每个顶层章节映射到其文档页面：
+
+| 章节 | 描述 | 文档 |
+|---|---|---|
+| `log_level` | 日志级别（`debug`/`info`/`warning`/`error`） | — |
+| `models` | 可用的 LLM 模型 | [Lead Agent](/docs/harness/lead-agent) |
+| `token_usage` | 每次模型调用的 token 追踪 | [中间件](/docs/harness/middlewares) |
+| `tools` | 可用的 Agent 工具 | [工具](/docs/harness/tools) |
+| `tool_groups` | 工具的命名分组 | [工具](/docs/harness/tools) |
+| `tool_search` | 延迟/按需工具加载 | [工具](/docs/harness/tools) |
+| `sandbox` | 沙箱提供者和选项 | [沙箱](/docs/harness/sandbox) |
+| `skills` | 技能目录和容器路径 | [技能](/docs/harness/skills) |
+| `skill_evolution` | Agent 管理的技能创建 | [技能](/docs/harness/skills) |
+| `subagents` | 子 Agent 超时和最大轮次 | [子 Agent](/docs/harness/subagents) |
+| `acp_agents` | 外部 ACP Agent 集成 | [子 Agent](/docs/harness/subagents) |
+| `memory` | 跨会话记忆存储 | [记忆系统](/docs/harness/memory) |
+| `summarization` | 对话摘要压缩 | [中间件](/docs/harness/middlewares) |
+| `title` | 自动生成线程标题 | [中间件](/docs/harness/middlewares) |
+| `checkpointer` | 线程状态持久化 | [Agent 与线程](/docs/application/agents-and-threads) |
+| `guardrails` | 工具调用授权 | — |
+| `uploads` | 文件上传设置（PDF 转换器） | — |
+| `channels` | IM 频道集成（飞书、Slack 等） | — |
+
+## 最小配置示例
+
+最小有效的 `config.yaml` 至少需要一个模型和一个沙箱：
+
+```yaml
+config_version: 6
+
+models:
+  - name: gpt-4o
+    use: langchain_openai:ChatOpenAI
+    model: gpt-4o
+    api_key: $OPENAI_API_KEY
+    request_timeout: 600.0
+    max_retries: 2
+    supports_vision: true
+
+sandbox:
+  use: deerflow.sandbox.local:LocalSandboxProvider
+
+tools:
+  - use: deerflow.community.ddg_search.tools:web_search_tool
+  - use: deerflow.community.jina_ai.tools:web_fetch_tool
+  - use: deerflow.sandbox.tools:ls_tool
+  - use: deerflow.sandbox.tools:read_file_tool
+  - use: deerflow.sandbox.tools:write_file_tool
+  - use: deerflow.sandbox.tools:bash_tool
+```
+
+从仓库根目录的 `config.example.yaml` 开始，取消注释你需要的章节。
+
+<Cards num={2}>
+  <Cards.Card title="部署指南" href="/docs/application/deployment-guide" />
+  <Cards.Card title="应用配置" href="/docs/application/configuration" />
+</Cards>
diff --git a/frontend/src/content/zh/harness/customization.mdx b/frontend/src/content/zh/harness/customization.mdx
new file mode 100644
index 000000000..25f902acd
--- /dev/null
+++ b/frontend/src/content/zh/harness/customization.mdx
@@ -0,0 +1,155 @@
+---
+title: 自定义与扩展
+description: DeerFlow 的可插拔架构意味着系统的大多数部分都可以在不 fork 核心的情况下被替换或扩展。本页面列举了扩展点，并解释如何使用每一个。
+---
+
+import { Callout, Cards } from "nextra/components";
+
+# 自定义与扩展
+
+<Callout type="info" emoji="🔧">
+  DeerFlow 设计为可适配的。你可以通过编写自定义中间件、添加新工具、构建技能包以及通过 config.yaml 的 <code>use:</code> 字段替换任何内置组件来扩展 Agent 行为。
+</Callout>
+
+DeerFlow 的可插拔架构意味着系统的大多数部分都可以在不 fork 核心的情况下被替换或扩展。本页面列举了扩展点，并解释如何使用每一个。
+
+## 自定义中间件
+
+中间件是为 Lead Agent 添加行为的主要扩展点。它们包裹每次 LLM 调用，可以在模型调用前后读取和修改 Agent 的状态。
+
+添加自定义中间件：
+
+1. 实现 `langchain.agents.middleware` 中的 `AgentMiddleware` 接口。
+2. 在构建 Agent 时通过 `custom_middlewares` 参数传入你的中间件。
+
+```python
+from langchain.agents.middleware import AgentMiddleware
+from deerflow.agents.thread_state import ThreadState
+
+class AuditMiddleware(AgentMiddleware):
+    async def on_start(self, state: ThreadState, config):
+        # 在每次模型调用前运行
+        print(f"[审计] 轮次开始：上下文中有 {len(state.messages)} 条消息")
+        return state, config
+
+    async def on_end(self, state: ThreadState, config):
+        # 在每次模型调用后运行
+        print(f"[审计] 轮次结束：最后一条消息类型 = {state.messages[-1].type}")
+        return state, config
+```
+
+自定义中间件在链末尾 `ClarificationMiddleware` 之前注入，后者始终最后运行。
+
+## 自定义工具
+
+在 `config.yaml` 的 `tools:` 下注册新工具，将其添加到 Agent：
+
+```yaml
+tools:
+  - use: mypackage.tools:my_custom_tool
+    api_key: $MY_TOOL_API_KEY
+```
+
+你的工具必须是 LangChain `BaseTool` 或用 `@tool` 装饰的函数。它将使用 `use:` 类路径和配置条目中的任何额外字段实例化。
+
+```python
+# mypackage/tools.py
+from langchain_core.tools import tool
+
+@tool
+def my_custom_tool(query: str) -> str:
+    """搜索我的自定义数据源。"""
+    return do_search(query)
+```
+
+## 自定义沙箱提供者
+
+沙箱可以通过实现 `SandboxProvider` 接口来替换：
+
+```python
+from deerflow.sandbox.sandbox_provider import SandboxProvider
+
+class MyCustomSandboxProvider(SandboxProvider):
+    def acquire(self, thread_id: str | None = None) -> str:
+        # 返回 sandbox_id
+        ...
+
+    def get(self, sandbox_id: str):
+        # 返回此 id 的沙箱实例
+        ...
+
+    def release(self, sandbox_id: str) -> None:
+        # 清理
+        ...
+```
+
+然后在 `config.yaml` 中引用：
+
+```yaml
+sandbox:
+  use: mypackage.sandbox:MyCustomSandboxProvider
+```
+
+## 自定义记忆存储
+
+通过实现 `MemoryStorage` 将基于文件的记忆替换为任何持久化存储：
+
+```python
+from deerflow.agents.memory.storage import MemoryStorage
+from typing import Any
+
+class RedisMemoryStorage(MemoryStorage):
+    def load(self, agent_name: str | None = None) -> dict[str, Any]:
+        ...
+
+    def reload(self, agent_name: str | None = None) -> dict[str, Any]:
+        ...
+
+    def save(self, memory_data: dict[str, Any], agent_name: str | None = None) -> bool:
+        ...
+```
+
+在 `config.yaml` 中配置：
+
+```yaml
+memory:
+  storage_class: mypackage.storage:RedisMemoryStorage
+```
+
+## 自定义技能
+
+技能是最简单的扩展点。在 `skills/custom/your-skill-name/` 下创建一个目录，并在其中添加 `SKILL.md` 文件。技能将在下次 `load_skills()` 调用时自动被发现。
+
+参见[技能](/docs/harness/skills)页面了解完整的目录结构和 `SKILL.md` 格式。
+
+## 自定义模型
+
+任何与 LangChain 兼容的聊天模型都可以通过在 `use:` 字段中指定来使用：
+
+```yaml
+models:
+  - name: my-custom-model
+    use: mypackage.models:MyCustomChatModel
+    # 任何额外字段都作为 kwargs 传递给构造函数
+    base_url: http://my-model-server:8080
+    api_key: $MY_MODEL_API_KEY
+```
+
+模型类必须实现 LangChain 的 `BaseChatModel` 接口。
+
+## 自定义检查点
+
+线程状态持久化可以使用任何与 LangGraph 兼容的检查点器：
+
+```yaml
+checkpointer:
+  type: sqlite
+  connection_string: ./my-checkpoints.db
+```
+
+对于自定义后端，实现 LangGraph 的 `BaseCheckpointSaver` 接口，并在初始化 `DeerFlowClient` 时以编程方式配置它。
+
+<Cards num={2}>
+  <Cards.Card title="集成指南" href="/docs/harness/integration-guide" />
+  <Cards.Card title="配置" href="/docs/harness/configuration" />
+</Cards>
diff --git a/frontend/src/content/zh/harness/design-principles.mdx b/frontend/src/content/zh/harness/design-principles.mdx
new file mode 100644
index 000000000..c25931d14
--- /dev/null
+++ b/frontend/src/content/zh/harness/design-principles.mdx
@@ -0,0 +1,117 @@
+---
+title: 设计理念
+description: 了解 DeerFlow Harness 背后的设计理念，有助于你有效地使用它、自信地扩展它，并推断 Agent 在生产环境中的行为方式。
+---
+
+import { Callout, Cards } from "nextra/components";
+
+# 设计理念
+
+<Callout type="info" emoji="🏗️">
+  DeerFlow 围绕一个核心思想构建：Agent 行为应该由小型、可观察、可替换的组件组合而成——而不是硬编码到固定的工作流图中。
+</Callout>
+
+了解 DeerFlow Harness 背后的设计理念，有助于你有效地使用它、自信地扩展它，并推断 Agent 在生产环境中的行为方式。
+
+## 为什么是 Harness，而非 Framework
+
+框架提供抽象和构建块，你负责组装各部分并编写连接它们的胶水代码。
+
+**Harness** 更进一步。它打包了一个有主张的、可直接运行的运行时，让 Agent 无需你每次重建相同的基础设施就能完成真实工作。
+
+DeerFlow 之所以是 Harness，是因为它内置了：
+
+- 带有工具路由的 Lead Agent
+- 包裹每次 LLM 调用的中间件链
+- 用于文件和命令的沙箱执行
+- 按需加载专业能力的技能
+- 用于委派并行工作的子 Agent
+- 跨会话连续性的记忆
+- 控制这一切的配置系统
+
+你不需要从头设计编排层——Harness 就是编排层。
+
+## 长时序任务是主要使用场景
+
+DeerFlow 专为需要不止一次提示-响应交互的任务而设计。一个有用的长时序 Agent 必须：
+
+1. 制定计划
+2. 按顺序调用工具
+3. 检查和修改文件
+4. 在失败时恢复
+5. 当任务太宽泛时委派给子 Agent
+6. 最终返回具体的产出物
+
+DeerFlow 中的每一个架构决策都以这个使用场景为标准进行评估。短暂、无状态的交互很简单。在真实世界压力下的长时序、多步骤工作流才是目标。
+
+## 中间件链优于继承
+
+DeerFlow 不要求你子类化 Agent 或覆盖方法来改变其行为。相反，它使用一个**中间件链**来包裹每次 LLM 调用。
+
+每个中间件都是一个小型、专注的插件，可以在模型调用前后检查或修改 Agent 的状态。Lead Agent 的行为完全由活跃的中间件决定。
+
+这种设计有几个优点：
+
+- 各个行为（记忆、摘要、澄清、循环检测）相互隔离，可独立测试。
+- 可以扩展链而无需触碰 Agent 的核心逻辑。
+- 每个中间件的效果是可见且可审计的，因为它只触及其声明的状态。
+
+详见[中间件](/docs/harness/middlewares)页面的完整列表和配置说明。
+
+## 技能提供专业化但不污染上下文
+
+**技能**是面向任务的能力包，包含指令、工作流、最佳实践以及让 Agent 在特定工作类型中有效的工具或资源。
+
+关键设计决策是技能**按需加载**。基础 Agent 保持通用。当任务需要深度研究时，加载研究技能；当任务需要数据分析时，加载分析技能。
+
+这很重要，因为它保持了基础 Agent 上下文的清晰。为撰写学术论文而设计的专业提示词不会污染专注于编码的会话。技能在相关时注入内容，仅此而已。
+
+## 沙箱是执行环境
+
+DeerFlow 为 Agent 提供一个**沙箱**：一个隔离的工作区，Agent 可以在其中读取文件、写入输出、运行命令并生成产出物。
+
+这将 Agent 从文本生成器转变为能够真正完成工作的系统。Agent 不只是描述要写什么代码，而是可以写代码、运行代码并验证结果。
+
+隔离性很重要，因为执行应该可重现且可控。沙箱是 DeerFlow 支持真实行动而不仅仅是对话的原因。
+
+## 上下文工程让长时序任务可控
+
+上下文压力是长时序 Agent 的主要挑战。如果所有内容无限期累积在上下文窗口中，Agent 会变得越来越慢、越来越嘈杂、越来越不可靠。
+
+DeerFlow 通过**上下文工程**来解决这个问题——有意识地控制 Agent 在每个步骤中看到、记住和忽略什么：
+
+- **摘要压缩**：当对话变得太长时，旧轮次被摘要替代。Agent 保留含义而不是全量内容。
+- **子 Agent 上下文隔离**：当工作委派给子 Agent 时，子 Agent 只接收完成其任务所需的信息，而不是完整的父历史。
+- **外部工作记忆**：任务产生的文件和产出物保存在磁盘上，而不在上下文窗口中。Agent 在需要时引用它们。
+- **记忆注入**：跨会话事实以受控的 token 预算注入到系统提示中。
+
+这是 DeerFlow 中最重要的思想之一。好的 Agent 行为不仅仅是关于更强大的模型，也关于在正确时间给模型提供正确的工作集。
+
+## 配置驱动行为
+
+DeerFlow 中所有有意义的行为都通过 `config.yaml` 控制。系统的设计使运维人员无需触碰代码就能改变 Agent 的行为——使用哪些模型、是否启用摘要压缩、如何限制子 Agent、哪些工具可用。
+
+这个设计原则有三个含义：
+
+1. **可重现性**：一个配置文件在某时间点完整描述了 Agent 的行为。
+2. **可部署性**：相同的代码在不同环境中运行方式不同，因为配置不同。
+3. **可审计性**：Agent 能做什么和不能做什么在一处可见。
+
+环境变量插值（`api_key: $OPENAI_API_KEY`）将密钥排除在提交的配置文件之外，同时保持相同的结构。
+
+## 总结
+
+| 设计原则 | 实践含义 |
+|---|---|
+| Harness 而非 Framework | 开箱即用的运行时，所有基础设施已预先连接 |
+| 长时序优先 | 架构假设多步骤、多工具、多轮次任务 |
+| 中间件优于继承 | 行为由小型、隔离的插件组合而成 |
+| 技能提供专业化 | 领域能力按需注入，保持基础干净 |
+| 沙箱用于执行 | 真实文件和命令操作的隔离工作区 |
+| 上下文工程 | 主动管理 Agent 所见内容以保持有效性 |
+| 配置驱动 | 所有关键行为通过 `config.yaml` 控制 |
+
+<Cards num={2}>
+  <Cards.Card title="Lead Agent" href="/docs/harness/lead-agent" />
+  <Cards.Card title="中间件" href="/docs/harness/middlewares" />
+</Cards>
diff --git a/frontend/src/content/zh/harness/index.mdx b/frontend/src/content/zh/harness/index.mdx
new file mode 100644
index 000000000..ea8712487
--- /dev/null
+++ b/frontend/src/content/zh/harness/index.mdx
@@ -0,0 +1,53 @@
+---
+title: 安装 DeerFlow Harness
+description: DeerFlow Harness 是构建自己 Super Agent 系统的 Python SDK 和运行时基础。
+---
+
+import { Callout, Cards } from "nextra/components";
+
+# 安装 DeerFlow Harness
+
+<Callout type="info" emoji="📦">
+  DeerFlow Harness Python 包将以 <code>deerflow</code> 名称发布。目前尚未正式发布，安装方式<strong>即将推出</strong>。
+</Callout>
+
+DeerFlow Harness 是构建自己 Super Agent 系统的 Python SDK 和运行时基础。
+
+如果你想在自己的产品或工作流中整合技能、记忆、工具、沙箱和子 Agent 等 Agent 能力，这正是你需要的 DeerFlow 组件。
+
+## 包名称
+
+包名将是：
+
+```bash
+pip install deerflow
+```
+
+该包目前还未公开发布，但这是文档正式发布后将使用的安装路径。
+
+## 当前状态
+
+DeerFlow Harness 包**即将推出**。
+
+目前，本章节的存在是为了建立 SDK 入口和包标识，同时公开分发流程正在最终确定中。
+
+## Harness 将提供什么
+
+Harness 专为想要在 DeerFlow 运行时模型之上构建自己 Agent 系统的开发者设计。
+
+它将提供以下基础：
+
+- 构建长时序 Agent
+- 组合运行时能力（记忆、工具、技能、子 Agent）
+- 在沙箱中执行 Agent
+- 通过配置和代码自定义 Agent 行为
+- 将 DeerFlow 集成到自己的应用架构中
+
+## 下一步
+
+在包正式发布之前，了解 DeerFlow Harness 的最佳方式是阅读本章节的概念和实现文档。
+
+<Cards num={2}>
+  <Cards.Card title="快速上手" href="/docs/harness/quick-start" />
+  <Cards.Card title="配置" href="/docs/harness/configuration" />
+</Cards>
diff --git a/frontend/src/content/zh/harness/integration-guide.mdx b/frontend/src/content/zh/harness/integration-guide.mdx
new file mode 100644
index 000000000..2d2e389a8
--- /dev/null
+++ b/frontend/src/content/zh/harness/integration-guide.mdx
@@ -0,0 +1,162 @@
+---
+title: 集成指南
+description: DeerFlow Harness 不仅仅是一个独立应用程序——它是一个可以导入并在你自己的后端、API 服务器、自动化系统或多 Agent 协调器中使用的 Python 库。
+---
+
+import { Callout, Cards } from "nextra/components";
+
+# 集成指南
+
+<Callout type="info" emoji="🔌">
+  DeerFlow Harness 可以嵌入任何 Python 应用程序。本指南涵盖在你自己的系统中将 DeerFlow 作为库使用的集成模式。
+</Callout>
+
+DeerFlow Harness 不仅仅是一个独立应用程序——它是一个可以导入并在你自己的后端、API 服务器、自动化系统或多 Agent 协调器中使用的 Python 库。
+
+## 嵌入 DeerFlowClient
+
+主要集成点是 `DeerFlowClient`。它封装了 LangGraph 运行时，并提供一个简洁的 API，用于在任何 Python 应用中发送消息和流式传输响应。
+
+```python
+from deerflow.client import DeerFlowClient
+from deerflow.config import load_config
+
+# 加载配置（读取 config.yaml 或 DEER_FLOW_CONFIG_PATH）
+load_config()
+
+client = DeerFlowClient()
+```
+
+客户端是线程安全的，设计为实例化一次并在请求之间复用。
+
+## 异步流式传输
+
+推荐的集成模式是异步流式传输。这让你可以在 Agent 生成响应时实时访问每个 token 和事件：
+
+```python
+import asyncio
+
+async def run_agent(thread_id: str, user_message: str):
+    async for event in client.astream(
+        thread_id=thread_id,
+        message=user_message,
+        config={
+            "configurable": {
+                "model_name": "gpt-4o",
+                "subagent_enabled": True,
+            }
+        },
+    ):
+        # 处理每个流式事件
+        yield event
+
+# 在 FastAPI 处理器中：
+# from fastapi.responses import StreamingResponse
+# return StreamingResponse(run_agent(thread_id, message), media_type="text/event-stream")
+```
+
+## 非流式调用
+
+对于批处理或只需要最终结果的场景：
+
+```python
+async def run_agent_sync(thread_id: str, user_message: str) -> dict:
+    result = await client.ainvoke(
+        thread_id=thread_id,
+        message=user_message,
+    )
+    return result
+```
+
+## 线程管理
+
+线程表示持久化对话。使用唯一线程 ID 隔离不同用户会话：
+
+```python
+import uuid
+
+# 新对话
+thread_id = str(uuid.uuid4())
+
+# 继续已有对话（相同 thread_id）
+# 如果配置了检查点，Agent 将看到完整历史
+await client.ainvoke(thread_id=existing_thread_id, message="后续问题")
+```
+
+## 自定义 Agent 配置
+
+通过创建命名 Agent 配置并在运行时传入 `agent_name` 来构建领域特定 Agent：
+
+```python
+# agents/research-assistant/config.yaml 必须存在并包含技能和工具配置
+
+result = await client.ainvoke(
+    thread_id=thread_id,
+    message=user_message,
+    config={
+        "configurable": {
+            "agent_name": "research-assistant",
+            "model_name": "gpt-4o",
+        }
+    },
+)
+```
+
+## 与 FastAPI 集成
+
+DeerFlow Gateway 本身是一个 FastAPI 应用程序。你可以将其作为子应用程序挂载：
+
+```python
+from fastapi import FastAPI
+from deerflow.config import load_config
+
+load_config()
+
+app = FastAPI()
+
+# 挂载 DeerFlow Gateway
+from deerflow.app.gateway.main import app as gateway_app
+app.mount("/deerflow", gateway_app)
+```
+
+或者在你自己的 FastAPI 路由中直接使用 `DeerFlowClient` 进行流式传输：
+
+```python
+from fastapi import FastAPI
+from fastapi.responses import StreamingResponse
+from deerflow.client import DeerFlowClient
+
+app = FastAPI()
+client = DeerFlowClient()
+
+@app.post("/chat/{thread_id}")
+async def chat(thread_id: str, body: dict):
+    async def generate():
+        async for event in client.astream(thread_id=thread_id, message=body["message"]):
+            yield f"data: {event}\n\n"
+    return StreamingResponse(generate(), media_type="text/event-stream")
+```
+
+## 嵌入模式下的配置
+
+当嵌入到另一个应用程序时，显式设置配置路径以避免歧义：
+
+```python
+import os
+os.environ["DEER_FLOW_CONFIG_PATH"] = "/path/to/my-deerflow-config.yaml"
+
+from deerflow.config import load_config
+load_config()
+```
+
+或直接传递路径：
+
+```python
+from deerflow.config import load_config
+load_config(config_path="/path/to/my-deerflow-config.yaml")
+```
+
+<Cards num={2}>
+  <Cards.Card title="自定义与扩展" href="/docs/harness/customization" />
+  <Cards.Card title="配置" href="/docs/harness/configuration" />
+</Cards>
diff --git a/frontend/src/content/zh/harness/lead-agent.mdx b/frontend/src/content/zh/harness/lead-agent.mdx
new file mode 100644
index 000000000..c6f16a413
--- /dev/null
+++ b/frontend/src/content/zh/harness/lead-agent.mdx
@@ -0,0 +1,131 @@
+---
+title: Lead Agent
+description: Lead Agent 是 DeerFlow 线程中的核心执行者。每个对话、任务和工作流都通过它进行。理解它的工作方式有助于你有效地配置它，并在需要时扩展它。
+---
+
+import { Callout, Cards, Steps } from "nextra/components";
+
+# Lead Agent
+
+<Callout type="info" emoji="🧠">
+  Lead Agent 是每个 DeerFlow 线程中的主要推理和编排单元。它决定要做什么、调用工具、委派子 Agent，并返回产出物。
+</Callout>
+
+Lead Agent 是 DeerFlow 线程中的核心执行者。每个对话、任务和工作流都通过它进行。理解它的工作方式有助于你有效地配置它，并在需要时扩展它。
+
+## Lead Agent 的职责
+
+Lead Agent 负责：
+
+- 接收用户消息并维护对话状态
+- 推断接下来要做什么（规划、工具选择、委派）
+- 调用工具——内置工具、社区工具、MCP 工具或技能工具
+- 通过 `task` 工具将子任务委派给子 Agent
+- 管理产出物（文件、输出、交付物）
+- 在计划模式下更新待办列表
+- 向用户返回最终响应或产出物
+
+Lead Agent 不硬编码特定的工作流。它使用模型的推理能力来适应用户提供的任何任务，由系统提示和当前范围内的技能引导。
+
+## 运行时基础
+
+Lead Agent 基于 **LangGraph** 和 **LangChain Agent** 原语构建：
+
+- `langchain.agents` 的 [`create_agent`](https://python.langchain.com/docs/concepts/agents/) 将 LLM 封装成工具调用 Agent 循环。
+- LangGraph 管理 `ThreadState`，提供检查点、流式传输和图执行模型。
+- **中间件链**包裹 Agent 循环的每一轮，提供记忆、摘要和澄清等跨领域能力。
+
+## 执行流程
+
+<Steps>
+
+### 接收消息
+
+用户消息到达并添加到 `ThreadState.messages`。`ThreadState` 保存完整的对话历史、任何活跃的待办列表、累积的产出物和运行时元数据。
+
+### 中间件前处理
+
+在调用模型之前，每个活跃的中间件都有机会修改状态。例如，`MemoryMiddleware` 将持久化的记忆事实注入到系统提示中，`SummarizationMiddleware` 在 token 预算超出时可能压缩旧消息。
+
+### LLM 推理
+
+模型接收当前消息（包括带有活跃技能指令的系统提示），并产生直接回复或一个或多个工具调用请求。
+
+### 工具执行
+
+如果请求了工具调用，它们会被分发到相应的处理器——用于文件和命令操作的沙箱工具、用于网络访问的社区工具，或用于子 Agent 委派的 `task` 工具。
+
+### 中间件后处理
+
+工具结果返回后、下一次模型调用之前，中间件再次运行。`TitleMiddleware` 可能在第一次交互后生成线程标题，`TodoMiddleware` 可能更新任务列表。
+
+### 循环或响应
+
+如果模型需要更多信息（例如，工具返回了部分结果），循环继续。当模型决定任务完成时，它产生最终消息，循环结束。
+
+### 状态更新
+
+`ThreadState` 更新为新消息、产出物和记忆队列。如果配置了检查点，状态将被持久化。
+
+</Steps>
+
+## 模型选择
+
+Lead Agent 在运行时使用以下优先级顺序解析要使用的模型：
+
+1. 每次请求配置中的 `model_name`（或 `model`），如果提供且有效。
+2. 活跃自定义 Agent 配置中的 `model` 字段，如果指定了 Agent。
+3. `config.yaml` 中 `models:` 列表的第一个模型（全局默认值）。
+
+如果请求的模型名称在配置中找不到，系统回退到默认模型并记录警告。
+
+```yaml
+models:
+  - name: my-primary-model
+    use: langchain_openai:ChatOpenAI
+    model: gpt-4o
+    api_key: $OPENAI_API_KEY
+    request_timeout: 600.0
+    max_retries: 2
+    supports_vision: true
+
+  - name: my-fast-model
+    use: langchain_openai:ChatOpenAI
+    model: gpt-4o-mini
+    api_key: $OPENAI_API_KEY
+```
+
+第一个条目（`my-primary-model`）成为默认值。任何不指定模型或指定未知模型名称的请求都将使用它。
+
+## 思考模式
+
+如果模型支持扩展思考（例如 DeepSeek Reasoner、启用思考的 Doubao、Anthropic Claude 思考模式），Lead Agent 可以在**思考模式**下运行。在此模式下，模型的内部推理步骤在响应流中可见。
+
+思考模式通过每次请求的 `thinking_enabled` 标志控制。如果启用了思考但配置的模型不支持，系统会优雅地回退并记录警告。
+
+## 计划模式
+
+当请求配置中将 `is_plan_mode` 设为 `true` 时，`TodoMiddleware` 被激活。Agent 然后维护一个结构化的任务列表，在处理复杂任务时将条目标记为 `in_progress`、`completed` 或 `pending`。这为用户提供了 Agent 进度的可见性。
+
+计划模式适用于显示增量进度有价值的复杂、多步骤任务。对于简单请求，最好禁用它以避免不必要的开销。
+
+## 自定义 Agent
+
+相同的 Lead Agent 运行时同时为默认 Agent 和你创建的任何自定义 Agent 提供服务。自定义 Agent 的区别仅在于：
+
+- 其**名称**（ASCII slug，从 `display_name` 自动派生）
+- 其**系统提示**或 Agent 特定指令
+- 它有权访问的**技能**
+- 它可以使用的**工具组**
+- 它默认使用的**模型**
+
+自定义 Agent 通过 DeerFlow 应用界面或 `/api/agents` 端点创建。其配置存储在后端目录的 `agents/{name}/config.yaml` 中。
+
+<Callout type="tip">
+  当在线程中选择自定义 Agent 时，Lead Agent 在运行时加载该 Agent 的配置。为特定 Agent 切换模型或技能不需要重启服务器。
+</Callout>
+
+<Cards num={2}>
+  <Cards.Card title="中间件" href="/docs/harness/middlewares" />
+  <Cards.Card title="工具" href="/docs/harness/tools" />
+</Cards>
diff --git a/frontend/src/content/zh/harness/mcp.mdx b/frontend/src/content/zh/harness/mcp.mdx
new file mode 100644
index 000000000..4bf72c1a6
--- /dev/null
+++ b/frontend/src/content/zh/harness/mcp.mdx
@@ -0,0 +1,111 @@
+---
+title: MCP 集成
+description: Model Context Protocol（MCP） 是连接语言模型与外部工具和数据源的开放标准。DeerFlow 的 MCP 集成允许你用任何实现了 MCP 协议的工具服务器扩展 Agent——无需修改 Harness 本身。
+---
+
+import { Callout, Cards, Steps } from "nextra/components";
+
+# MCP 集成
+
+<Callout type="info" emoji="🔌">
+  Model Context Protocol（MCP）让 DeerFlow
+  能够连接任何外部工具服务器。连接后，MCP 工具与内置工具一样对 Lead Agent 可用。
+</Callout>
+
+**Model Context Protocol（MCP）** 是连接语言模型与外部工具和数据源的开放标准。DeerFlow 的 MCP 集成允许你用任何实现了 MCP 协议的工具服务器扩展 Agent——无需修改 Harness 本身。
+
+## 配置
+
+MCP 服务器在 `extensions_config.json` 中配置，这个文件独立于 `config.yaml`。这种分离允许 MCP 和技能配置独立管理，并在运行时通过 Gateway API 更新。
+
+默认位置是项目根目录（与 `config.yaml` 同一目录）。
+
+```json
+{
+  "mcpServers": {
+    "my-server": {
+      "command": "npx",
+      "args": ["-y", "@my-org/my-mcp-server"],
+      "enabled": true
+    },
+    "filesystem": {
+      "command": "npx",
+      "args": ["-y", "@modelcontextprotocol/server-filesystem", "/path/to/dir"],
+      "enabled": true
+    },
+    "sqlite": {
+      "command": "uvx",
+      "args": ["mcp-server-sqlite", "--db-path", "/path/to/db.sqlite"],
+      "enabled": false
+    }
+  }
+}
+```
+
+每个服务器条目支持：
+
+- `command`：要运行的可执行文件（如 `npx`、`uvx`、`python`）
+- `args`：命令参数数组
+- `enabled`：服务器是否激活（可切换而无需删除条目）
+- `env`：可选地注入到服务器进程中的环境变量
+
+## 工具如何加载
+
+<Steps>
+
+### 启动初始化
+
+DeerFlow 服务器启动时调用 `initialize_mcp_tools()`。这连接到所有启用的 MCP 服务器，检索其工具 schema，并缓存结果。
+
+### 缓存失效
+
+MCP 工具缓存追踪 `extensions_config.json` 的修改时间（`mtime`）。当文件更改时——例如通过 Gateway API 启用或禁用服务器时——缓存被标记为过时，下次请求时重新加载工具。
+
+这意味着 MCP 服务器更改无需重启 DeerFlow 服务器即可生效。
+
+### 工具可用性
+
+加载后，MCP 工具与内置和社区工具一起出现在 Lead Agent 的工具列表中。Agent 使用与其他工具相同的机制选择和调用它们。
+
+</Steps>
+
+## 工具搜索集成
+
+当许多 MCP 服务器暴露大量工具时，预先将所有工具加载到 Agent 上下文中会增加 token 使用量并降低工具选择准确性。
+
+启用**工具搜索**改为按需加载 MCP 工具：
+
+```yaml
+# config.yaml
+tool_search:
+  enabled: true
+```
+
+启用工具搜索后，MCP 工具按名称列在系统提示中，但不包含完整的工具 schema。Agent 使用 `tool_search` 内置工具发现它们，只将需要的工具加载到上下文中。
+
+## OAuth 支持
+
+某些 MCP 服务器需要 OAuth 认证。DeerFlow 的 `mcp/oauth.py` 处理声明了 OAuth 需求的服务器的 OAuth 流程。
+
+当连接到受 OAuth 保护的 MCP 服务器时，DeerFlow 会：
+
+1. 从服务器能力头中检测 OAuth 需求
+2. 使用 `get_initial_oauth_headers()` 构建适当的授权头
+3. 通过 `build_oauth_tool_interceptor()` 用 OAuth 拦截器包装工具调用
+
+OAuth 流程对 Lead Agent 是透明的——它只是调用工具，DeerFlow 处理认证。
+
+## 管理 MCP 服务器
+
+MCP 服务器可以通过多种方式管理：
+
+- **通过 DeerFlow 应用界面**：扩展面板显示已连接的 MCP 服务器，允许你启用/禁用它们。
+- **通过 Gateway API**：`POST /api/extensions/mcp/{name}/enable` 和 `/disable`。
+- **直接编辑 `extensions_config.json`**：适用于脚本化或程序化配置。
+
+由于基于文件 mtime 的缓存失效机制，更改会自动被检测到。
+
+<Cards num={2}>
+  <Cards.Card title="工具" href="/docs/harness/tools" />
+  <Cards.Card title="配置" href="/docs/harness/configuration" />
+</Cards>
diff --git a/frontend/src/content/zh/harness/memory.mdx b/frontend/src/content/zh/harness/memory.mdx
new file mode 100644
index 000000000..71de258ad
--- /dev/null
+++ b/frontend/src/content/zh/harness/memory.mdx
@@ -0,0 +1,121 @@
+---
+title: 记忆系统
+description: 记忆是 DeerFlow Harness 的一个运行时功能。它不是简单的对话日志，而是跨多个独立会话持久化、在未来对话中影响 Agent 行为的结构化事实和上下文摘要存储。
+---
+
+import { Callout, Cards } from "nextra/components";
+
+# 记忆系统
+
+<Callout type="info" emoji="💾">
+  记忆让 DeerFlow 在多个会话中保留有用信息。Agent 记住用户偏好、项目背景和反复出现的事实，这样它可以在不每次从零开始的情况下给出更好的响应。
+</Callout>
+
+记忆是 DeerFlow Harness 的一个运行时功能。它不是简单的对话日志，而是跨多个独立会话持久化、在未来对话中影响 Agent 行为的结构化事实和上下文摘要存储。
+
+## 记忆存储什么
+
+记忆存储包含几类信息：
+
+- **工作上下文**：用户正在进行的项目摘要、目标和反复出现的话题。
+- **个人上下文**：Agent 学到的偏好、沟通风格和其他用户特定细节。
+- **近期关注**：最近的关注领域和活跃任务。
+- **历史**：近几个月的上下文、早期背景和长期事实。
+- **事实**：Agent 从对话中提取的离散具体事实（例如偏好的工具、团队名称、项目约束）。
+
+每个类别随着 Agent 从持续对话中学习而随时间更新。
+
+## 工作原理
+
+记忆由 `MemoryMiddleware` 管理，在每次 Lead Agent 轮次上运行：
+
+1. **注入**：每次对话开始时，Agent 当前记忆以受控的 token 预算（`max_injection_tokens`）注入到系统提示中。
+2. **学习**：对话结束后，后台任务提取新事实并更新相关记忆类别。更新通过 `debounce_seconds` 防抖以批量处理快速变化。
+3. **按 Agent 记忆**：当自定义 Agent 激活时，其记忆独立于全局记忆存储。这保持不同 Agent 的知识隔离。
+
+## 配置
+
+```yaml
+memory:
+  enabled: true
+
+  # 全局记忆文件的存储路径。
+  # 默认：{base_dir}/memory.json（解析为 backend/.deer-flow/memory.json）
+  # 绝对路径按原样使用，相对路径相对于 base_dir 解析。
+  storage_path: memory.json
+
+  # 存储类（默认：基于文件的 JSON 存储）
+  storage_class: deerflow.agents.memory.storage.FileMemoryStorage
+
+  # 处理排队记忆更新前等待的秒数（防抖）
+  debounce_seconds: 30
+
+  # 记忆更新提取使用的模型（null = 使用默认模型）
+  model_name: null
+
+  # 要存储的最大事实数
+  max_facts: 100
+
+  # 存储事实所需的最低置信度分数（0.0–1.0）
+  fact_confidence_threshold: 0.7
+
+  # 是否将记忆注入到系统提示中
+  injection_enabled: true
+
+  # 注入到系统提示的最大 token 数
+  max_injection_tokens: 2000
+```
+
+## 全局记忆与按 Agent 记忆
+
+DeerFlow 支持两级记忆：
+
+- **全局记忆**：存储在 `{base_dir}/memory.json`。在没有特定 Agent 激活或 Agent 没有按 Agent 记忆文件时使用。
+- **按 Agent 记忆**：存储在 `{base_dir}/agents/{agent_name}/memory.json`。当自定义 Agent 激活时使用，使该 Agent 学到的知识保持独立。
+
+`MemoryMiddleware` 根据请求配置中的活跃 `agent_name` 自动选择正确的记忆文件。
+
+用于记忆存储的 Agent 名称通过 `AGENT_NAME_PATTERN` 验证，以确保文件系统安全。
+
+## 存储位置
+
+默认情况下，记忆文件存储在后端基础目录下：
+
+- 基础目录：`backend/.deer-flow/`
+- 全局记忆：`backend/.deer-flow/memory.json`
+- 按 Agent 记忆：`backend/.deer-flow/agents/{agent_name}/memory.json`
+
+你可以用 `storage_path` 字段更改存储路径。相对路径相对于基础目录解析；使用绝对路径可以将记忆存储在自定义位置。
+
+## 自定义存储后端
+
+`storage_class` 字段允许你用自定义实现替换默认的基于文件的存储。任何继承 `MemoryStorage` 并实现 `load()`、`reload()` 和 `save()` 方法的类都可以使用：
+
+```yaml
+memory:
+  storage_class: mypackage.storage.RedisMemoryStorage
+```
+
+如果配置的类无法加载，系统回退到默认的 `FileMemoryStorage` 并记录错误。
+
+## 禁用记忆
+
+完全禁用记忆：
+
+```yaml
+memory:
+  enabled: false
+```
+
+保留记忆存储但阻止注入到系统提示：
+
+```yaml
+memory:
+  enabled: true
+  injection_enabled: false
+```
+
+<Cards num={2}>
+  <Cards.Card title="配置" href="/docs/harness/configuration" />
+  <Cards.Card title="中间件" href="/docs/harness/middlewares" />
+</Cards>
diff --git a/frontend/src/content/zh/harness/middlewares.mdx b/frontend/src/content/zh/harness/middlewares.mdx
new file mode 100644
index 000000000..94b3571c7
--- /dev/null
+++ b/frontend/src/content/zh/harness/middlewares.mdx
@@ -0,0 +1,201 @@
+---
+title: 中间件
+description: 每次 Lead Agent 调用 LLM 时，都会先后执行一条**中间件链**。中间件可以读取和修改 Agent 的状态、向系统提示注入内容、拦截工具调用，并对模型输出做出反应。
+---
+
+import { Callout } from "nextra/components";
+
+# 中间件
+
+<Callout type="info" emoji="🔌">
+  中间件包裹 Lead Agent 中的每次 LLM 调用。它们是添加跨领域行为（如记忆、摘要压缩、澄清和 token 追踪）的主要扩展点。
+</Callout>
+
+每次 Lead Agent 调用 LLM 时，都会先后执行一条**中间件链**。中间件可以读取和修改 Agent 的状态、向系统提示注入内容、拦截工具调用，并对模型输出做出反应。
+
+这种设计使 Agent 核心保持简单稳定，同时允许丰富的可组合行为分层叠加。
+
+## 链的工作方式
+
+中间件链在每次 Agent 调用时根据当前配置和请求参数构建一次。中间件按定义的顺序运行：
+
+1. 运行时中间件（错误处理、线程数据、上传、悬空工具调用修补）
+2. `SummarizationMiddleware` — 上下文压缩（如果启用）
+3. `TodoMiddleware` — 任务列表管理（仅计划模式）
+4. `TokenUsageMiddleware` — token 追踪（如果启用）
+5. `TitleMiddleware` — 自动生成线程标题
+6. `MemoryMiddleware` — 跨会话记忆注入和队列
+7. `ViewImageMiddleware` — 图像细节注入（如果模型支持视觉）
+8. `DeferredToolFilterMiddleware` — 隐藏延迟工具 schema（如果启用工具搜索）
+9. `SubagentLimitMiddleware` — 限制并行子 Agent 调用（如果启用子 Agent）
+10. `LoopDetectionMiddleware` — 打破重复工具调用循环
+11. 自定义中间件（如有）
+12. `ClarificationMiddleware` — 拦截澄清请求（始终最后）
+
+顺序很重要。摘要压缩在早期运行以在其他处理之前减少上下文。澄清总是最后运行，这样它可以在所有其他中间件完成后拦截。
+
+## 中间件参考
+
+### ClarificationMiddleware
+
+拦截澄清工具调用，并将其转换为面向用户的信息请求。当模型决定在继续之前需要询问用户某事时，此中间件会将该请求呈现出来。
+
+**配置**：由 `guardrails.clarification` 设置控制。
+
+---
+
+### LoopDetectionMiddleware
+
+检测 Agent 是否在没有取得进展的情况下重复进行相同的工具调用。检测到循环时，中间件会介入打破循环，防止 Agent 无限消耗轮次。
+
+**配置**：内置，无需用户配置。
+
+---
+
+### MemoryMiddleware
+
+在每次对话开始时读取持久化记忆事实并将其注入到系统提示中。对话结束后，将后台更新排队，以将新信息纳入记忆存储。
+
+**配置**：参见[记忆系统](/docs/harness/memory)页面和 `config.yaml` 中的 `memory:` 部分。
+
+```yaml
+memory:
+  enabled: true
+  injection_enabled: true
+  max_injection_tokens: 2000
+  debounce_seconds: 30
+```
+
+---
+
+### SubagentLimitMiddleware
+
+限制 Agent 在单次轮次中可以进行的并行子 Agent 任务调用数量。这防止 Agent 生成无限数量的并发子 Agent。
+
+**配置**：每次请求配置中的 `subagent_enabled` 和 `max_concurrent_subagents`。
+
+---
+
+### TitleMiddleware
+
+在第一次交互后自动为线程生成标题。标题从用户的第一条消息和 Agent 的响应中派生。
+
+**配置**：`config.yaml` 中的 `title:` 部分。
+
+```yaml
+title:
+  enabled: true
+  max_words: 6
+  max_chars: 60
+  model_name: null  # 使用默认模型
+```
+
+---
+
+### TodoMiddleware
+
+当计划模式激活时，维护一个对用户可见的结构化任务列表。Agent 使用 `write_todos` 工具，随着完成复杂目标，将任务标记为 `pending`、`in_progress` 或 `completed`。
+
+**激活**：在请求配置中设置 `is_plan_mode: true` 时自动启用。不需要 `config.yaml` 条目。
+
+---
+
+### TokenUsageMiddleware
+
+追踪每次模型调用的 LLM token 消耗，并以 `info` 级别记录。对于监控成本和了解长时序任务中 token 的使用位置很有帮助。
+
+**配置**：`config.yaml` 中的 `token_usage:` 部分。
+
+```yaml
+token_usage:
+  enabled: false
+```
+
+---
+
+### SummarizationMiddleware
+
+当对话变长时，对旧消息进行摘要以减少上下文大小。摘要被注入回对话，替代原始消息，在不需要完整 token 成本的情况下保留含义。
+
+**配置**：`config.yaml` 中的 `summarization:` 部分。详见下方详细配置。
+
+---
+
+### ViewImageMiddleware
+
+当当前模型支持视觉（`supports_vision: true`）时，此中间件拦截 `view_image` 工具调用，并将图像内容直接注入到模型的上下文中以便分析。
+
+**激活**：当解析的模型具有 `supports_vision: true` 时自动启用。
+
+---
+
+### DeferredToolFilterMiddleware
+
+当工具搜索启用时，此中间件从模型上下文中隐藏延迟工具 schema。工具通过 `tool_search` 工具按需发现，而不是预先全部列出，从而减少上下文使用。
+
+**配置**：`config.yaml` 中的 `tool_search.enabled: true`。
+
+## 摘要压缩配置详解
+
+`SummarizationMiddleware` 是长时序任务中影响最大的中间件之一。完整配置参考如下：
+
+```yaml
+summarization:
+  enabled: true
+
+  # 摘要使用的模型（null = 使用默认模型）
+  # 推荐使用轻量级、经济的模型，如 "gpt-4o-mini"
+  model_name: null
+
+  # 触发条件——满足任意一个条件时运行摘要
+  trigger:
+    - type: tokens      # 当上下文超过 N 个 token 时触发
+      value: 15564
+    # - type: messages  # 当消息数超过 N 时触发
+    #   value: 50
+    # - type: fraction  # 当上下文达到模型最大输入的 X% 时触发
+    #   value: 0.8
+
+  # 摘要后保留多少最近历史
+  keep:
+    type: messages
+    value: 10           # 保留最近 10 条消息
+    # 或者按 token 保留：
+    # type: tokens
+    # value: 3000
+
+  # 为摘要器准备消息时要裁剪的最大 token 数
+  trim_tokens_to_summarize: 15564
+
+  # 自定义摘要提示词（null = 使用默认 LangChain 提示词）
+  summary_prompt: null
+```
+
+**触发类型**：
+- `tokens`：当对话中总 token 数超过 `value` 时触发。
+- `messages`：当消息数超过 `value` 时触发。
+- `fraction`：当上下文达到模型最大输入 token 限制的 `value` 比例时触发。
+
+**保留类型**：
+- `messages`：摘要后保留最后 `value` 条消息。
+- `tokens`：保留最近 `value` 个 token 的历史。
+- `fraction`：保留模型最大输入 token 限制的 `value` 比例的最近历史。
+
+## 编写自定义中间件
+
+自定义中间件可以注入到链中用于专业用途。中间件必须实现 `langchain.agents.middleware` 中的 `AgentMiddleware` 接口：
+
+```python
+from langchain.agents.middleware import AgentMiddleware
+
+class MyMiddleware(AgentMiddleware):
+    async def on_start(self, state, config):
+        # 在模型调用前运行
+        return state, config
+
+    async def on_end(self, state, config):
+        # 在模型调用后运行
+        return state, config
+```
+
+自定义中间件在链末尾 `ClarificationMiddleware` 之前注入。
diff --git a/frontend/src/content/zh/harness/quick-start.mdx b/frontend/src/content/zh/harness/quick-start.mdx
new file mode 100644
index 000000000..bf335e392
--- /dev/null
+++ b/frontend/src/content/zh/harness/quick-start.mdx
@@ -0,0 +1,113 @@
+---
+title: 快速上手
+description: 学习如何使用 create_deerflow_agent 创建并运行 DeerFlow Agent，从模型初始化到流式响应。
+---
+
+import { Callout, Cards, Steps } from "nextra/components";
+
+# 快速上手
+
+<Callout type="info" emoji="🚀">
+  本指南介绍如何在 Python 中通过 <code>create_deerflow_agent</code>
+  创建并运行一个 DeerFlow Agent。
+</Callout>
+
+理解 DeerFlow Harness 的最快方式，是直接在代码里创建一个 Agent。本快速上手指南将带你完成模型初始化、Agent 创建，以及响应流式输出。
+
+## 前置条件
+
+DeerFlow Harness 需要 Python 3.12 或更高版本。该包是 `deerflow` 代码库的一部分，位于 `backend/packages/harness` 下。
+
+如果你从仓库克隆开始：
+
+```bash
+cd backend
+uv sync
+```
+
+你还需要准备一个来自对应 LangChain Provider 包的聊天模型实例。
+
+## 创建第一个 Agent
+
+<Steps>
+
+### 导入工厂函数与模型类
+
+```python
+from deerflow.agents import create_deerflow_agent
+from langchain_openai import ChatOpenAI
+```
+
+### 创建模型
+
+```python
+model = ChatOpenAI(
+    model="gpt-4o",
+    api_key="YOUR_OPENAI_API_KEY",
+)
+```
+
+### 创建 Agent
+
+```python
+agent = create_deerflow_agent(model)
+```
+
+这会返回一个已经编译好的 LangGraph Agent，并带有 DeerFlow 默认的中间件链。
+
+### 流式获取响应
+
+```python
+for event in agent.stream(
+    {"messages": [{"role": "user", "content": "解释一下 DeerFlow Harness 是什么。"}]},
+    stream_mode=["messages", "values"],
+):
+    print(event)
+```
+
+</Steps>
+
+## 添加工具或行为
+
+你可以通过传入工具、系统提示词、运行时特性、中间件或 checkpointer 来自定义 Agent。
+
+```python
+from deerflow.agents import RuntimeFeatures, create_deerflow_agent
+
+agent = create_deerflow_agent(
+    model,
+    system_prompt="你是一个简洁的研究助手。",
+    features=RuntimeFeatures(subagent=True, memory=False),
+    plan_mode=True,
+    name="research-agent",
+)
+```
+
+常用参数：
+
+| 参数 | 说明 |
+|---|---|
+| `tools` | 提供给 Agent 的额外工具 |
+| `system_prompt` | 自定义系统提示词 |
+| `features` | 启用或替换内置运行时能力 |
+| `extra_middleware` | 将自定义中间件插入默认链路 |
+| `plan_mode` | 启用 Todo 风格的任务跟踪 |
+| `checkpointer` | 为多轮运行持久化状态 |
+| `name` | Agent 的逻辑名称 |
+
+## 什么时候使用 DeerFlowClient
+
+如果你想直接操作底层的编译后 Agent 图，使用 `create_deerflow_agent()`。
+
+如果你想使用更高层的嵌入式应用接口，则应使用 `DeerFlowClient`，例如：
+
+- 面向线程的对话封装，
+- 模型 / 技能 / 记忆管理 API，
+- 文件上传与 artifacts，
+- 与 Gateway 一致的返回格式。
+
+<Cards num={3}>
+  <Cards.Card title="设计理念" href="/docs/harness/design-principles" />
+  <Cards.Card title="Lead Agent" href="/docs/harness/lead-agent" />
+  <Cards.Card title="配置" href="/docs/harness/configuration" />
+</Cards>
diff --git a/frontend/src/content/zh/harness/sandbox.mdx b/frontend/src/content/zh/harness/sandbox.mdx
new file mode 100644
index 000000000..16a05f9e7
--- /dev/null
+++ b/frontend/src/content/zh/harness/sandbox.mdx
@@ -0,0 +1,144 @@
+---
+title: 沙箱
+description: 沙箱为 Lead Agent 提供一个受控环境，在其中可以读取文件、写入输出、运行 Shell 命令并生成产出物。没有沙箱，Agent 只能生成文本；有了沙箱，它可以编写和执行代码、处理数据文件、生成图表并构建交付物。
+---
+
+import { Callout, Cards, Tabs } from "nextra/components";
+
+# 沙箱
+
+<Callout type="info" emoji="📦">
+  沙箱是 Agent 进行文件和命令操作的隔离工作区。它让 DeerFlow 能够采取真实行动，而不仅仅是对话。
+</Callout>
+
+沙箱为 Lead Agent 提供一个受控环境，在其中可以读取文件、写入输出、运行 Shell 命令并生成产出物。没有沙箱，Agent 只能生成文本；有了沙箱，它可以编写和执行代码、处理数据文件、生成图表并构建交付物。
+
+## 沙箱模式
+
+DeerFlow 支持三种沙箱模式，选择适合你部署的一种：
+
+### LocalSandbox（默认）
+
+命令直接在主机机器的文件系统上运行，没有容器隔离。
+
+- **适合**：受信任的单用户本地开发工作流。
+- **风险**：Agent 可以访问主机文件系统。默认使用 `allow_host_bash: false` 防止任意命令执行。
+
+```yaml
+sandbox:
+  use: deerflow.sandbox.local:LocalSandboxProvider
+  allow_host_bash: false  # 默认；仅对完全受信任的工作流设置为 true
+```
+
+### 基于容器的 AIO 沙箱
+
+命令在隔离容器中运行（Linux/Windows 上的 Docker，macOS 上的 Apple Container）。每个沙箱会话获得一个全新的容器环境。
+
+- **适合**：多用户环境、生产部署，或任何需要执行隔离的场景。
+
+```yaml
+sandbox:
+  use: deerflow.community.aio_sandbox:AioSandboxProvider
+
+  # 可选：容器镜像（下方显示默认值）
+  image: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest
+
+  # 可选：最大并发容器数（默认：3，超出时 LRU 淘汰）
+  replicas: 3
+
+  # 可选：空闲超时（秒，默认：600）
+  idle_timeout: 600
+
+  # 可选：自定义挂载
+  mounts:
+    - host_path: /path/on/host
+      container_path: /home/user/shared
+      read_only: false
+```
+
+安装：`cd backend && uv add 'deerflow-harness[aio-sandbox]'`
+
+### Provisioner 管理的沙箱（Kubernetes）
+
+每个沙箱在 Kubernetes 集群中获得一个专用 Pod，由 Provisioner 服务管理。这提供最强的隔离性，适合有多个并发用户的生产环境。
+
+```yaml
+sandbox:
+  use: deerflow.community.aio_sandbox:AioSandboxProvider
+  provisioner_url: http://provisioner:8002
+```
+
+## 路径映射
+
+沙箱使用路径映射来桥接主机文件系统和容器的虚拟文件系统。始终配置两个关键映射：
+
+| 主机路径 | 容器路径 | 访问权限 |
+|---|---|---|
+| `skills/`（来自 `skills.path`） | `/mnt/skills`（来自 `skills.container_path`） | 只读 |
+| `.deer-flow/threads/{thread_id}/user-data/` | `/mnt/user-data/` | 读写 |
+
+技能目录始终以只读方式挂载。线程将其工作数据（上传文件、输出、中间文件）写入 `/mnt/user-data/`。
+
+### 自定义挂载
+
+你可以为本地沙箱使用 `mounts:` 配置添加额外挂载：
+
+```yaml
+sandbox:
+  use: deerflow.sandbox.local:LocalSandboxProvider
+  mounts:
+    - host_path: /home/user/my-project
+      container_path: /mnt/my-project
+      read_only: true
+```
+
+<Callout type="warning">
+  自定义挂载的 <code>container_path</code> 不能与保留前缀冲突：
+  <code>/mnt/skills</code>、<code>/mnt/acp-workspace</code> 或 <code>/mnt/user-data</code>。
+</Callout>
+
+## 输出截断
+
+沙箱工具限制输出大小以保持 Agent 上下文可控。这些限制可配置：
+
+```yaml
+sandbox:
+  use: deerflow.sandbox.local:LocalSandboxProvider
+
+  # bash 使用中间截断（头部 + 尾部）
+  bash_output_max_chars: 20000
+
+  # read_file 使用头部截断
+  read_file_output_max_chars: 50000
+
+  # ls 使用头部截断
+  ls_output_max_chars: 20000
+```
+
+设置为 `0` 禁用截断。
+
+## 安全性
+
+### LocalSandbox
+
+`LocalSandbox` 直接在主机上运行命令。默认情况下，`bash` 工具**被禁用**以防止任意主机命令执行。仅对完全受信任的单用户工作流启用它：
+
+```yaml
+sandbox:
+  allow_host_bash: true  # 危险：授予 Agent 对你机器的 Shell 访问权限
+```
+
+即使没有 `bash`，Agent 也可以通过专用文件工具读写文件。
+
+### 容器沙箱
+
+基于容器的沙箱提供文件系统和进程隔离。Agent 看不到或修改主机文件系统，除非通过显式挂载。Provisioner 管理模式增加了额外一层：每个线程获得自己的隔离 Pod。
+
+### 审计中间件
+
+`SandboxAuditMiddleware` 在每次 Agent 轮次上运行，记录所有沙箱操作，提供会话期间访问了哪些文件、运行了哪些命令的审计跟踪。
+
+<Cards num={2}>
+  <Cards.Card title="工具" href="/docs/harness/tools" />
+  <Cards.Card title="子 Agent" href="/docs/harness/subagents" />
+</Cards>
diff --git a/frontend/src/content/zh/harness/skills.mdx b/frontend/src/content/zh/harness/skills.mdx
new file mode 100644
index 000000000..f0f0aa659
--- /dev/null
+++ b/frontend/src/content/zh/harness/skills.mdx
@@ -0,0 +1,156 @@
+---
+title: 技能
+description: 技能不仅仅是提示词。它是一个自包含的能力包，可以包含结构化指令、分步工作流、领域最佳实践、支撑资源和工具配置。技能按需加载——在任务需要时注入内容，否则不影响上下文。
+---
+
+import { Callout, Cards, FileTree, Steps } from "nextra/components";
+
+# 技能
+
+<Callout type="info" emoji="🎯">
+  技能是面向任务的能力包，教会 Agent 如何完成特定类型的工作。基础 Agent 保持通用；技能在需要时提供专业化。
+</Callout>
+
+技能不仅仅是提示词。它是一个自包含的能力包，可以包含结构化指令、分步工作流、领域最佳实践、支撑资源和工具配置。技能按需加载——在任务需要时注入内容，否则不影响上下文。
+
+## 技能包含什么
+
+每个技能位于 `skills/public/`（或用户创建技能的 `skills/custom/`）下自己的子目录中。目录包含一个 `SKILL.md` 文件，定义技能的元数据、指令和工作流。
+
+<FileTree>
+  <FileTree.Folder name="skills/" defaultOpen>
+    <FileTree.Folder name="public/" defaultOpen>
+      <FileTree.Folder name="deep-research/" defaultOpen>
+        <FileTree.File name="SKILL.md" />
+      </FileTree.Folder>
+      <FileTree.Folder name="data-analysis/">
+        <FileTree.File name="SKILL.md" />
+      </FileTree.Folder>
+    </FileTree.Folder>
+    <FileTree.Folder name="custom/">
+      <FileTree.File name="（你的自定义技能放在这里）" />
+    </FileTree.Folder>
+  </FileTree.Folder>
+</FileTree>
+
+`SKILL.md` 文件是技能的权威定义，由 `skills/parser.py` 解析以提取技能名称、描述、类别、指令以及任何依赖项或工具需求。
+
+## 内置技能
+
+DeerFlow 内置以下公共技能：
+
+| 技能 | 描述 |
+|---|---|
+| `deep-research` | 带来源收集、交叉验证和结构化输出的多步骤研究 |
+| `data-analysis` | 数据探索、统计分析和洞察生成 |
+| `chart-visualization` | 从数据创建图表和可视化 |
+| `ppt-generation` | 演示文稿幻灯片生成 |
+| `image-generation` | AI 图像生成工作流 |
+| `code-documentation` | 自动化代码文档生成 |
+| `newsletter-generation` | 新闻简报内容创作 |
+| `podcast-generation` | 播客脚本和大纲生成 |
+| `academic-paper-review` | 结构化学术论文分析 |
+| `consulting-analysis` | 商业咨询框架和分析 |
+| `systematic-literature-review` | 文献综述方法论和综合 |
+| `github-deep-research` | 仓库和代码深度研究 |
+| `frontend-design` | 前端设计和 UI 工作流 |
+| `web-design-guidelines` | 网页设计标准和审查 |
+| `video-generation` | 视频内容规划和生成 |
+
+## 技能生命周期
+
+<Steps>
+
+### 发现和加载
+
+`skills/loader.py` 中的 `load_skills()` 扫描配置技能路径下的 `public/` 和 `custom/` 目录。它每次调用都重新读取 `ExtensionsConfig.from_file()`，这意味着通过 Gateway API 启用或禁用技能会立即在运行中的 LangGraph 服务器中生效，无需重启。
+
+### 解析
+
+`parser.py` 读取每个 `SKILL.md` 文件并提取结构化元数据：名称、描述、类别、指令以及任何工具或资源需求。
+
+### 安全扫描
+
+`security_scanner.py` 在技能内容加载到 Agent 上下文之前检查潜在危险模式，防止恶意技能内容被注入。
+
+### 依赖安装
+
+`installer.py` 处理技能声明的任何 Python 或系统依赖项，在技能首次加载时安装到运行时环境中。
+
+### 上下文注入
+
+当 Agent 以特定技能在范围内调用时，技能的指令被注入到系统提示中。Agent 在该对话期间可以访问技能的工作流、最佳实践和领域知识。
+
+</Steps>
+
+## 配置
+
+技能系统在 `config.yaml` 的 `skills:` 下配置：
+
+```yaml
+skills:
+  # 主机上的技能目录路径。
+  # 默认：相对于后端目录的 ../skills
+  # 取消注释以自定义：
+  # path: /absolute/path/to/custom/skills
+
+  # 在沙箱容器中挂载技能的路径
+  container_path: /mnt/skills
+```
+
+`container_path` 很重要：它告诉 Agent 在沙箱内哪里找到技能文件。Harness 自动将主机技能目录挂载到这个容器路径。
+
+## 启用和禁用技能
+
+技能可用性在 `extensions_config.json` 中跟踪（独立于 `config.yaml`）。你可以管理技能状态：
+
+- **通过 DeerFlow 应用界面**：技能面板允许你切换技能的启用/禁用状态。
+- **通过 Gateway API**：`POST /api/extensions/skills/{name}/enable` 和 `/disable`。
+- **直接编辑 `extensions_config.json`**。
+
+由于 `load_skills()` 每次调用都重新读取扩展配置，更改立即生效——无需重启服务器。
+
+## 按自定义 Agent 限制技能
+
+自定义 Agent 可以被限制为特定技能子集。在 Agent 的配置中（存储在 `agents/{name}/config.yaml`），设置 `skills` 列表：
+
+```yaml
+# agents/my-researcher/config.yaml
+name: my-researcher
+skills:
+  - deep-research
+  - academic-paper-review
+```
+
+- **省略或 null**：Agent 加载所有全局启用的技能。
+- **空列表 `[]`**：Agent 没有技能。
+- **命名列表**：Agent 只加载那些特定技能。
+
+## 技能进化
+
+DeerFlow 包含一个可选的**技能进化**功能，允许 Agent 在 `skills/custom/` 目录中自主创建和改进技能：
+
+```yaml
+skill_evolution:
+  enabled: false  # 设为 true 允许 Agent 管理技能创建
+  moderation_model_name: null  # 安全扫描模型（null = 使用默认模型）
+```
+
+<Callout type="warning">
+  只在你信任 Agent 输出的环境中启用技能进化。新创建的技能在加载前会经过安全扫描，但该功能给予 Agent 对技能目录的写访问权限。
+</Callout>
+
+## 编写自定义技能
+
+要创建自定义技能：
+
+1. 在 `skills/custom/your-skill-name/` 下创建新目录
+2. 添加定义技能元数据和指令的 `SKILL.md` 文件
+3. 技能将在下次 `load_skills()` 调用时自动被发现
+
+`SKILL.md` 格式遵循与内置技能相同的结构。使用现有公共技能之一作为预期格式的参考。
+
+<Cards num={2}>
+  <Cards.Card title="沙箱" href="/docs/harness/sandbox" />
+  <Cards.Card title="工具" href="/docs/harness/tools" />
+</Cards>
diff --git a/frontend/src/content/zh/harness/subagents.mdx b/frontend/src/content/zh/harness/subagents.mdx
new file mode 100644
index 000000000..929dfccd4
--- /dev/null
+++ b/frontend/src/content/zh/harness/subagents.mdx
@@ -0,0 +1,124 @@
+---
+title: 子 Agent
+description: 当一个任务对单个推理线程来说太宽泛，或者部分任务可以并行完成时，Lead Agent 将工作委派给**子 Agent**。子 Agent 是一个独立的 Agent 调用，接收特定任务、执行并返回结果。
+---
+
+import { Callout, Cards } from "nextra/components";
+
+# 子 Agent
+
+<Callout type="info" emoji="👥">
+  子 Agent 是 Lead Agent 委派子任务的专注执行者。它们以隔离的上下文运行，在处理并行或专业工作的同时保持主对话清晰。
+</Callout>
+
+当一个任务对单个推理线程来说太宽泛，或者部分任务可以并行完成时，Lead Agent 将工作委派给**子 Agent**。子 Agent 是一个独立的 Agent 调用，接收特定任务、执行并返回结果。
+
+## 为什么子 Agent 很重要
+
+子 Agent 解决了长时序工作流中的两个关键问题：
+
+1. **上下文隔离**：子 Agent 只看到完成其任务所需的信息，而不是整个父对话。这保持了每个 Agent 的工作上下文专注且可控。
+2. **并行性**：多个子 Agent 可以并发运行，允许任务的独立部分（例如同时研究多个话题）并行处理。
+
+## 内置子 Agent
+
+DeerFlow 内置两个子 Agent：
+
+### general-purpose
+
+通用推理和执行 Agent，适合委派需要多步骤推理、网络搜索、文件操作和产出物生成的复杂子任务。
+
+- **默认超时**：900 秒（15 分钟）
+- **默认最大轮次**：160
+
+### bash
+
+专门用于在沙箱内执行命令行任务的子 Agent，适合脚本编写、数据处理、文件转换和环境设置任务。
+
+- **默认超时**：900 秒（15 分钟）
+- **默认最大轮次**：80
+- **可用性**：仅当沙箱的 `bash` 工具可用时才暴露（`allow_host_bash: true` 或配置了容器沙箱）
+
+## 委派流程
+
+Lead Agent 使用内置 `task` 工具将工作委派给子 Agent：
+
+```
+task(
+  agent="general-purpose",
+  task="研究 Acme Corp 的前 5 个竞争对手并总结其定价",
+  context="专注于 B2B SaaS 定价模型"
+)
+```
+
+运行时然后：
+
+1. 从注册表查找子 Agent 配置，应用任何 `config.yaml` 覆盖。
+2. 用子 Agent 自己的提示词和工具创建新的 Agent 调用。
+3. 将子 Agent 运行到完成（或直到超时/最大轮次）。
+4. 将子 Agent 的最终输出作为工具结果返回给 Lead Agent。
+
+## 配置
+
+子 Agent 超时和最大轮次通过 `config.yaml` 中的 `subagents:` 部分控制：
+
+```yaml
+subagents:
+  # 所有子 Agent 的默认超时（秒，默认：900 = 15 分钟）
+  timeout_seconds: 900
+
+  # 可选：覆盖所有子 Agent 的最大轮次
+  # max_turns: 120
+
+  # 可选：按 Agent 覆盖
+  agents:
+    general-purpose:
+      timeout_seconds: 1800  # 复杂任务 30 分钟
+      max_turns: 160
+    bash:
+      timeout_seconds: 300   # 快速命令 5 分钟
+      max_turns: 80
+```
+
+按 Agent 覆盖优先于全局 `timeout_seconds` 和 `max_turns` 设置。
+
+## 并发限制
+
+`SubagentLimitMiddleware` 控制 Lead Agent 在单次轮次中可以并行调用多少个子 Agent，通过每次请求的配置控制：
+
+- `subagent_enabled`：是否为此会话激活子 Agent 委派
+- `max_concurrent_subagents`：单次轮次中最大并行任务调用数（默认：3）
+
+如果 Agent 尝试调用超过限制的子 Agent，中间件会裁剪多余的调用。
+
+## ACP Agent（外部 Agent）
+
+除内置子 Agent 外，DeerFlow 还通过 **Agent Connect Protocol (ACP)** 支持委派给外部 Agent。ACP 允许 DeerFlow 调用作为独立进程运行的 Agent（包括用 ACP 适配器包装的第三方 CLI 工具）。
+
+在 `config.yaml` 中配置 ACP Agent：
+
+```yaml
+acp_agents:
+  claude_code:
+    command: npx
+    args: ["-y", "@zed-industries/claude-agent-acp"]
+    description: 用于实现、重构和调试的 Claude Code
+    model: null
+
+  codex:
+    command: npx
+    args: ["-y", "@zed-industries/codex-acp"]
+    description: 用于仓库任务和代码生成的 Codex CLI
+    model: null
+```
+
+Lead Agent 通过 `invoke_acp_agent` 内置工具调用 ACP Agent。
+
+<Callout type="tip">
+  ACP Agent 作为 DeerFlow 管理的子进程运行，通过 ACP 协议通信。标准 CLI 工具（如原始的 `claude` 或 `codex` 命令）默认不兼容 ACP——请使用上面列出的适配器包或兼容的 ACP 封装器。
+</Callout>
+
+<Cards num={2}>
+  <Cards.Card title="沙箱" href="/docs/harness/sandbox" />
+  <Cards.Card title="MCP 集成" href="/docs/harness/mcp" />
+</Cards>
diff --git a/frontend/src/content/zh/harness/tools.mdx b/frontend/src/content/zh/harness/tools.mdx
new file mode 100644
index 000000000..b23d25f87
--- /dev/null
+++ b/frontend/src/content/zh/harness/tools.mdx
@@ -0,0 +1,210 @@
+---
+title: 工具
+description: Lead Agent 是一个工具调用 Agent。工具是它与世界交互的方式：搜索网络、读写文件、运行命令、委派任务以及向用户呈现输出。
+---
+
+import { Callout, Cards, Tabs } from "nextra/components";
+
+# 工具
+
+<Callout type="info" emoji="🔧">
+  工具是 Lead Agent 可以采取的行动。DeerFlow 提供内置工具、社区集成、MCP 工具和技能工具——全部通过 <code>config.yaml</code> 控制。
+</Callout>
+
+Lead Agent 是一个工具调用 Agent。工具是它与世界交互的方式：搜索网络、读写文件、运行命令、委派任务以及向用户呈现输出。
+
+DeerFlow 将工具分为四类：
+
+1. **内置工具** — 核心运行时能力，始终对 Agent 可用
+2. **社区工具** — 与外部搜索、抓取和图像服务的集成
+3. **MCP 工具** — 由外部 Model Context Protocol 服务器提供的工具
+4. **技能工具** — 与特定技能包捆绑的工具
+
+## 内置工具
+
+内置工具是 Harness 的一部分，无需配置即可使用。
+
+### task
+
+将子任务委派给子 Agent。当任务对单个推理线程来说太宽泛，或并行工作有利时，Lead Agent 使用此工具。
+
+```
+task(agent="general-purpose", task="...", context="...")
+```
+
+参见[子 Agent](/docs/harness/subagents)页面了解子 Agent 的配置方式。
+
+---
+
+### present_files
+
+将输出文件作为产出物呈现给用户。Agent 在生成文件（报告、图表、代码等）后调用此工具，将其显示在对话中。
+
+---
+
+### view_image
+
+读取图像文件并将其内容注入到模型的上下文中进行视觉分析。仅当活跃模型具有 `supports_vision: true` 时可用。
+
+---
+
+### clarification
+
+在继续之前向用户提问。当模型认为没有足够信息来行动时，由 `ClarificationMiddleware` 触发。
+
+---
+
+### setup_agent
+
+动态配置当前 Agent 会话。在设置新自定义 Agent 的引导流程中使用。
+
+---
+
+### invoke_acp_agent
+
+使用 [Agent Connect Protocol (ACP)](https://agentconnectprotocol.org/) 调用外部 Agent。需要在 `config.yaml` 中配置 `acp_agents:`。参见[子 Agent](/docs/harness/subagents)页面了解 ACP 配置。
+
+---
+
+### tool_search
+
+按名称或描述搜索工具，并按需将其加载到 Agent 上下文中。仅当 `config.yaml` 中 `tool_search.enabled: true` 时激活。当 MCP 或其他工具集暴露大量工具且你希望减少上下文使用时很有用。
+
+## 沙箱文件工具
+
+以下工具与沙箱文件系统交互，需要配置并激活沙箱。
+
+| 工具 | 描述 |
+|---|---|
+| `ls` | 列出目录中的文件 |
+| `read_file` | 读取文件内容 |
+| `glob` | 查找匹配模式的文件 |
+| `grep` | 搜索文件内容 |
+| `write_file` | 向文件写入内容 |
+| `str_replace` | 替换文件中的字符串 |
+| `bash` | 执行 Shell 命令（需要 `allow_host_bash: true` 或容器沙箱） |
+
+在 `config.yaml` 的 `tools:` 下配置：
+
+```yaml
+tools:
+  - use: deerflow.sandbox.tools:ls_tool
+  - use: deerflow.sandbox.tools:read_file_tool
+  - use: deerflow.sandbox.tools:glob_tool
+  - use: deerflow.sandbox.tools:grep_tool
+  - use: deerflow.sandbox.tools:write_file_tool
+  - use: deerflow.sandbox.tools:str_replace_tool
+  - use: deerflow.sandbox.tools:bash_tool
+```
+
+## 社区工具
+
+社区工具将 Agent 连接到外部服务。在 `config.yaml` 的 `tools:` 下使用 `use:` 字段指定实现来配置。
+
+### 网络搜索
+
+<Tabs items={["DuckDuckGo（默认）", "Tavily", "Exa", "Firecrawl"]}>
+  <Tabs.Tab>
+```yaml
+tools:
+  - use: deerflow.community.ddg_search.tools:web_search_tool
+```
+无需 API Key。默认配置，适合开发和通用用途。
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+tools:
+  - use: deerflow.community.tavily.tools:web_search_tool
+    api_key: $TAVILY_API_KEY
+```
+高质量搜索，带结构化结果。需要 [Tavily](https://tavily.com) API Key。
+
+安装：`cd backend && uv add 'deerflow-harness[tavily]'`
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+tools:
+  - use: deerflow.community.exa.tools:web_search_tool
+    api_key: $EXA_API_KEY
+```
+带神经检索的语义搜索。需要 [Exa](https://exa.ai) API Key。
+
+安装：`cd backend && uv add 'deerflow-harness[exa]'`
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+tools:
+  - use: deerflow.community.firecrawl.tools:web_search_tool
+    api_key: $FIRECRAWL_API_KEY
+```
+Firecrawl 驱动的搜索和爬取。需要 [Firecrawl](https://firecrawl.dev) API Key。
+  </Tabs.Tab>
+</Tabs>
+
+### 网页内容抓取
+
+<Tabs items={["Jina AI（默认）", "Exa"]}>
+  <Tabs.Tab>
+```yaml
+tools:
+  - use: deerflow.community.jina_ai.tools:web_fetch_tool
+    api_key: $JINA_API_KEY  # 可选；匿名使用有速率限制
+```
+将网页转换为干净的 Markdown。无 API Key 也可使用，但有更严格的速率限制。
+  </Tabs.Tab>
+  <Tabs.Tab>
+```yaml
+tools:
+  - use: deerflow.community.exa.tools:web_fetch_tool
+    api_key: $EXA_API_KEY
+```
+  </Tabs.Tab>
+</Tabs>
+
+### 图像搜索
+
+```yaml
+tools:
+  - use: deerflow.community.image_search.tools:image_search_tool
+```
+
+## 工具组
+
+工具组允许你将工具组织为命名集合，并限制自定义 Agent 可以访问哪些组：
+
+```yaml
+tool_groups:
+  - name: research
+    tools:
+      - web_search
+      - web_fetch
+      - image_search
+  - name: coding
+    tools:
+      - bash
+      - read_file
+      - write_file
+      - str_replace
+      - glob
+      - grep
+```
+
+自定义 Agent 然后可以在其配置中按名称引用组，将其工具访问限制为仅相关集合。
+
+## 工具搜索（延迟加载）
+
+当有许多工具时（特别是来自多个 MCP 服务器），预先加载所有工具会增加上下文使用量并可能混淆模型。工具搜索功能解决了这个问题：
+
+```yaml
+tool_search:
+  enabled: true
+```
+
+启用后，工具不会直接列在模型上下文中。相反，它们在运行时通过 `tool_search` 内置工具按需发现，Agent 按名称或描述搜索，匹配的工具按需加载到上下文中。
+
+当 MCP 服务器暴露数十个工具时特别有用。
+
+<Cards num={2}>
+  <Cards.Card title="MCP 集成" href="/docs/harness/mcp" />
+  <Cards.Card title="技能" href="/docs/harness/skills" />
+</Cards>
diff --git a/frontend/src/content/zh/index.mdx b/frontend/src/content/zh/index.mdx
index 08d2d3557..55ea93007 100644
--- a/frontend/src/content/zh/index.mdx
+++ b/frontend/src/content/zh/index.mdx
@@ -1,6 +1,66 @@
 ---
-title: 概览
+title: DeerFlow 文档
 description: 了解 DeerFlow，使用 Harness 构建，并部署应用。
 ---
 
-# 概览
+# DeerFlow 文档
+
+DeerFlow 是一个用于构建和运行 Agent 系统的框架。它提供了一个运行时 Harness，可以将 Agent 与记忆、工具、技能、沙箱和子 Agent 组合在一起；同时还提供了一个应用层，将这些能力转化为可用的产品体验。
+
+本文档围绕这两个部分组织：
+
+- **DeerFlow Harness**：用于构建 Agent 系统的核心 SDK 和运行时层。
+- **DeerFlow 应用**：构建在 Harness 之上的参考应用，用于部署、运维和终端用户工作流。
+
+如果你想了解 DeerFlow 的工作原理，从简介开始阅读。如果你想基于核心运行时进行开发，请查阅 Harness 文档。如果你想将 DeerFlow 作为应用部署和使用，请查阅应用文档。
+
+## 从这里开始
+
+### 如果你是 DeerFlow 新手
+
+先从概念概述开始。
+
+- [简介](/docs/introduction)
+- [为什么选择 DeerFlow](/docs/introduction/why-deerflow)
+- [Harness 与应用的区别](/docs/introduction/harness-vs-app)
+
+### 如果你想基于 DeerFlow 进行开发
+
+从 Harness 章节开始。这条路径适合想将 DeerFlow 功能集成到自己系统中，或基于 DeerFlow 运行时构建自定义 Agent 产品的团队。
+
+- [DeerFlow Harness](/docs/harness)
+- [快速上手](/docs/harness/quick-start)
+- [配置](/docs/harness/configuration)
+- [自定义与扩展](/docs/harness/customization)
+
+### 如果你想部署和使用 DeerFlow
+
+从应用章节开始。这条路径适合想将 DeerFlow 作为完整应用运行，并了解如何配置、运维和实际使用的团队。
+
+- [DeerFlow 应用](/docs/application)
+- [快速上手](/docs/application/quick-start)
+- [部署指南](/docs/application/deployment-guide)
+- [工作区使用](/docs/application/workspace-usage)
+
+## 文档结构
+
+### 简介
+
+简介章节帮助你在深入实现细节之前建立正确的思维模型。
+
+### DeerFlow Harness
+
+Harness 章节是技术文档的核心，面向想要构建基于 DeerFlow 系统的开发者。
+
+### DeerFlow 应用
+
+应用章节面向想要将 DeerFlow 作为可用产品部署的团队。
+
+### 教程
+
+教程章节提供实践性的、面向任务的学习内容。
+
+### 参考
+
+参考章节提供详细的查阅资料，包括配置、运行时模式、API 和代码映射。
+
diff --git a/frontend/src/content/zh/introduction/_meta.ts b/frontend/src/content/zh/introduction/_meta.ts
new file mode 100644
index 000000000..b6d0ebe60
--- /dev/null
+++ b/frontend/src/content/zh/introduction/_meta.ts
@@ -0,0 +1,15 @@
+import type { MetaRecord } from "nextra";
+
+const meta: MetaRecord = {
+  "why-deerflow": {
+    title: "为什么选择 DeerFlow",
+  },
+  "core-concepts": {
+    title: "核心概念",
+  },
+  "harness-vs-app": {
+    title: "Harness 与应用",
+  },
+};
+
+export default meta;
diff --git a/frontend/src/content/zh/introduction/core-concepts.mdx b/frontend/src/content/zh/introduction/core-concepts.mdx
new file mode 100644
index 000000000..fd85ab875
--- /dev/null
+++ b/frontend/src/content/zh/introduction/core-concepts.mdx
@@ -0,0 +1,93 @@
+---
+title: 核心概念
+description: 在深入了解 DeerFlow 之前，先建立一些贯穿整个系统的核心概念。这些概念解释了 DeerFlow 的优化目标以及其架构设计的原因。
+---
+
+import { Callout, Cards } from "nextra/components";
+
+# 核心概念
+
+<Callout type="important" emoji="🧠">
+  如果你将 DeerFlow 理解为一个长时序 Agent 的运行时，而不仅仅是聊天界面或工作流图，它将最易于理解。
+</Callout>
+
+在深入了解 DeerFlow 之前，先建立一些贯穿整个系统的核心概念。这些概念解释了 DeerFlow 的优化目标以及其架构设计的原因。
+
+## Harness
+
+在 DeerFlow 中，**Harness** 是为 Agent 提供所需运行环境的运行时层，让 Agent 能够真正完成工作。
+
+框架通常提供抽象和构建块。Harness 更进一步：它打包了一套有主张的运行时能力，让 Agent 无需每次重建相同基础设施，就能在真实环境中进行规划、行动、使用工具、管理文件和处理长时序任务。
+
+在实践中，DeerFlow 的 Harness 包括：
+
+- 工具访问
+- 技能加载
+- 沙箱执行
+- 记忆
+- 子 Agent 协调
+- 上下文管理
+
+这就是为什么 DeerFlow 既不仅是一个模型封装器，也不仅是一个工作流图——它是一个 Agent 运行环境。
+
+## 长时序 Agent
+
+**长时序 Agent** 是一种在一系列动作中持续有效，而不仅产生单一答案的 Agent。
+
+这类 Agent 可能需要：
+
+1. 制定计划
+2. 反复决定下一步
+3. 多次调用工具
+4. 检查和修改文件
+5. 存储中间结果
+6. 最终返回可用的产出物
+
+重点不只是时间长度，而是跨多个步骤的持续协调。
+
+DeerFlow 专为这类工作设计。其架构假设有用的任务通常需要不止一次工具调用和不止一轮推理。
+
+## 技能（Skill）
+
+**技能**是一个面向任务的能力包，教会 Agent 如何完成某一类工作。
+
+技能不仅仅是一个标签。它通常包括结构化指令、工作流程、最佳实践和支撑资源，可以在相关时加载。这使基础 Agent 保持通用性，同时只在需要时添加专业化行为。
+
+在 DeerFlow 中，深度研究是一项技能。数据分析、内容生成、设计工作流和其他任务类型也可以表示为技能。
+
+这是 DeerFlow 心智模型的重要组成部分：运行时保持通用，技能提供专业化。
+
+## 沙箱（Sandbox）
+
+**沙箱**是 Agent 进行文件和命令操作的隔离执行环境。
+
+DeerFlow 不把 Agent 视为纯文本生成器，而是给它一个工作区，让它可以读取文件、写入输出、运行命令并生成产出物。这使系统在编码、分析和多步骤工作流中更加实用。
+
+隔离性很重要，因为执行应该可控且可重现。沙箱让 DeerFlow 支持真实行动，而不仅仅是对话。
+
+## 子 Agent（Subagent）
+
+**子 Agent** 是处理委派子任务的专注执行者。
+
+当一个任务对单个推理线程来说太宽泛时，DeerFlow 可以将工作拆分为较小的单元并分别运行。子 Agent 有助于并行探索、范围隔离执行，以及减轻主 Agent 的负担。
+
+关键思路是隔离。子 Agent 不需要完整的对话历史或父上下文中的每一个细节——它只需要完成其分配工作所需的信息。
+
+## 上下文工程（Context Engineering）
+
+**上下文工程**是控制 Agent 在何时看到、记住和忽略什么内容的实践，以使其在长期任务中保持有效。
+
+长时序任务会给上下文窗口带来压力。如果所有内容永远内联保存，Agent 会变得越来越慢、越来越嘈杂、越来越不可靠。DeerFlow 通过摘要压缩、为子 Agent 创建范围隔离的上下文、以及将文件系统作为外部工作记忆等技术来解决这一问题。
+
+这是 DeerFlow 中最重要的思想之一。好的 Agent 行为不仅仅依靠更强大的模型——还需要在正确的时间给模型提供正确的工作集。
+
+## 记忆（Memory）
+
+**记忆**允许 Agent 在多个会话中保留有用信息。
+
+DeerFlow 的记忆系统存储用户偏好、项目背景和 Agent 从对话中学到的持久性事实。这些信息在后续会话中注入到系统提示中，让 Agent 无需从零开始即可建立在过往上下文之上。
+
+<Cards num={2}>
+  <Cards.Card title="Harness 与应用" href="/docs/introduction/harness-vs-app" />
+  <Cards.Card title="设计理念" href="/docs/harness/design-principles" />
+</Cards>
diff --git a/frontend/src/content/zh/introduction/harness-vs-app.mdx b/frontend/src/content/zh/introduction/harness-vs-app.mdx
new file mode 100644
index 000000000..7333b6ef7
--- /dev/null
+++ b/frontend/src/content/zh/introduction/harness-vs-app.mdx
@@ -0,0 +1,61 @@
+---
+title: Harness 与应用
+description: DeerFlow 有两个紧密相关但服务于不同目的的层次：.
+---
+
+import { Callout, Cards } from "nextra/components";
+
+# Harness 与应用
+
+<Callout type="info" emoji="⚙️">
+  DeerFlow 应用是构建在 DeerFlow Harness 之上的最佳实践 Super Agent 应用，而 DeerFlow Harness 是构建自己 Agent 系统的 Python SDK 和运行时基础。
+</Callout>
+
+DeerFlow 有两个紧密相关但服务于不同目的的层次：
+
+- **DeerFlow Harness** 是运行时基础层。
+- **DeerFlow 应用** 是构建在该基础之上的最佳实践应用。
+
+理解这一区别，能让其余文档更易于阅读。
+
+## Harness 是运行时层
+
+**DeerFlow Harness** 是用于构建和运行长时序 Agent 的可复用系统。
+
+它提供：长时序任务的规划和执行、工具调用和沙箱执行、技能加载和上下文注入、记忆和跨会话持久化、子 Agent 协调、以及完整的配置和扩展系统。
+
+Harness 是 Python 库和运行时引擎。你可以将它导入到自己的应用中，或者直接使用 DeerFlow 应用，后者已经为你完成了所有集成工作。
+
+## 应用是产品层
+
+**DeerFlow 应用**是一个完整的、可部署的产品，它将 Harness 功能封装成可用的体验：
+
+- 基于浏览器的对话工作区（Next.js 前端）
+- FastAPI Gateway，处理 API 操作
+- nginx 反向代理，统一所有服务
+- LangGraph 服务器，运行 DeerFlow Harness
+
+应用是 Harness 所有功能的参考实现。它展示了如何将运行时能力组装成一个对最终用户和运维团队都易于使用的产品。
+
+## 应该从哪里开始
+
+### 我想将 DeerFlow 集成到自己的系统中
+
+使用 **Harness**。将其作为 Python 库导入，使用 `DeerFlowClient` API 发送消息和流式传输响应，并通过 `config.yaml` 配置行为。
+
+→ 从 [Harness 快速上手](/docs/harness/quick-start) 开始
+
+### 我想运行 DeerFlow 作为对话工作区
+
+使用**应用**。运行 `make dev` 或部署 Docker Compose 配置，然后通过浏览器访问。
+
+→ 从 [应用快速上手](/docs/application/quick-start) 开始
+
+### 我想了解架构原理
+
+先阅读**简介**，然后是 Harness 的[设计理念](/docs/harness/design-principles)。
+
+<Cards num={2}>
+  <Cards.Card title="Harness 快速上手" href="/docs/harness/quick-start" />
+  <Cards.Card title="应用快速上手" href="/docs/application/quick-start" />
+</Cards>
diff --git a/frontend/src/content/zh/introduction/why-deerflow.mdx b/frontend/src/content/zh/introduction/why-deerflow.mdx
new file mode 100644
index 000000000..dd59fc2d5
--- /dev/null
+++ b/frontend/src/content/zh/introduction/why-deerflow.mdx
@@ -0,0 +1,53 @@
+---
+title: 为什么选择 DeerFlow
+description: DeerFlow 的诞生是因为现代 Agent 系统需要的不仅仅是一个聊天循环。一个真正有用的 Agent 必须能够进行长时序规划、将任务拆解为子任务、使用工具、操作文件、安全地运行代码，并在复杂任务中保持足够的上下文连贯性。DeerFlow 正是为提供这样的运行时基础而构建的。
+---
+
+import { Callout, Cards } from "nextra/components";
+
+# 为什么选择 DeerFlow
+
+<Callout type="info" emoji="🦌">
+  DeerFlow 起源于深度研究，但逐渐演化为一个通用的长时序 Agent 运行时——支持技能、记忆、工具和协作调度。
+</Callout>
+
+DeerFlow 的诞生是因为现代 Agent 系统需要的不仅仅是一个聊天循环。一个真正有用的 Agent 必须能够进行长时序规划、将任务拆解为子任务、使用工具、操作文件、安全地运行代码，并在复杂任务中保持足够的上下文连贯性。DeerFlow 正是为提供这样的运行时基础而构建的。
+
+## 从深度研究起步
+
+DeerFlow 的第一个版本围绕一个具体目标设计：生成真正有深度的研究产出，而不是轻量级聊天机器人的概要总结。核心思路是让 AI 系统像研究团队一样工作：制定计划、收集来源、交叉验证发现，并输出有实际深度的结构化结果。
+
+这个定位是有效的，但项目很快揭示了更重要的东西。团队不仅将 DeerFlow 用于研究——他们将其应用于数据分析、报告生成、内部自动化、运营工作流，以及其他同样需要多步骤执行的任务。
+
+共同点是清晰的：有价值的部分不仅仅是研究工作流本身，而是其背后的运行时能力。
+
+## 研究是第一个技能，而非整个系统
+
+这种使用方式的转变带来了一个关键结论：深度研究应该被视为更广泛 Agent 运行时中的一项能力，而不是整个产品的定义。
+
+因此，DeerFlow 从一个以单一研究模式为中心的项目演化为一个通用的长时序 Agent Harness。在这个模型中，研究仍然重要，但它成为众多技能中的一项，而非系统的固定形态。
+
+这就是为什么 DeerFlow 被描述为 **Harness**，而不仅仅是框架或应用。
+
+## 为什么 Harness 很重要
+
+Harness 是一个带有主张的 Agent 运行时。它不只是暴露抽象接口。它打包了 Agent 在真实环境中做有用工作所需的基础设施——工具访问、技能加载、沙箱执行、记忆、子 Agent 协调和上下文管理。
+
+这让开发者可以专注于他们试图解决的问题，而不是重复构建相同的基础设施层。
+
+## DeerFlow 解决的问题
+
+DeerFlow 专注于以下几个具体问题：
+
+**长时序执行**：大多数有用的任务不能在单次推理中完成。DeerFlow 的架构假设工具调用、规划和多轮推理是正常情况，而非特例。
+
+**上下文管理**：随着任务变长，上下文窗口压力成为主要挑战。DeerFlow 通过摘要压缩、范围隔离的子 Agent 上下文和文件系统作为外部记忆来主动管理这一问题。
+
+**技能组合**：不同的工作类型需要不同的方法。DeerFlow 让技能成为可组合的能力包，而不是把所有内容硬编码到主 Agent 逻辑中。
+
+**沙箱执行**：真实任务需要文件操作和命令执行。DeerFlow 提供一个隔离的工作区，让 Agent 可以安全地进行实际操作。
+
+<Cards num={2}>
+  <Cards.Card title="核心概念" href="/docs/introduction/core-concepts" />
+  <Cards.Card title="Harness 与应用" href="/docs/introduction/harness-vs-app" />
+</Cards>
diff --git a/frontend/src/content/zh/reference/_meta.ts b/frontend/src/content/zh/reference/_meta.ts
new file mode 100644
index 000000000..5001ebfd8
--- /dev/null
+++ b/frontend/src/content/zh/reference/_meta.ts
@@ -0,0 +1,9 @@
+import type { MetaRecord } from "nextra";
+
+const meta: MetaRecord = {
+  "model-providers": {
+    title: "模型接入",
+  },
+};
+
+export default meta;
diff --git a/frontend/src/content/zh/reference/model-providers/_meta.ts b/frontend/src/content/zh/reference/model-providers/_meta.ts
new file mode 100644
index 000000000..81fd1b56f
--- /dev/null
+++ b/frontend/src/content/zh/reference/model-providers/_meta.ts
@@ -0,0 +1,9 @@
+import type { MetaRecord } from "nextra";
+
+const meta: MetaRecord = {
+  ark: {
+    title: "火山方舟",
+  },
+};
+
+export default meta;
diff --git a/frontend/src/content/zh/reference/model-providers/ark.mdx b/frontend/src/content/zh/reference/model-providers/ark.mdx
new file mode 100644
index 000000000..6f6e4ed12
--- /dev/null
+++ b/frontend/src/content/zh/reference/model-providers/ark.mdx
@@ -0,0 +1,8 @@
+---
+title: 火山方舟
+description: 火山方舟模型接入指南。
+---
+
+# 火山方舟
+
+## Coding Plan
diff --git a/frontend/src/content/zh/reference/model-providers/index.mdx b/frontend/src/content/zh/reference/model-providers/index.mdx
new file mode 100644
index 000000000..be8f9a450
--- /dev/null
+++ b/frontend/src/content/zh/reference/model-providers/index.mdx
@@ -0,0 +1,7 @@
+---
+title: 模型厂商服务接入
+description: 已支持模型厂商服务的接入参考文档。
+asIndexPage: true
+---
+
+# 更多模型厂商服务接入
diff --git a/frontend/src/content/zh/tutorials/_meta.ts b/frontend/src/content/zh/tutorials/_meta.ts
new file mode 100644
index 000000000..f01216bc0
--- /dev/null
+++ b/frontend/src/content/zh/tutorials/_meta.ts
@@ -0,0 +1,21 @@
+import type { MetaRecord } from "nextra";
+
+const meta: MetaRecord = {
+  "first-conversation": {
+    title: "第一次对话",
+  },
+  "create-your-first-harness": {
+    title: "创建你的第一个 Harness",
+  },
+  "use-tools-and-skills": {
+    title: "使用工具和技能",
+  },
+  "work-with-memory": {
+    title: "使用记忆系统",
+  },
+  "deploy-your-own-deerflow": {
+    title: "部署你的 DeerFlow",
+  },
+};
+
+export default meta;
diff --git a/frontend/src/content/zh/tutorials/create-your-first-harness.mdx b/frontend/src/content/zh/tutorials/create-your-first-harness.mdx
new file mode 100644
index 000000000..622fd6553
--- /dev/null
+++ b/frontend/src/content/zh/tutorials/create-your-first-harness.mdx
@@ -0,0 +1,88 @@
+---
+title: 创建你的第一个 Harness
+description: 本教程介绍如何以编程方式使用 DeerFlow Harness Python SDK——直接在你的 Python 代码中导入和使用 DeerFlow，而不是通过 Web 界面。
+---
+
+# 创建你的第一个 Harness
+
+本教程介绍如何以编程方式使用 DeerFlow Harness Python SDK——直接在你的 Python 代码中导入和使用 DeerFlow，而不是通过 Web 界面。
+
+## 前置条件
+
+- Python 3.12+
+- 已安装 `uv`
+- 已克隆 DeerFlow 仓库
+
+## 安装
+
+```bash
+cd deer-flow/backend
+uv sync
+```
+
+## 创建配置
+
+创建一个最小的 `config.yaml`：
+
+```yaml
+config_version: 6
+
+models:
+  - name: gpt-4o
+    use: langchain_openai:ChatOpenAI
+    model: gpt-4o
+    api_key: $OPENAI_API_KEY
+
+sandbox:
+  use: deerflow.sandbox.local:LocalSandboxProvider
+
+tools:
+  - use: deerflow.community.ddg_search.tools:web_search_tool
+  - use: deerflow.sandbox.tools:read_file_tool
+  - use: deerflow.sandbox.tools:write_file_tool
+```
+
+## 编写代码
+
+创建 `my_agent.py`：
+
+```python
+import asyncio
+import os
+from deerflow.client import DeerFlowClient
+from deerflow.config import load_config
+
+# 设置 API Key
+os.environ["OPENAI_API_KEY"] = "sk-..."
+
+# 加载配置
+load_config()
+
+client = DeerFlowClient()
+
+async def main():
+    async for event in client.astream(
+        thread_id="my-first-thread",
+        message="用 Python 写一个斐波那契数列函数，包含文档字符串",
+        config={
+            "configurable": {
+                "model_name": "gpt-4o",
+            }
+        },
+    ):
+        print(event)
+
+asyncio.run(main())
+```
+
+## 运行
+
+```bash
+cd backend
+uv run python my_agent.py
+```
+
+## 下一步
+
+- [使用工具和技能](/docs/tutorials/use-tools-and-skills)
+- [快速上手](/docs/harness/quick-start)
diff --git a/frontend/src/content/zh/tutorials/deploy-your-own-deerflow.mdx b/frontend/src/content/zh/tutorials/deploy-your-own-deerflow.mdx
new file mode 100644
index 000000000..897be67ca
--- /dev/null
+++ b/frontend/src/content/zh/tutorials/deploy-your-own-deerflow.mdx
@@ -0,0 +1,72 @@
+---
+title: 部署你的 DeerFlow
+description: 本教程引导你将 DeerFlow 部署到生产环境，使用 Docker Compose 进行多用户访问。
+---
+
+# 部署你的 DeerFlow
+
+本教程引导你将 DeerFlow 部署到生产环境，使用 Docker Compose 进行多用户访问。
+
+## 前置条件
+
+- 已安装 Docker 和 Docker Compose
+- 服务器或 VM（Linux 推荐）
+- LLM API Key
+
+## 步骤
+
+### 1. 克隆仓库
+
+```bash
+git clone https://github.com/bytedance/deer-flow.git
+cd deer-flow
+```
+
+### 2. 创建配置文件
+
+```bash
+cp config.example.yaml config.yaml
+```
+
+编辑 `config.yaml` 添加你的模型配置。
+
+### 3. 创建环境变量文件
+
+```bash
+cat > .env << EOF
+OPENAI_API_KEY=sk-your-key-here
+DEER_FLOW_ROOT=$(pwd)
+BETTER_AUTH_SECRET=$(openssl rand -base64 32)
+BETTER_AUTH_URL=https://your-domain.com
+EOF
+```
+
+### 4. 启动服务
+
+```bash
+docker compose -f docker/docker-compose-dev.yaml up -d
+```
+
+### 5. 验证部署
+
+```bash
+# 检查所有服务健康状态
+curl http://localhost:2026/api/models
+
+# 查看服务日志
+docker compose -f docker/docker-compose-dev.yaml logs -f
+```
+
+访问 `http://your-server:2026` 开始使用。
+
+## 生产注意事项
+
+- 为 nginx 配置 HTTPS/TLS 证书
+- 将 `BETTER_AUTH_SECRET` 设置为强随机字符串
+- 配置防火墙只允许必要端口
+- 定期备份 `backend/.deer-flow/` 目录
+
+## 下一步
+
+- [部署指南（完整版）](/docs/application/deployment-guide)
+- [运维与排障](/docs/application/operations-and-troubleshooting)
diff --git a/frontend/src/content/zh/tutorials/first-conversation.mdx b/frontend/src/content/zh/tutorials/first-conversation.mdx
new file mode 100644
index 000000000..1e1a9d387
--- /dev/null
+++ b/frontend/src/content/zh/tutorials/first-conversation.mdx
@@ -0,0 +1,48 @@
+---
+title: 第一次对话
+description: 本教程引导你在 DeerFlow 中完成第一次完整的 Agent 对话，从启动应用到与 Agent 进行实质性任务交互。
+---
+
+# 第一次对话
+
+本教程引导你在 DeerFlow 中完成第一次完整的 Agent 对话，从启动应用到与 Agent 进行实质性任务交互。
+
+## 前置条件
+
+- DeerFlow 应用已运行（参见[快速上手](/docs/application/quick-start)）
+- 至少在 `config.yaml` 中配置了一个模型
+
+## 步骤
+
+### 1. 打开工作区
+
+在浏览器中访问 [http://localhost:2026](http://localhost:2026)，你将看到对话工作区。
+
+### 2. 发送第一条消息
+
+在输入框中输入问题，例如：
+
+```
+研究 2024 年三大最受欢迎的开源大语言模型框架，并对比它们的优缺点。
+```
+
+按回车发送。
+
+### 3. 观察 Agent 的工作过程
+
+你将看到 Agent 进入工作状态：
+- 展开**思考步骤**查看它调用了哪些工具
+- 观察网络搜索结果的流入
+- 等待最终报告生成
+
+### 4. 与结果互动
+
+报告生成后，你可以：
+- 要求对某部分进行详细说明
+- 要求将报告导出为文件（Agent 会使用 `present_files` 工具）
+- 要求基于研究结果创建图表
+
+## 下一步
+
+- [使用工具和技能](/docs/tutorials/use-tools-and-skills)
+- [工作区使用](/docs/application/workspace-usage)
diff --git a/frontend/src/content/zh/tutorials/use-tools-and-skills.mdx b/frontend/src/content/zh/tutorials/use-tools-and-skills.mdx
new file mode 100644
index 000000000..eedc95308
--- /dev/null
+++ b/frontend/src/content/zh/tutorials/use-tools-and-skills.mdx
@@ -0,0 +1,52 @@
+---
+title: 使用工具和技能
+description: 本教程介绍如何在 DeerFlow 中配置和使用工具（Tools）与技能（Skills），让 Agent 能够访问搜索、文件操作和特定领域能力。
+---
+
+# 使用工具和技能
+
+本教程介绍如何在 DeerFlow 中配置和使用工具（Tools）与技能（Skills），让 Agent 能够访问搜索、文件操作和特定领域能力。
+
+## 配置工具
+
+在 `config.yaml` 中添加工具：
+
+```yaml
+tools:
+  # 网络搜索
+  - use: deerflow.community.ddg_search.tools:web_search_tool
+
+  # 网页内容抓取
+  - use: deerflow.community.jina_ai.tools:web_fetch_tool
+
+  # 沙箱文件操作
+  - use: deerflow.sandbox.tools:ls_tool
+  - use: deerflow.sandbox.tools:read_file_tool
+  - use: deerflow.sandbox.tools:write_file_tool
+  - use: deerflow.sandbox.tools:bash_tool
+```
+
+## 启用技能
+
+通过 DeerFlow 应用界面的技能面板启用技能，或直接编辑 `extensions_config.json`。
+
+示例：启用深度研究技能
+
+1. 打开 DeerFlow 应用
+2. 点击侧边栏中的扩展/技能图标
+3. 找到 `deep-research`，切换为启用
+
+## 使用技能进行研究
+
+启用 `deep-research` 技能后，在对话中选择该技能，然后发送研究请求：
+
+```
+对量子计算的最新进展进行深度研究，重点关注实用化应用前景
+```
+
+Agent 将执行多步骤研究工作流，包括搜索、信息汇总和报告生成。
+
+## 下一步
+
+- [使用记忆系统](/docs/tutorials/work-with-memory)
+- [技能文档](/docs/harness/skills)
diff --git a/frontend/src/content/zh/tutorials/work-with-memory.mdx b/frontend/src/content/zh/tutorials/work-with-memory.mdx
new file mode 100644
index 000000000..64ae09640
--- /dev/null
+++ b/frontend/src/content/zh/tutorials/work-with-memory.mdx
@@ -0,0 +1,57 @@
+---
+title: 使用记忆系统
+description: 本教程介绍如何在 DeerFlow 中启用和使用记忆系统，让 Agent 在多次会话中记住关于你的重要信息。
+---
+
+# 使用记忆系统
+
+本教程介绍如何在 DeerFlow 中启用和使用记忆系统，让 Agent 在多次会话中记住关于你的重要信息。
+
+## 启用记忆
+
+在 `config.yaml` 中启用记忆：
+
+```yaml
+memory:
+  enabled: true
+  injection_enabled: true
+  max_injection_tokens: 2000
+  debounce_seconds: 30
+```
+
+## 记忆的工作方式
+
+记忆通过 `MemoryMiddleware` 自动工作：
+
+1. **第一次对话**：告诉 Agent 关于你的偏好、项目或背景。
+2. **自动学习**：Agent 在后台提取并保存重要事实。
+3. **后续对话**：记忆事实自动注入到系统提示中，Agent 无需你重新解释背景。
+
+## 示例
+
+**第一次对话**：
+```
+我是一名 Python 后端开发者，主要使用 FastAPI 和 PostgreSQL。
+我的团队遵循 PEP 8 代码规范，偏好类型注解。
+请记住这些信息，在以后的代码建议中遵循这些规范。
+```
+
+**后续对话**（无需重复背景）：
+```
+帮我写一个用户认证模块
+```
+
+Agent 会自动生成符合 FastAPI 风格、带类型注解的代码。
+
+## 查看记忆
+
+记忆存储在 `backend/.deer-flow/memory.json`，你可以直接查看和编辑：
+
+```bash
+cat backend/.deer-flow/memory.json
+```
+
+## 下一步
+
+- [部署你的 DeerFlow](/docs/tutorials/deploy-your-own-deerflow)
+- [记忆系统文档](/docs/harness/memory)
diff --git a/frontend/src/core/agents/api.ts b/frontend/src/core/agents/api.ts
index 927b5f20b..984ee8832 100644
--- a/frontend/src/core/agents/api.ts
+++ b/frontend/src/core/agents/api.ts
@@ -1,3 +1,4 @@
+import { fetchWithAuth } from "@/core/api/fetcher";
 import { getBackendBaseURL } from "@/core/config";
 
 import type { Agent, CreateAgentRequest, UpdateAgentRequest } from "./types";
@@ -28,7 +29,7 @@ export async function getAgent(name: string): Promise<Agent> {
 }
 
 export async function createAgent(request: CreateAgentRequest): Promise<Agent> {
-  const res = await fetch(`${getBackendBaseURL()}/api/agents`, {
+  const res = await fetchWithAuth(`${getBackendBaseURL()}/api/agents`, {
     method: "POST",
     headers: { "Content-Type": "application/json" },
     body: JSON.stringify(request),
@@ -44,7 +45,7 @@ export async function updateAgent(
   name: string,
   request: UpdateAgentRequest,
 ): Promise<Agent> {
-  const res = await fetch(`${getBackendBaseURL()}/api/agents/${name}`, {
+  const res = await fetchWithAuth(`${getBackendBaseURL()}/api/agents/${name}`, {
     method: "PUT",
     headers: { "Content-Type": "application/json" },
     body: JSON.stringify(request),
@@ -57,7 +58,7 @@ export async function updateAgent(
 }
 
 export async function deleteAgent(name: string): Promise<void> {
-  const res = await fetch(`${getBackendBaseURL()}/api/agents/${name}`, {
+  const res = await fetchWithAuth(`${getBackendBaseURL()}/api/agents/${name}`, {
     method: "DELETE",
   });
   if (!res.ok) throw new Error(`Failed to delete agent: ${res.statusText}`);
diff --git a/frontend/src/core/api/api-client.ts b/frontend/src/core/api/api-client.ts
index b86251513..5e71730e7 100644
--- a/frontend/src/core/api/api-client.ts
+++ b/frontend/src/core/api/api-client.ts
@@ -4,11 +4,37 @@ import { Client as LangGraphClient } from "@langchain/langgraph-sdk/client";
 
 import { getLangGraphBaseURL } from "../config";
 
+import { isStateChangingMethod, readCsrfCookie } from "./fetcher";
 import { sanitizeRunStreamOptions } from "./stream-mode";
 
+/**
+ * SDK ``onRequest`` hook that mints the ``X-CSRF-Token`` header from the
+ * live ``csrf_token`` cookie just before each outbound fetch.
+ *
+ * Reading the cookie per-request (rather than baking it into the SDK's
+ * ``defaultHeaders`` at construction) handles login / logout / password
+ * change cookie rotation transparently. Both the ``/langgraph-compat/*``
+ * SDK path and the direct REST endpoints in ``fetcher.ts:fetchWithAuth``
+ * share :func:`readCsrfCookie` and :const:`STATE_CHANGING_METHODS` so
+ * the contract stays in lockstep.
+ */
+function injectCsrfHeader(_url: URL, init: RequestInit): RequestInit {
+  if (!isStateChangingMethod(init.method ?? "GET")) {
+    return init;
+  }
+  const token = readCsrfCookie();
+  if (!token) return init;
+  const headers = new Headers(init.headers);
+  if (!headers.has("X-CSRF-Token")) {
+    headers.set("X-CSRF-Token", token);
+  }
+  return { ...init, headers };
+}
+
 function createCompatibleClient(isMock?: boolean): LangGraphClient {
   const client = new LangGraphClient({
     apiUrl: getLangGraphBaseURL(isMock),
+    onRequest: injectCsrfHeader,
   });
 
   const originalRunStream = client.runs.stream.bind(client.runs);
diff --git a/frontend/src/core/api/feedback.ts b/frontend/src/core/api/feedback.ts
new file mode 100644
index 000000000..5af3f02c8
--- /dev/null
+++ b/frontend/src/core/api/feedback.ts
@@ -0,0 +1,42 @@
+import { getBackendBaseURL } from "../config";
+
+import { fetchWithAuth } from "./fetcher";
+
+export interface FeedbackData {
+  feedback_id: string;
+  rating: number;
+  comment: string | null;
+}
+
+export async function upsertFeedback(
+  threadId: string,
+  runId: string,
+  rating: number,
+  comment?: string,
+): Promise<FeedbackData> {
+  const res = await fetchWithAuth(
+    `${getBackendBaseURL()}/api/threads/${encodeURIComponent(threadId)}/runs/${encodeURIComponent(runId)}/feedback`,
+    {
+      method: "PUT",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ rating, comment: comment ?? null }),
+    },
+  );
+  if (!res.ok) {
+    throw new Error(`Failed to submit feedback: ${res.status}`);
+  }
+  return res.json();
+}
+
+export async function deleteFeedback(
+  threadId: string,
+  runId: string,
+): Promise<void> {
+  const res = await fetchWithAuth(
+    `${getBackendBaseURL()}/api/threads/${encodeURIComponent(threadId)}/runs/${encodeURIComponent(runId)}/feedback`,
+    { method: "DELETE" },
+  );
+  if (!res.ok && res.status !== 404) {
+    throw new Error(`Failed to delete feedback: ${res.status}`);
+  }
+}
diff --git a/frontend/src/core/api/fetcher.ts b/frontend/src/core/api/fetcher.ts
new file mode 100644
index 000000000..d5f30404c
--- /dev/null
+++ b/frontend/src/core/api/fetcher.ts
@@ -0,0 +1,104 @@
+import { buildLoginUrl } from "@/core/auth/types";
+
+/** HTTP methods that the gateway's CSRFMiddleware checks. */
+export type StateChangingMethod = "POST" | "PUT" | "DELETE" | "PATCH";
+
+export const STATE_CHANGING_METHODS: ReadonlySet<StateChangingMethod> = new Set(
+  ["POST", "PUT", "DELETE", "PATCH"],
+);
+
+/** Mirror of the gateway's ``should_check_csrf`` decision. */
+export function isStateChangingMethod(method: string): boolean {
+  return (STATE_CHANGING_METHODS as ReadonlySet<string>).has(
+    method.toUpperCase(),
+  );
+}
+
+const CSRF_COOKIE_PREFIX = "csrf_token=";
+
+/**
+ * Read the ``csrf_token`` cookie set by the gateway at login.
+ *
+ * SSR-safe: returns ``null`` when ``document`` is undefined so the same
+ * helper can be imported from server components without a guard.
+ *
+ * Uses `String.split` instead of a regex to side-step ESLint's
+ * `prefer-regexp-exec` rule and the cookie value's reliable `; `
+ * separator (set by the gateway, not the browser, so format is stable).
+ */
+export function readCsrfCookie(): string | null {
+  if (typeof document === "undefined") return null;
+  for (const pair of document.cookie.split("; ")) {
+    if (pair.startsWith(CSRF_COOKIE_PREFIX)) {
+      return decodeURIComponent(pair.slice(CSRF_COOKIE_PREFIX.length));
+    }
+  }
+  return null;
+}
+
+/**
+ * Fetch with credentials and automatic CSRF protection.
+ *
+ * Two centralized contracts every API call needs:
+ *
+ * 1. ``credentials: "include"`` so the HttpOnly access_token cookie
+ *    accompanies cross-origin SSR-routed requests.
+ * 2. ``X-CSRF-Token`` header on state-changing methods (POST/PUT/
+ *    DELETE/PATCH), echoed from the ``csrf_token`` cookie. The gateway's
+ *    CSRFMiddleware enforces Double Submit Cookie comparison and returns
+ *    403 if the header is missing — silently breaking every call site
+ *    that uses raw ``fetch()`` instead of this wrapper.
+ *
+ * Auto-redirects to ``/login`` on 401. Caller-supplied headers are
+ * preserved; the helper only ADDS the CSRF header when it isn't already
+ * present, so explicit overrides win.
+ */
+export async function fetchWithAuth(
+  input: RequestInfo | string,
+  init?: RequestInit,
+): Promise<Response> {
+  const url = typeof input === "string" ? input : input.url;
+
+  // Inject CSRF for state-changing methods. GET/HEAD/OPTIONS/TRACE skip
+  // it to mirror the gateway's ``should_check_csrf`` logic exactly.
+  let headers = init?.headers;
+  if (isStateChangingMethod(init?.method ?? "GET")) {
+    const token = readCsrfCookie();
+    if (token) {
+      // Fresh Headers instance so we don't mutate caller-supplied objects.
+      const merged = new Headers(headers);
+      if (!merged.has("X-CSRF-Token")) {
+        merged.set("X-CSRF-Token", token);
+      }
+      headers = merged;
+    }
+  }
+
+  const res = await fetch(url, {
+    ...init,
+    headers,
+    credentials: "include",
+  });
+
+  if (res.status === 401) {
+    window.location.href = buildLoginUrl(window.location.pathname);
+    throw new Error("Unauthorized");
+  }
+
+  return res;
+}
+
+/**
+ * Build headers for CSRF-protected requests.
+ *
+ * **Prefer :func:`fetchWithAuth`** for new code — it injects the header
+ * automatically on state-changing methods. This helper exists for legacy
+ * call sites that need to compose headers manually (e.g. inside
+ * `next/server` route handlers that build their own ``Headers`` object).
+ *
+ * Per RFC-001: Double Submit Cookie pattern.
+ */
+export function getCsrfHeaders(): HeadersInit {
+  const token = readCsrfCookie();
+  return token ? { "X-CSRF-Token": token } : {};
+}
diff --git a/frontend/src/core/auth/AuthProvider.tsx b/frontend/src/core/auth/AuthProvider.tsx
new file mode 100644
index 000000000..652cc49b8
--- /dev/null
+++ b/frontend/src/core/auth/AuthProvider.tsx
@@ -0,0 +1,165 @@
+"use client";
+
+import { useRouter, usePathname } from "next/navigation";
+import React, {
+  createContext,
+  useContext,
+  useState,
+  useCallback,
+  useEffect,
+  type ReactNode,
+} from "react";
+
+import { type User, buildLoginUrl } from "./types";
+
+// Re-export for consumers
+export type { User };
+
+/**
+ * Authentication context provided to consuming components
+ */
+interface AuthContextType {
+  user: User | null;
+  isAuthenticated: boolean;
+  isLoading: boolean;
+  logout: () => Promise<void>;
+  refreshUser: () => Promise<void>;
+}
+
+const AuthContext = createContext<AuthContextType | undefined>(undefined);
+
+interface AuthProviderProps {
+  children: ReactNode;
+  initialUser: User | null;
+}
+
+/**
+ * AuthProvider - Unified authentication context for the application
+ *
+ * Per RFC-001:
+ * - Only holds display information (user), never JWT or tokens
+ * - initialUser comes from server-side guard, avoiding client flicker
+ * - Provides logout and refresh capabilities
+ */
+export function AuthProvider({ children, initialUser }: AuthProviderProps) {
+  const [user, setUser] = useState<User | null>(initialUser);
+  const [isLoading, setIsLoading] = useState(false);
+  const router = useRouter();
+  const pathname = usePathname();
+
+  const isAuthenticated = user !== null;
+
+  /**
+   * Fetch current user from FastAPI
+   * Used when initialUser might be stale (e.g., after tab was inactive)
+   */
+  const refreshUser = useCallback(async () => {
+    try {
+      setIsLoading(true);
+      const res = await fetch("/api/v1/auth/me", {
+        credentials: "include",
+      });
+
+      if (res.ok) {
+        const data = await res.json();
+        setUser(data);
+      } else if (res.status === 401) {
+        // Session expired or invalid
+        setUser(null);
+        // Redirect to login if on a protected route
+        if (pathname?.startsWith("/workspace")) {
+          router.push(buildLoginUrl(pathname));
+        }
+      }
+    } catch (err) {
+      console.error("Failed to refresh user:", err);
+      setUser(null);
+    } finally {
+      setIsLoading(false);
+    }
+  }, [pathname, router]);
+
+  /**
+   * Logout - call FastAPI logout endpoint and clear local state
+   * Per RFC-001: Immediately clear local state, don't wait for server confirmation
+   */
+  const logout = useCallback(async () => {
+    // Immediately clear local state to prevent UI flicker
+    setUser(null);
+
+    try {
+      await fetch("/api/v1/auth/logout", {
+        method: "POST",
+        credentials: "include",
+      });
+    } catch (err) {
+      console.error("Logout request failed:", err);
+      // Still redirect even if logout request fails
+    }
+
+    // Redirect to home page
+    router.push("/");
+  }, [router]);
+
+  /**
+   * Handle visibility change - refresh user when tab becomes visible again.
+   * Throttled to at most once per 60 s to avoid spamming the backend on rapid tab switches.
+   */
+  const lastCheckRef = React.useRef(0);
+
+  useEffect(() => {
+    const handleVisibilityChange = () => {
+      if (document.visibilityState !== "visible" || user === null) return;
+      const now = Date.now();
+      if (now - lastCheckRef.current < 60_000) return;
+      lastCheckRef.current = now;
+      void refreshUser();
+    };
+
+    document.addEventListener("visibilitychange", handleVisibilityChange);
+    return () => {
+      document.removeEventListener("visibilitychange", handleVisibilityChange);
+    };
+  }, [user, refreshUser]);
+
+  const value: AuthContextType = {
+    user,
+    isAuthenticated,
+    isLoading,
+    logout,
+    refreshUser,
+  };
+
+  return <AuthContext.Provider value={value}>{children}</AuthContext.Provider>;
+}
+
+/**
+ * Hook to access authentication context
+ * Throws if used outside AuthProvider - this is intentional for proper usage
+ */
+export function useAuth(): AuthContextType {
+  const context = useContext(AuthContext);
+  if (context === undefined) {
+    throw new Error("useAuth must be used within an AuthProvider");
+  }
+  return context;
+}
+
+/**
+ * Hook to require authentication - redirects to login if not authenticated
+ * Useful for client-side checks in addition to server-side guards
+ */
+export function useRequireAuth(): AuthContextType {
+  const auth = useAuth();
+  const router = useRouter();
+  const pathname = usePathname();
+
+  useEffect(() => {
+    // Only redirect if we're sure user is not authenticated (not just loading)
+    if (!auth.isLoading && !auth.isAuthenticated) {
+      router.push(buildLoginUrl(pathname || "/workspace"));
+    }
+  }, [auth.isAuthenticated, auth.isLoading, router, pathname]);
+
+  return auth;
+}
diff --git a/frontend/src/core/auth/gateway-config.ts b/frontend/src/core/auth/gateway-config.ts
new file mode 100644
index 000000000..61c6ae850
--- /dev/null
+++ b/frontend/src/core/auth/gateway-config.ts
@@ -0,0 +1,34 @@
+import { z } from "zod";
+
+const gatewayConfigSchema = z.object({
+  internalGatewayUrl: z.string().url(),
+  trustedOrigins: z.array(z.string()).min(1),
+});
+
+export type GatewayConfig = z.infer<typeof gatewayConfigSchema>;
+
+let _cached: GatewayConfig | null = null;
+
+export function getGatewayConfig(): GatewayConfig {
+  if (_cached) return _cached;
+
+  const isDev = process.env.NODE_ENV === "development";
+
+  const rawUrl = process.env.DEER_FLOW_INTERNAL_GATEWAY_BASE_URL?.trim();
+  const internalGatewayUrl =
+    rawUrl?.replace(/\/+$/, "") ??
+    (isDev ? "http://localhost:8001" : undefined);
+
+  const rawOrigins = process.env.DEER_FLOW_TRUSTED_ORIGINS?.trim();
+  const trustedOrigins = rawOrigins
+    ? rawOrigins
+        .split(",")
+        .map((s) => s.trim())
+        .filter(Boolean)
+    : isDev
+      ? ["http://localhost:3000"]
+      : undefined;
+
+  _cached = gatewayConfigSchema.parse({ internalGatewayUrl, trustedOrigins });
+  return _cached;
+}
diff --git a/frontend/src/core/auth/proxy-policy.ts b/frontend/src/core/auth/proxy-policy.ts
new file mode 100644
index 000000000..9e6f1f424
--- /dev/null
+++ b/frontend/src/core/auth/proxy-policy.ts
@@ -0,0 +1,55 @@
+export interface ProxyPolicy {
+  /** Allowed upstream path prefixes */
+  readonly allowedPaths: readonly string[];
+  /** Request headers to strip before forwarding */
+  readonly strippedRequestHeaders: ReadonlySet<string>;
+  /** Response headers to strip before returning */
+  readonly strippedResponseHeaders: ReadonlySet<string>;
+  /** Credential mode: which cookie to forward */
+  readonly credential: { readonly type: "cookie"; readonly name: string };
+  /** Timeout in ms */
+  readonly timeoutMs: number;
+  /** CSRF: required for non-GET/HEAD */
+  readonly csrf: boolean;
+}
+
+export const LANGGRAPH_COMPAT_POLICY: ProxyPolicy = {
+  allowedPaths: [
+    "threads",
+    "runs",
+    "assistants",
+    "store",
+    "models",
+    "mcp",
+    "skills",
+    "memory",
+  ],
+  strippedRequestHeaders: new Set([
+    "host",
+    "connection",
+    "keep-alive",
+    "transfer-encoding",
+    "te",
+    "trailer",
+    "upgrade",
+    "authorization",
+    "x-api-key",
+    "origin",
+    "referer",
+    "proxy-authorization",
+    "proxy-authenticate",
+  ]),
+  strippedResponseHeaders: new Set([
+    "connection",
+    "keep-alive",
+    "transfer-encoding",
+    "te",
+    "trailer",
+    "upgrade",
+    "content-length",
+    "set-cookie",
+  ]),
+  credential: { type: "cookie", name: "access_token" },
+  timeoutMs: 120_000,
+  csrf: true,
+};
diff --git a/frontend/src/core/auth/server.ts b/frontend/src/core/auth/server.ts
new file mode 100644
index 000000000..d170a4ae0
--- /dev/null
+++ b/frontend/src/core/auth/server.ts
@@ -0,0 +1,84 @@
+import { cookies } from "next/headers";
+
+import { getGatewayConfig } from "./gateway-config";
+import { type AuthResult, userSchema } from "./types";
+
+const SSR_AUTH_TIMEOUT_MS = 5_000;
+
+/**
+ * Fetch the authenticated user from the gateway using the request's cookies.
+ * Returns a tagged AuthResult — callers use exhaustive switch, no try/catch.
+ */
+export async function getServerSideUser(): Promise<AuthResult> {
+  const cookieStore = await cookies();
+  const sessionCookie = cookieStore.get("access_token");
+
+  let internalGatewayUrl: string;
+  try {
+    internalGatewayUrl = getGatewayConfig().internalGatewayUrl;
+  } catch (err) {
+    return { tag: "config_error", message: String(err) };
+  }
+
+  if (!sessionCookie) {
+    // No session — check whether the system has been initialised yet.
+    const setupController = new AbortController();
+    const setupTimeout = setTimeout(
+      () => setupController.abort(),
+      SSR_AUTH_TIMEOUT_MS,
+    );
+    try {
+      const setupRes = await fetch(
+        `${internalGatewayUrl}/api/v1/auth/setup-status`,
+        {
+          cache: "no-store",
+          signal: setupController.signal,
+        },
+      );
+      clearTimeout(setupTimeout);
+      if (setupRes.ok) {
+        const setupData = (await setupRes.json()) as { needs_setup?: boolean };
+        if (setupData.needs_setup) {
+          return { tag: "system_setup_required" };
+        }
+      }
+    } catch {
+      clearTimeout(setupTimeout);
+      // If setup-status is unreachable/times out, fall through to unauthenticated.
+    }
+    return { tag: "unauthenticated" };
+  }
+
+  const controller = new AbortController();
+  const timeout = setTimeout(() => controller.abort(), SSR_AUTH_TIMEOUT_MS);
+
+  try {
+    const res = await fetch(`${internalGatewayUrl}/api/v1/auth/me`, {
+      headers: { Cookie: `access_token=${sessionCookie.value}` },
+      cache: "no-store",
+      signal: controller.signal,
+    });
+    clearTimeout(timeout); // Clear immediately — covers all response branches
+
+    if (res.ok) {
+      const parsed = userSchema.safeParse(await res.json());
+      if (!parsed.success) {
+        console.error("[SSR auth] Malformed /auth/me response:", parsed.error);
+        return { tag: "gateway_unavailable" };
+      }
+      if (parsed.data.needs_setup) {
+        return { tag: "needs_setup", user: parsed.data };
+      }
+      return { tag: "authenticated", user: parsed.data };
+    }
+    if (res.status === 401 || res.status === 403) {
+      return { tag: "unauthenticated" };
+    }
+    console.error(`[SSR auth] /api/v1/auth/me responded ${res.status}`);
+    return { tag: "gateway_unavailable" };
+  } catch (err) {
+    clearTimeout(timeout);
+    console.error("[SSR auth] Failed to reach gateway:", err);
+    return { tag: "gateway_unavailable" };
+  }
+}
diff --git a/frontend/src/core/auth/types.ts b/frontend/src/core/auth/types.ts
new file mode 100644
index 000000000..fc7d7ed00
--- /dev/null
+++ b/frontend/src/core/auth/types.ts
@@ -0,0 +1,94 @@
+import { z } from "zod";
+
+// ── User schema (single source of truth) ──────────────────────────
+
+export const userSchema = z.object({
+  id: z.string(),
+  email: z.string().email(),
+  system_role: z.enum(["admin", "user"]),
+  needs_setup: z.boolean().optional().default(false),
+});
+
+export type User = z.infer<typeof userSchema>;
+
+// ── SSR auth result (tagged union) ────────────────────────────────
+
+export type AuthResult =
+  | { tag: "authenticated"; user: User }
+  | { tag: "needs_setup"; user: User }
+  | { tag: "system_setup_required" }
+  | { tag: "unauthenticated" }
+  | { tag: "gateway_unavailable" }
+  | { tag: "config_error"; message: string };
+
+export function assertNever(x: never): never {
+  throw new Error(`Unexpected auth result: ${JSON.stringify(x)}`);
+}
+
+export function buildLoginUrl(returnPath: string): string {
+  return `/login?next=${encodeURIComponent(returnPath)}`;
+}
+
+// ── Backend error response parsing ────────────────────────────────
+
+const AUTH_ERROR_CODES = [
+  "invalid_credentials",
+  "token_expired",
+  "token_invalid",
+  "user_not_found",
+  "email_already_exists",
+  "provider_not_found",
+  "not_authenticated",
+  "system_already_initialized",
+] as const;
+
+export type AuthErrorCode = (typeof AUTH_ERROR_CODES)[number];
+
+export interface AuthErrorResponse {
+  code: AuthErrorCode;
+  message: string;
+}
+
+const AuthErrorSchema = z.object({
+  code: z.enum(AUTH_ERROR_CODES),
+  message: z.string(),
+});
+
+const ErrorDetailSchema = z.object({
+  msg: z.string(),
+  type: z.enum(["value_error"]),
+  loc: z.array(z.string()),
+});
+
+export function parseAuthError(data: unknown): AuthErrorResponse {
+  // Try top-level {code, message} first
+  const parsed = AuthErrorSchema.safeParse(data);
+  if (parsed.success) return parsed.data;
+
+  // Unwrap FastAPI's {detail: {code, message}} envelope
+  if (typeof data === "object" && data !== null && "detail" in data) {
+    const detail = (data as Record<string, unknown>).detail;
+    const nested = AuthErrorSchema.safeParse(detail);
+    if (nested.success) return nested.data;
+    // Legacy string-detail responses
+    if (typeof detail === "string") {
+      return { code: "invalid_credentials", message: detail };
+    } else if (Array.isArray(detail)) {
+      // Handle list of error details (e.g. from Pydantic validation)
+      const firstDetail = detail[0];
+      if (typeof firstDetail === "object" && firstDetail !== null) {
+        const errorDetail = ErrorDetailSchema.safeParse(firstDetail);
+        if (errorDetail.success) {
+          return { code: "invalid_credentials", message: errorDetail.data.msg };
+        }
+      }
+    } else if (typeof detail === "object" && detail !== null) {
+      const errorDetail = ErrorDetailSchema.safeParse(detail);
+      if (errorDetail.success) {
+        return { code: "invalid_credentials", message: errorDetail.data.msg };
+      }
+    }
+  }
+
+  return { code: "invalid_credentials", message: "Authentication failed" };
+}
diff --git a/frontend/src/core/i18n/locales/en-US.ts b/frontend/src/core/i18n/locales/en-US.ts
index de94e0c98..7de0d78ec 100644
--- a/frontend/src/core/i18n/locales/en-US.ts
+++ b/frontend/src/core/i18n/locales/en-US.ts
@@ -236,6 +236,7 @@ export const enUS: Translations = {
     reportIssue: "Report a issue",
     contactUs: "Contact us",
     about: "About DeerFlow",
+    logout: "Log out",
   },
 
   // Conversation
@@ -324,6 +325,7 @@ export const enUS: Translations = {
     title: "Settings",
     description: "Adjust how DeerFlow looks and behaves for you.",
     sections: {
+      account: "Account",
       appearance: "Appearance",
       memory: "Memory",
       tools: "Tools",
diff --git a/frontend/src/core/i18n/locales/types.ts b/frontend/src/core/i18n/locales/types.ts
index a8d99e4c7..e42c681f2 100644
--- a/frontend/src/core/i18n/locales/types.ts
+++ b/frontend/src/core/i18n/locales/types.ts
@@ -168,6 +168,7 @@ export interface Translations {
     reportIssue: string;
     contactUs: string;
     about: string;
+    logout: string;
   };
 
   // Conversation
@@ -253,6 +254,7 @@ export interface Translations {
     title: string;
     description: string;
     sections: {
+      account: string;
       appearance: string;
       memory: string;
       tools: string;
diff --git a/frontend/src/core/i18n/locales/zh-CN.ts b/frontend/src/core/i18n/locales/zh-CN.ts
index 600cb8f07..b6416c288 100644
--- a/frontend/src/core/i18n/locales/zh-CN.ts
+++ b/frontend/src/core/i18n/locales/zh-CN.ts
@@ -224,6 +224,7 @@ export const zhCN: Translations = {
     reportIssue: "报告问题",
     contactUs: "联系我们",
     about: "关于 DeerFlow",
+    logout: "退出登录",
   },
 
   // Conversation
@@ -309,6 +310,7 @@ export const zhCN: Translations = {
     title: "设置",
     description: "根据你的偏好调整 DeerFlow 的界面和行为。",
     sections: {
+      account: "账号",
       appearance: "外观",
       memory: "记忆",
       tools: "工具",
diff --git a/frontend/src/core/mcp/api.ts b/frontend/src/core/mcp/api.ts
index 003303238..61e681d34 100644
--- a/frontend/src/core/mcp/api.ts
+++ b/frontend/src/core/mcp/api.ts
@@ -1,3 +1,4 @@
+import { fetchWithAuth } from "@/core/api/fetcher";
 import { getBackendBaseURL } from "@/core/config";
 
 import type { MCPConfig } from "./types";
@@ -8,12 +9,15 @@ export async function loadMCPConfig() {
 }
 
 export async function updateMCPConfig(config: MCPConfig) {
-  const response = await fetch(`${getBackendBaseURL()}/api/mcp/config`, {
-    method: "PUT",
-    headers: {
-      "Content-Type": "application/json",
+  const response = await fetchWithAuth(
+    `${getBackendBaseURL()}/api/mcp/config`,
+    {
+      method: "PUT",
+      headers: {
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify(config),
     },
-    body: JSON.stringify(config),
-  });
+  );
   return response.json();
 }
diff --git a/frontend/src/core/memory/api.ts b/frontend/src/core/memory/api.ts
index 5fcf8e4c0..073328808 100644
--- a/frontend/src/core/memory/api.ts
+++ b/frontend/src/core/memory/api.ts
@@ -1,3 +1,4 @@
+import { fetchWithAuth } from "../api/fetcher";
 import { getBackendBaseURL } from "../config";
 
 import type {
@@ -85,14 +86,14 @@ export async function loadMemory(): Promise<UserMemory> {
 }
 
 export async function clearMemory(): Promise<UserMemory> {
-  const response = await fetch(`${getBackendBaseURL()}/api/memory`, {
+  const response = await fetchWithAuth(`${getBackendBaseURL()}/api/memory`, {
     method: "DELETE",
   });
   return readMemoryResponse(response, "Failed to clear memory");
 }
 
 export async function deleteMemoryFact(factId: string): Promise<UserMemory> {
-  const response = await fetch(
+  const response = await fetchWithAuth(
     `${getBackendBaseURL()}/api/memory/facts/${encodeURIComponent(factId)}`,
     {
       method: "DELETE",
@@ -107,26 +108,32 @@ export async function exportMemory(): Promise<UserMemory> {
 }
 
 export async function importMemory(memory: UserMemory): Promise<UserMemory> {
-  const response = await fetch(`${getBackendBaseURL()}/api/memory/import`, {
-    method: "POST",
-    headers: {
-      "Content-Type": "application/json",
+  const response = await fetchWithAuth(
+    `${getBackendBaseURL()}/api/memory/import`,
+    {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify(memory),
     },
-    body: JSON.stringify(memory),
-  });
+  );
   return readMemoryResponse(response, "Failed to import memory");
 }
 
 export async function createMemoryFact(
   input: MemoryFactInput,
 ): Promise<UserMemory> {
-  const response = await fetch(`${getBackendBaseURL()}/api/memory/facts`, {
-    method: "POST",
-    headers: {
-      "Content-Type": "application/json",
+  const response = await fetchWithAuth(
+    `${getBackendBaseURL()}/api/memory/facts`,
+    {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify(input),
     },
-    body: JSON.stringify(input),
-  });
+  );
   return readMemoryResponse(response, "Failed to create memory fact");
 }
 
@@ -134,7 +141,7 @@ export async function updateMemoryFact(
   factId: string,
   input: MemoryFactPatchInput,
 ): Promise<UserMemory> {
-  const response = await fetch(
+  const response = await fetchWithAuth(
     `${getBackendBaseURL()}/api/memory/facts/${encodeURIComponent(factId)}`,
     {
       method: "PATCH",
diff --git a/frontend/src/core/skills/api.ts b/frontend/src/core/skills/api.ts
index b6a358f03..03a713d92 100644
--- a/frontend/src/core/skills/api.ts
+++ b/frontend/src/core/skills/api.ts
@@ -1,3 +1,4 @@
+import { fetchWithAuth } from "@/core/api/fetcher";
 import { getBackendBaseURL } from "@/core/config";
 
 import type { Skill } from "./type";
@@ -9,7 +10,7 @@ export async function loadSkills() {
 }
 
 export async function enableSkill(skillName: string, enabled: boolean) {
-  const response = await fetch(
+  const response = await fetchWithAuth(
     `${getBackendBaseURL()}/api/skills/${skillName}`,
     {
       method: "PUT",
@@ -38,13 +39,16 @@ export interface InstallSkillResponse {
 export async function installSkill(
   request: InstallSkillRequest,
 ): Promise<InstallSkillResponse> {
-  const response = await fetch(`${getBackendBaseURL()}/api/skills/install`, {
-    method: "POST",
-    headers: {
-      "Content-Type": "application/json",
+  const response = await fetchWithAuth(
+    `${getBackendBaseURL()}/api/skills/install`,
+    {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+      },
+      body: JSON.stringify(request),
     },
-    body: JSON.stringify(request),
-  });
+  );
 
   if (!response.ok) {
     // Handle HTTP error responses (4xx, 5xx)
diff --git a/frontend/src/core/threads/hooks.ts b/frontend/src/core/threads/hooks.ts
index 9292ac12b..bd26b1f6e 100644
--- a/frontend/src/core/threads/hooks.ts
+++ b/frontend/src/core/threads/hooks.ts
@@ -8,6 +8,8 @@ import { toast } from "sonner";
 import type { PromptInputMessage } from "@/components/ai-elements/prompt-input";
 
 import { getAPIClient } from "../api";
+import type { FeedbackData } from "../api/feedback";
+import { fetchWithAuth } from "../api/fetcher";
 import { getBackendBaseURL } from "../config";
 import { useI18n } from "../i18n/hooks";
 import type { FileInMessage } from "../messages/utils";
@@ -293,6 +295,9 @@ export function useThreadStream({
     onFinish(state) {
       listeners.current.onFinish?.(state.values);
       void queryClient.invalidateQueries({ queryKey: ["threads", "search"] });
+      void queryClient.invalidateQueries({
+        queryKey: ["thread-message-enrichment"],
+      });
     },
   });
 
@@ -317,7 +322,7 @@ export function useThreadStream({
   useEffect(() => {
     if (
       optimisticMessages.length > 0 &&
-      thread.messages.length > prevMsgCountRef.current
+      thread.messages.length > prevMsgCountRef.current + 1
     ) {
       setOptimisticMessages([]);
     }
@@ -604,7 +609,7 @@ export function useDeleteThread() {
     mutationFn: async ({ threadId }: { threadId: string }) => {
       await apiClient.threads.delete(threadId);
 
-      const response = await fetch(
+      const response = await fetchWithAuth(
         `${getBackendBaseURL()}/api/threads/${encodeURIComponent(threadId)}`,
         {
           method: "DELETE",
@@ -677,3 +682,65 @@ export function useRenameThread() {
     },
   });
 }
+
+/** Per-message enrichment data attached by the backend ``/history`` helper. */
+export interface MessageEnrichment {
+  run_id: string;
+  /** ``undefined`` = not feedback-eligible; ``null`` = eligible but unrated. */
+  feedback?: FeedbackData | null;
+}
+
+/**
+ * Fetch ``/history`` once and index feedback + run_id by message id.
+ *
+ * Replaces the old ``useThreadFeedback`` hook which keyed by AI-message
+ * ordinal position — an inherently fragile mapping that broke whenever
+ * ``ai_tool_call`` messages were interleaved with ``ai_message`` messages.
+ * Keying by ``message.id`` is stable regardless of run count, tool-call
+ * chains, or summarization.
+ *
+ * The ``/history`` response is refreshed on every stream completion via
+ * ``invalidateQueries(["thread-message-enrichment"])`` in ``onFinish``.
+ */
+export function useThreadMessageEnrichment(
+  threadId: string | null | undefined,
+) {
+  return useQuery({
+    queryKey: ["thread-message-enrichment", threadId],
+    queryFn: async (): Promise<Map<string, MessageEnrichment>> => {
+      const empty = new Map<string, MessageEnrichment>();
+      if (!threadId) return empty;
+      const res = await fetchWithAuth(
+        `${getBackendBaseURL()}/api/threads/${encodeURIComponent(threadId)}/history`,
+        {
+          method: "POST",
+          headers: { "Content-Type": "application/json" },
+          body: JSON.stringify({ limit: 1 }),
+        },
+      );
+      if (!res.ok) return empty;
+      const entries = (await res.json()) as Array<{
+        values?: {
+          messages?: Array<{
+            id?: string;
+            run_id?: string;
+            feedback?: FeedbackData | null;
+          }>;
+        };
+      }>;
+      const messages = entries[0]?.values?.messages ?? [];
+      const map = new Map<string, MessageEnrichment>();
+      for (const m of messages) {
+        if (!m.id || !m.run_id) continue;
+        const entry: MessageEnrichment = { run_id: m.run_id };
+        // Preserve presence: "feedback" key absent → ineligible; present with
+        // null → eligible but unrated; present with object → rated.
+        if ("feedback" in m) entry.feedback = m.feedback;
+        map.set(m.id, entry);
+      }
+      return map;
+    },
+    enabled: !!threadId,
+    staleTime: 30_000,
+  });
+}
diff --git a/frontend/src/core/uploads/api.ts b/frontend/src/core/uploads/api.ts
index 23d463c2d..a00a259cb 100644
--- a/frontend/src/core/uploads/api.ts
+++ b/frontend/src/core/uploads/api.ts
@@ -2,6 +2,7 @@
  * API functions for file uploads
  */
 
+import { fetchWithAuth } from "../api/fetcher";
 import { getBackendBaseURL } from "../config";
 
 export interface UploadedFileInfo {
@@ -50,7 +51,7 @@ export async function uploadFiles(
     formData.append("files", file);
   });
 
-  const response = await fetch(
+  const response = await fetchWithAuth(
     `${getBackendBaseURL()}/api/threads/${threadId}/uploads`,
     {
       method: "POST",
@@ -91,7 +92,7 @@ export async function deleteUploadedFile(
   threadId: string,
   filename: string,
 ): Promise<{ success: boolean; message: string }> {
-  const response = await fetch(
+  const response = await fetchWithAuth(
     `${getBackendBaseURL()}/api/threads/${threadId}/uploads/${filename}`,
     {
       method: "DELETE",
diff --git a/frontend/src/env.js b/frontend/src/env.js
index f00fa7a6c..ea90cac5d 100644
--- a/frontend/src/env.js
+++ b/frontend/src/env.js
@@ -7,12 +7,6 @@ export const env = createEnv({
    * isn't built with invalid env vars.
    */
   server: {
-    BETTER_AUTH_SECRET:
-      process.env.NODE_ENV === "production"
-        ? z.string()
-        : z.string().optional(),
-    BETTER_AUTH_GITHUB_CLIENT_ID: z.string().optional(),
-    BETTER_AUTH_GITHUB_CLIENT_SECRET: z.string().optional(),
     GITHUB_OAUTH_TOKEN: z.string().optional(),
     NODE_ENV: z
       .enum(["development", "test", "production"])
@@ -35,10 +29,6 @@ export const env = createEnv({
    * middlewares) or client-side so we need to destruct manually.
    */
   runtimeEnv: {
-    BETTER_AUTH_SECRET: process.env.BETTER_AUTH_SECRET,
-    BETTER_AUTH_GITHUB_CLIENT_ID: process.env.BETTER_AUTH_GITHUB_CLIENT_ID,
-    BETTER_AUTH_GITHUB_CLIENT_SECRET:
-      process.env.BETTER_AUTH_GITHUB_CLIENT_SECRET,
     NODE_ENV: process.env.NODE_ENV,
 
     NEXT_PUBLIC_BACKEND_BASE_URL: process.env.NEXT_PUBLIC_BACKEND_BASE_URL,
diff --git a/frontend/src/server/better-auth/client.ts b/frontend/src/server/better-auth/client.ts
deleted file mode 100644
index 493f84993..000000000
--- a/frontend/src/server/better-auth/client.ts
+++ /dev/null
@@ -1,5 +0,0 @@
-import { createAuthClient } from "better-auth/react";
-
-export const authClient = createAuthClient();
-
-export type Session = typeof authClient.$Infer.Session;
diff --git a/frontend/src/server/better-auth/config.ts b/frontend/src/server/better-auth/config.ts
deleted file mode 100644
index abf50faca..000000000
--- a/frontend/src/server/better-auth/config.ts
+++ /dev/null
@@ -1,9 +0,0 @@
-import { betterAuth } from "better-auth";
-
-export const auth = betterAuth({
-  emailAndPassword: {
-    enabled: true,
-  },
-});
-
-export type Session = typeof auth.$Infer.Session;
diff --git a/frontend/src/server/better-auth/index.ts b/frontend/src/server/better-auth/index.ts
deleted file mode 100644
index d705e873e..000000000
--- a/frontend/src/server/better-auth/index.ts
+++ /dev/null
@@ -1 +0,0 @@
-export { auth } from "./config";
diff --git a/frontend/src/server/better-auth/server.ts b/frontend/src/server/better-auth/server.ts
deleted file mode 100644
index 064cd349c..000000000
--- a/frontend/src/server/better-auth/server.ts
+++ /dev/null
@@ -1,8 +0,0 @@
-import { headers } from "next/headers";
-import { cache } from "react";
-
-import { auth } from ".";
-
-export const getSession = cache(async () =>
-  auth.api.getSession({ headers: await headers() }),
-);