From a36186cf548d0ba3af971ef873a93aa4cce01445 Mon Sep 17 00:00:00 2001 From: rayhpeng Date: Sun, 12 Apr 2026 15:32:02 +0800 Subject: [PATCH] docs: update CLAUDE.md and config docs for per-user isolation --- backend/CLAUDE.md | 25 +++++++++++++------ .../harness/deerflow/config/memory_config.py | 5 ++-- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/backend/CLAUDE.md b/backend/CLAUDE.md index 88295b9ff..563034c2f 100644 --- a/backend/CLAUDE.md +++ b/backend/CLAUDE.md @@ -158,7 +158,7 @@ from deerflow.config import get_app_config Middlewares execute in strict order in `packages/harness/deerflow/agents/lead_agent/agent.py`: -1. **ThreadDataMiddleware** - Creates per-thread directories (`backend/.deer-flow/threads/{thread_id}/user-data/{workspace,uploads,outputs}`); Web UI thread deletion now follows LangGraph thread removal with Gateway cleanup of the local `.deer-flow/threads/{thread_id}` directory +1. **ThreadDataMiddleware** - Creates per-thread directories under the user's isolation scope (`backend/.deer-flow/users/{user_id}/threads/{thread_id}/user-data/{workspace,uploads,outputs}`); resolves `user_id` via `get_effective_user_id()` (falls back to `"default"` in no-auth mode); Web UI thread deletion now follows LangGraph thread removal with Gateway cleanup of the local thread directory 2. **UploadsMiddleware** - Tracks and injects newly uploaded files into conversation 3. **SandboxMiddleware** - Acquires sandbox, stores `sandbox_id` in state 4. 
**DanglingToolCallMiddleware** - Injects placeholder ToolMessages for AIMessage tool_calls that lack responses (e.g., due to user interruption) @@ -229,7 +229,7 @@ Proxied through nginx: `/api/langgraph/*` → LangGraph, all other `/api/*` → **Virtual Path System**: - Agent sees: `/mnt/user-data/{workspace,uploads,outputs}`, `/mnt/skills` -- Physical: `backend/.deer-flow/threads/{thread_id}/user-data/...`, `deer-flow/skills/` +- Physical: `backend/.deer-flow/users/{user_id}/threads/{thread_id}/user-data/...`, `deer-flow/skills/` - Translation: `replace_virtual_path()` / `replace_virtual_paths_in_command()` - Detection: `is_local_sandbox()` checks `sandbox_id == "local"` @@ -269,7 +269,7 @@ Proxied through nginx: `/api/langgraph/*` → LangGraph, all other `/api/*` → - `invoke_acp_agent` - Invokes external ACP-compatible agents from `config.yaml` - ACP launchers must be real ACP adapters. The standard `codex` CLI is not ACP-compatible by itself; configure a wrapper such as `npx -y @zed-industries/codex-acp` or an installed `codex-acp` binary - Missing ACP executables now return an actionable error message instead of a raw `[Errno 2]` -- Each ACP agent uses a per-thread workspace at `{base_dir}/threads/{thread_id}/acp-workspace/`. The workspace is accessible to the lead agent via the virtual path `/mnt/acp-workspace/` (read-only). In docker sandbox mode, the directory is volume-mounted into the container at `/mnt/acp-workspace` (read-only); in local sandbox mode, path translation is handled by `tools.py` +- Each ACP agent uses a per-thread workspace at `{base_dir}/users/{user_id}/threads/{thread_id}/acp-workspace/`. The workspace is accessible to the lead agent via the virtual path `/mnt/acp-workspace/` (read-only). 
In docker sandbox mode, the directory is volume-mounted into the container at `/mnt/acp-workspace` (read-only); in local sandbox mode, path translation is handled by `tools.py` - `image_search/` - Image search via DuckDuckGo ### MCP System (`packages/harness/deerflow/mcp/`) @@ -338,18 +338,27 @@ Bridges external messaging platforms (Feishu, Slack, Telegram) to the DeerFlow a **Components**: - `updater.py` - LLM-based memory updates with fact extraction, whitespace-normalized fact deduplication (trims leading/trailing whitespace before comparing), and atomic file I/O -- `queue.py` - Debounced update queue (per-thread deduplication, configurable wait time) +- `queue.py` - Debounced update queue (per-thread deduplication, configurable wait time); captures `user_id` at enqueue time so it survives the `threading.Timer` boundary - `prompt.py` - Prompt templates for memory updates +- `storage.py` - File-based storage with per-user isolation; cache keyed by `(user_id, agent_name)` tuple -**Data Structure** (stored in `backend/.deer-flow/memory.json`): +**Per-User Isolation**: +- Memory is stored per-user at `{base_dir}/users/{user_id}/memory.json` +- Per-agent per-user memory at `{base_dir}/users/{user_id}/agents/{agent_name}/memory.json` +- `user_id` is resolved via `get_effective_user_id()` from `deerflow.runtime.user_context` +- In no-auth mode, `user_id` defaults to `"default"` (constant `DEFAULT_USER_ID`) +- Absolute `storage_path` in config opts out of per-user isolation +- **Migration**: Run `PYTHONPATH=. 
python scripts/migrate_user_isolation.py` to move legacy `memory.json` and `threads/` into per-user layout; supports `--dry-run` + +**Data Structure** (stored in `{base_dir}/users/{user_id}/memory.json`): - **User Context**: `workContext`, `personalContext`, `topOfMind` (1-3 sentence summaries) - **History**: `recentMonths`, `earlierContext`, `longTermBackground` - **Facts**: Discrete facts with `id`, `content`, `category` (preference/knowledge/context/behavior/goal), `confidence` (0-1), `createdAt`, `source` **Workflow**: -1. `MemoryMiddleware` filters messages (user inputs + final AI responses) and queues conversation +1. `MemoryMiddleware` filters messages (user inputs + final AI responses), captures `user_id` via `get_effective_user_id()`, and queues conversation with the captured `user_id` 2. Queue debounces (30s default), batches updates, deduplicates per-thread -3. Background thread invokes LLM to extract context updates and facts +3. Background thread invokes LLM to extract context updates and facts, using the stored `user_id` (not the contextvar, which is unavailable on timer threads) 4. Applies updates atomically (temp file + rename) with cache invalidation, skipping duplicate fact content before append 5. 
Next interaction injects top 15 facts + context into `<memory>` tags in system prompt @@ -357,7 +366,7 @@ Focused regression coverage for the updater lives in `backend/tests/test_memory_ **Configuration** (`config.yaml` → `memory`): - `enabled` / `injection_enabled` - Master switches -- `storage_path` - Path to memory.json +- `storage_path` - Path to memory.json (absolute path opts out of per-user isolation) - `debounce_seconds` - Wait time before processing (default: 30) - `model_name` - LLM for updates (null = default model) - `max_facts` / `fact_confidence_threshold` - Fact storage limits (100 / 0.7) diff --git a/backend/packages/harness/deerflow/config/memory_config.py b/backend/packages/harness/deerflow/config/memory_config.py index 8565aa216..f9153262f 100644 --- a/backend/packages/harness/deerflow/config/memory_config.py +++ b/backend/packages/harness/deerflow/config/memory_config.py @@ -14,8 +14,9 @@ class MemoryConfig(BaseModel): default="", description=( "Path to store memory data. " - "If empty, defaults to `{base_dir}/memory.json` (see Paths.memory_file). " - "Absolute paths are used as-is. " + "If empty, defaults to per-user memory at `{base_dir}/users/{user_id}/memory.json`. " + "Absolute paths are used as-is and opt out of per-user isolation " + "(all users share the same file). " "Relative paths are resolved against `Paths.base_dir` " "(not the backend working directory). " "Note: if you previously set this to `.deer-flow/memory.json`, "