mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-04-28 12:48:40 +00:00
Squashes 25 PR commits onto current main. AppConfig becomes a pure value object with no ambient lookup. Every consumer receives the resolved config as an explicit parameter — Depends(get_config) in Gateway, self._app_config in DeerFlowClient, runtime.context.app_config in agent runs, AppConfig.from_file() at the LangGraph Server registration boundary. Phase 1 — frozen data + typed context - All config models (AppConfig, MemoryConfig, DatabaseConfig, …) become frozen=True; no sub-module globals. - AppConfig.from_file() is pure (no side-effect singleton loaders). - Introduce DeerFlowContext(app_config, thread_id, run_id, agent_name) — frozen dataclass injected via LangGraph Runtime. - Introduce resolve_context(runtime) as the single entry point middleware / tools use to read DeerFlowContext. Phase 2 — pure explicit parameter passing - Gateway: app.state.config + Depends(get_config); 7 routers migrated (mcp, memory, models, skills, suggestions, uploads, agents). - DeerFlowClient: __init__(config=...) captures config locally. - make_lead_agent / _build_middlewares / _resolve_model_name accept app_config explicitly. - RunContext.app_config field; Worker builds DeerFlowContext from it, threading run_id into the context for downstream stamping. - Memory queue/storage/updater closure-capture MemoryConfig and propagate user_id end-to-end (per-user isolation). - Sandbox/skills/community/factories/tools thread app_config. - resolve_context() rejects non-typed runtime.context. - Test suite migrated off AppConfig.current() monkey-patches. - AppConfig.current() classmethod deleted. Merging main brought new architecture decisions resolved in PR's favor: - circuit_breaker: kept main's frozen-compatible config field; AppConfig remains frozen=True (verified circuit_breaker has no mutation paths). - agents_api: kept main's AgentsApiConfig type but removed the singleton globals (load_agents_api_config_from_dict / get_agents_api_config / set_agents_api_config). 
8 routes in agents.py now read via Depends(get_config). - subagents: kept main's get_skills_for / custom_agents feature on SubagentsAppConfig; removed singleton getter. registry.py now reads app_config.subagents directly. - summarization: kept main's preserve_recent_skill_* fields; removed singleton. - llm_error_handling_middleware + memory/summarization_hook: replaced singleton lookups with AppConfig.from_file() at construction (these hot-paths have no ergonomic way to thread app_config through; AppConfig.from_file is a pure load). - worker.py + thread_data_middleware.py: DeerFlowContext.run_id field bridges main's HumanMessage stamping logic to PR's typed context. Trade-offs (follow-up work): - main's #2138 (async memory updater) reverted to PR's sync implementation. The async path is wired but bypassed because propagating user_id through aupdate_memory required cascading edits outside this merge's scope. - tests/test_subagent_skills_config.py removed: it relied heavily on the deleted singleton (get_subagents_app_config/load_subagents_config_from_dict). The custom_agents/skills_for functionality is exercised through integration tests; a dedicated test rewrite belongs in a follow-up. Verification: backend test suite — 2560 passed, 4 skipped, 84 failures. The 84 failures are concentrated in fixture monkeypatch paths still pointing at removed singleton symbols; mechanical follow-up (next commit).
161 lines
5.2 KiB
Python
161 lines
5.2 KiB
Python
"""One-time migration: move legacy thread dirs and memory into per-user layout.
|
|
|
|
Usage:
|
|
PYTHONPATH=. python scripts/migrate_user_isolation.py [--dry-run]
|
|
|
|
The script is idempotent — re-running it after a successful migration is a no-op.
|
|
"""
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import shutil
|
|
from pathlib import Path
|
|
|
|
from deerflow.config.paths import Paths, get_paths
|
|
|
|
# Module-level logger; main() sets the level and format via logging.basicConfig.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def migrate_thread_dirs(
    paths: Paths,
    thread_owner_map: dict[str, str],
    *,
    dry_run: bool = False,
) -> list[dict]:
    """Move legacy thread directories into per-user layout.

    Each sub-directory of ``<base_dir>/threads`` is treated as one thread and
    moved to ``<base_dir>/users/<user_id>/threads/<thread_id>``. Threads that
    are missing from ``thread_owner_map`` are assigned to the ``"default"``
    user. When the destination already exists, the legacy directory is parked
    under ``<base_dir>/migration-conflicts`` instead of overwriting anything.

    Args:
        paths: Paths instance; only ``base_dir`` is read here.
        thread_owner_map: Mapping of thread_id -> user_id from threads_meta table.
        dry_run: If True, only log what would happen.

    Returns:
        List of migration report entries, one per thread directory, each a
        dict with the keys ``thread_id``, ``user_id`` and ``action``.
    """
    report: list[dict] = []
    legacy_threads = paths.base_dir / "threads"
    # is_dir() (rather than exists()) also guards against a stray *file* named
    # "threads", which would make iterdir() raise NotADirectoryError.
    if not legacy_threads.is_dir():
        logger.info("No legacy threads directory found — nothing to migrate.")
        return report

    conflicts_root = paths.base_dir / "migration-conflicts"

    # sorted() keeps the processing order (and therefore the report) stable.
    for thread_dir in sorted(legacy_threads.iterdir()):
        if not thread_dir.is_dir():
            continue
        thread_id = thread_dir.name
        user_id = thread_owner_map.get(thread_id, "default")
        dest = paths.base_dir / "users" / user_id / "threads" / thread_id

        entry = {"thread_id": thread_id, "user_id": user_id, "action": ""}

        if dest.exists():
            # shutil.move() into an *existing* directory nests the source
            # inside it (or raises if that nested path exists too), so pick a
            # conflict path that is guaranteed to be free.
            conflicts_dir = conflicts_root / thread_id
            attempt = 1
            while conflicts_dir.exists():
                conflicts_dir = conflicts_root / f"{thread_id}.{attempt}"
                attempt += 1
            entry["action"] = f"conflict -> {conflicts_dir}"
            if not dry_run:
                conflicts_dir.parent.mkdir(parents=True, exist_ok=True)
                shutil.move(str(thread_dir), str(conflicts_dir))
            logger.warning("Conflict for thread %s: moved to %s", thread_id, conflicts_dir)
        else:
            entry["action"] = f"moved -> {dest}"
            if not dry_run:
                dest.parent.mkdir(parents=True, exist_ok=True)
                shutil.move(str(thread_dir), str(dest))
            logger.info("Migrated thread %s -> user %s", thread_id, user_id)

        report.append(entry)

    # Clean up empty legacy threads dir
    if not dry_run and legacy_threads.exists() and not any(legacy_threads.iterdir()):
        legacy_threads.rmdir()

    return report
|
|
|
|
|
|
def migrate_memory(
    paths: Paths,
    user_id: str = "default",
    *,
    dry_run: bool = False,
) -> None:
    """Move legacy global memory.json into per-user layout.

    The legacy file is ``<base_dir>/memory.json``; its destination is whatever
    ``paths.user_memory_file(user_id)`` returns. If the destination already
    exists it is left untouched and the legacy file is renamed to a backup
    next to it (``memory.legacy.json``, or ``memory.legacy.<n>.json`` when
    earlier backups are present).

    Args:
        paths: Paths instance.
        user_id: Target user to receive the legacy memory.
        dry_run: If True, only log.
    """
    legacy_mem = paths.base_dir / "memory.json"
    if not legacy_mem.exists():
        logger.info("No legacy memory.json found — nothing to migrate.")
        return

    dest = paths.user_memory_file(user_id)
    if dest.exists():
        # Path.rename() silently replaces an existing target on POSIX and
        # raises FileExistsError on Windows, so never reuse a taken name.
        legacy_backup = paths.base_dir / "memory.legacy.json"
        attempt = 1
        while legacy_backup.exists():
            legacy_backup = paths.base_dir / f"memory.legacy.{attempt}.json"
            attempt += 1
        logger.warning("Destination %s exists; renaming legacy to %s", dest, legacy_backup)
        if not dry_run:
            legacy_mem.rename(legacy_backup)
        return

    logger.info("Migrating memory.json -> %s", dest)
    if not dry_run:
        dest.parent.mkdir(parents=True, exist_ok=True)
        shutil.move(str(legacy_mem), str(dest))
|
|
|
|
|
|
def _build_owner_map_from_db(paths: Paths) -> dict[str, str]:
    """Build the thread_id -> user_id mapping from the threads_meta table.

    Reads ``<base_dir>/deer-flow.db`` through the synchronous ``sqlite3``
    module so that no async dependencies are required. Rows whose ``user_id``
    is NULL are skipped. A missing database file, or a query that raises
    ``sqlite3.OperationalError``, yields an empty mapping.
    """
    import sqlite3

    database_file = paths.base_dir / "deer-flow.db"
    if not database_file.exists():
        logger.info("No database found at %s — using empty owner map.", database_file)
        return {}

    connection = sqlite3.connect(str(database_file))
    try:
        rows = connection.execute(
            "SELECT thread_id, user_id FROM threads_meta WHERE user_id IS NOT NULL"
        ).fetchall()
        return {thread_id: owner for thread_id, owner in rows}
    except sqlite3.OperationalError as exc:
        logger.warning("Failed to query threads_meta: %s", exc)
        return {}
    finally:
        # Runs on both the success and the failure path.
        connection.close()
|
|
|
|
|
|
def main() -> None:
    """Command-line entry point for the per-user layout migration.

    Parses ``--dry-run``, configures logging, loads the thread ownership map
    from the database, migrates thread directories and the legacy memory file
    (always to the ``"default"`` user), then logs a per-thread report followed
    by a warning that lists the threads that had no ownership record.
    """
    parser = argparse.ArgumentParser(description="Migrate DeerFlow data to per-user layout")
    parser.add_argument("--dry-run", action="store_true", help="Log actions without making changes")
    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

    paths = get_paths()
    logger.info("Base directory: %s", paths.base_dir)
    logger.info("Dry run: %s", args.dry_run)

    owner_map = _build_owner_map_from_db(paths)
    logger.info("Found %d thread ownership records in DB", len(owner_map))

    report = migrate_thread_dirs(paths, owner_map, dry_run=args.dry_run)
    migrate_memory(paths, user_id="default", dry_run=args.dry_run)

    if report:
        logger.info("Migration report:")
        for entry in report:
            logger.info("  thread=%s user=%s action=%s", entry["thread_id"], entry["user_id"], entry["action"])
    else:
        logger.info("No threads to migrate.")

    # A thread is unowned when the DB had no record for it. Checking the
    # owner map (instead of user_id == "default") avoids mis-reporting threads
    # that the DB genuinely assigns to a user named "default".
    unowned = [e for e in report if e["thread_id"] not in owner_map]
    if unowned:
        logger.warning("%d thread(s) had no owner and were assigned to 'default':", len(unowned))
        for e in unowned:
            logger.warning("  %s", e["thread_id"])
|
|
|
|
|
|
# Run the migration only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|