From 4df595b033937d1f2e8f27da420a2b70b6b31736 Mon Sep 17 00:00:00 2001 From: greatmengqi Date: Thu, 16 Apr 2026 11:15:36 +0800 Subject: [PATCH] refactor(config): harden AppConfig lifecycle based on review feedback - set_override now returns a Token; add reset_override() for proper cleanup - DeerFlowContext.app_config typed as AppConfig (via TYPE_CHECKING) instead of Any - resolve_context now warns on empty thread_id to surface silent fallback - Client uses set_override() in addition to init() so multiple clients with different configs don't clobber each other at the process-global level - current() auto-load now warns instead of debug-logs, since implicit file-load at arbitrary call sites is easy to miss - Document that _global needs no lock (atomic pointer swap under GIL) --- backend/packages/harness/deerflow/client.py | 17 +++++++++-- .../harness/deerflow/config/app_config.py | 30 +++++++++++++++---- .../deerflow/config/deer_flow_context.py | 21 ++++++++++--- 3 files changed, 56 insertions(+), 12 deletions(-) diff --git a/backend/packages/harness/deerflow/client.py b/backend/packages/harness/deerflow/client.py index ca3e0c572..a2f1b7cfe 100644 --- a/backend/packages/harness/deerflow/client.py +++ b/backend/packages/harness/deerflow/client.py @@ -143,8 +143,12 @@ class DeerFlowClient: middlewares: Optional list of custom middlewares to inject into the agent. """ if config_path is not None: - AppConfig.init(AppConfig.from_file(config_path)) + config = AppConfig.from_file(config_path) + AppConfig.init(config) self._app_config = AppConfig.current() + # Scope this client's config to the current context so it doesn't + # leak into unrelated async tasks when multiple clients coexist. + self._config_token = AppConfig.set_override(self._app_config) if agent_name is not None and not AGENT_NAME_PATTERN.match(agent_name): raise ValueError(f"Invalid agent name '{agent_name}'. Must match pattern: {AGENT_NAME_PATTERN.pattern}") @@ -172,6 +176,13 @@ class DeerFlowClient: self._agent = None self._agent_config_key = None + def _reload_config(self) -> None: + """Reload config from file, update both process-global and context override.""" + config = AppConfig.from_file() + AppConfig.init(config) + self._app_config = config + self._config_token = AppConfig.set_override(config) + # ------------------------------------------------------------------ # Internal helpers # ------------------------------------------------------------------ @@ -850,7 +861,7 @@ class DeerFlowClient: self._agent = None self._agent_config_key = None - AppConfig.init(AppConfig.from_file()) + self._reload_config() reloaded = AppConfig.current().extensions return {"mcp_servers": {name: server.model_dump() for name, server in reloaded.mcp_servers.items()}} @@ -917,7 +928,7 @@ class DeerFlowClient: self._agent = None self._agent_config_key = None - AppConfig.init(AppConfig.from_file()) + self._reload_config() updated = next((s for s in load_skills(enabled_only=False) if s.name == name), None) if updated is None: diff --git a/backend/packages/harness/deerflow/config/app_config.py b/backend/packages/harness/deerflow/config/app_config.py index ea106b12a..f94f4f94e 100644 --- a/backend/packages/harness/deerflow/config/app_config.py +++ b/backend/packages/harness/deerflow/config/app_config.py @@ -2,7 +2,7 @@ from __future__ import annotations import logging import os -from contextvars import ContextVar +from contextvars import ContextVar, Token from pathlib import Path from typing import Any, ClassVar, Self @@ -223,6 +223,10 @@ class AppConfig(BaseModel): return next((group for group in self.tool_groups if group.name == name), None) # -- Lifecycle (process-global + per-context override) -- + # + # _global is a plain class variable. Assignment is atomic under the GIL + # (single pointer swap), so no lock is needed for the current read/write + # pattern. If this ever changes to read-modify-write, add a threading.Lock. _global: ClassVar[AppConfig | None] = None _override: ClassVar[ContextVar[AppConfig]] = ContextVar("deerflow_app_config_override") @@ -233,15 +237,28 @@ class AppConfig(BaseModel): cls._global = config @classmethod - def set_override(cls, config: AppConfig) -> None: - """Set a per-context override (e.g., for a single invocation with custom config).""" - cls._override.set(config) + def set_override(cls, config: AppConfig) -> Token[AppConfig]: + """Set a per-context override. Returns a token for reset_override(). + + Use this in DeerFlowClient or test fixtures to scope a config to the + current async context without polluting the process-global value. + """ + return cls._override.set(config) + + @classmethod + def reset_override(cls, token: Token[AppConfig]) -> None: + """Restore the override to its previous value.""" + cls._override.reset(token) @classmethod def current(cls) -> AppConfig: """Get the current AppConfig. Priority: per-context override > process-global > auto-load from file. + + The auto-load fallback exists for backward compatibility. Prefer calling + ``AppConfig.init()`` explicitly at process startup so that config errors + surface early rather than at an arbitrary first-use call site. """ try: return cls._override.get() @@ -249,7 +266,10 @@ class AppConfig(BaseModel): pass if cls._global is not None: return cls._global - logger.debug("AppConfig not initialized, auto-loading from file") + logger.warning( + "AppConfig.current() called before init(); auto-loading from file. " + "Call AppConfig.init() at process startup to surface config errors early." + ) config = cls.from_file() cls._global = config return config diff --git a/backend/packages/harness/deerflow/config/deer_flow_context.py b/backend/packages/harness/deerflow/config/deer_flow_context.py index 591aeaee4..2e7822f51 100644 --- a/backend/packages/harness/deerflow/config/deer_flow_context.py +++ b/backend/packages/harness/deerflow/config/deer_flow_context.py @@ -6,8 +6,14 @@ via Runtime[DeerFlowContext] parameters, through resolve_context(). from __future__ import annotations +import logging from dataclasses import dataclass -from typing import Any +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from deerflow.config.app_config import AppConfig + +logger = logging.getLogger(__name__) @dataclass(frozen=True) @@ -18,7 +24,7 @@ class DeerFlowContext: Mutable runtime state (e.g. sandbox_id) flows through ThreadState, not here. """ - app_config: Any # AppConfig — typed as Any to avoid circular import at module level + app_config: AppConfig thread_id: str agent_name: str | None = None @@ -37,9 +43,12 @@ def resolve_context(runtime: Any) -> DeerFlowContext: # Try dict context first (legacy path, tests), then configurable if isinstance(ctx, dict): + thread_id = ctx.get("thread_id", "") + if not thread_id: + logger.warning("resolve_context: dict context has empty thread_id — may cause incorrect path resolution") return DeerFlowContext( app_config=AppConfig.current(), - thread_id=ctx.get("thread_id", ""), + thread_id=thread_id, agent_name=ctx.get("agent_name"), ) @@ -52,8 +61,12 @@ def resolve_context(runtime: Any) -> DeerFlowContext: # Outside runnable context (e.g. unit tests) cfg = {} + thread_id = cfg.get("thread_id", "") + if not thread_id: + logger.warning("resolve_context: falling back to empty thread_id — no DeerFlowContext or configurable found") + return DeerFlowContext( app_config=AppConfig.current(), - thread_id=cfg.get("thread_id", ""), + thread_id=thread_id, agent_name=cfg.get("agent_name"), )