From 1ad1420e315d16f4d94a8091fcea673fcf4b86b5 Mon Sep 17 00:00:00 2001 From: Xun Date: Fri, 1 May 2026 13:23:26 +0800 Subject: [PATCH] refactor(skills): Unified skill storage capability (#2613) --- backend/app/gateway/routers/skills.py | 118 +++--- backend/docs/HARNESS_APP_SPLIT.md | 343 ------------------ .../deerflow/agents/lead_agent/prompt.py | 12 +- backend/packages/harness/deerflow/client.py | 18 +- .../harness/deerflow/config/skills_config.py | 10 +- .../harness/deerflow/skills/__init__.py | 13 +- .../harness/deerflow/skills/installer.py | 86 ----- .../harness/deerflow/skills/loader.py | 105 ------ .../harness/deerflow/skills/manager.py | 161 -------- .../harness/deerflow/skills/parser.py | 8 +- .../deerflow/skills/security_scanner.py | 3 +- .../deerflow/skills/storage/__init__.py | 83 +++++ .../skills/storage/local_skill_storage.py | 198 ++++++++++ .../deerflow/skills/storage/skill_storage.py | 254 +++++++++++++ .../packages/harness/deerflow/skills/types.py | 16 +- .../harness/deerflow/skills/validation.py | 6 +- .../harness/deerflow/subagents/executor.py | 4 +- .../deerflow/tools/skill_manage_tool.py | 92 +++-- backend/tests/conftest.py | 15 + backend/tests/test_client.py | 88 +++-- backend/tests/test_client_e2e.py | 68 ++-- backend/tests/test_lead_agent_prompt.py | 4 +- backend/tests/test_lead_agent_skills.py | 4 +- .../test_local_sandbox_provider_mounts.py | 8 +- .../tests/test_local_skill_storage_write.py | 162 +++++++++ backend/tests/test_skill_manage_tool.py | 15 +- backend/tests/test_skills_custom_router.py | 60 +-- backend/tests/test_skills_installer.py | 32 +- backend/tests/test_skills_loader.py | 13 +- 29 files changed, 1031 insertions(+), 968 deletions(-) delete mode 100644 backend/docs/HARNESS_APP_SPLIT.md delete mode 100644 backend/packages/harness/deerflow/skills/loader.py delete mode 100644 backend/packages/harness/deerflow/skills/manager.py create mode 100644 backend/packages/harness/deerflow/skills/storage/__init__.py create mode 100644 backend/packages/harness/deerflow/skills/storage/local_skill_storage.py create mode 100644 backend/packages/harness/deerflow/skills/storage/skill_storage.py create mode 100644 backend/tests/test_local_skill_storage_write.py diff --git a/backend/app/gateway/routers/skills.py b/backend/app/gateway/routers/skills.py index 19a85c4cf..78462ae09 100644 --- a/backend/app/gateway/routers/skills.py +++ b/backend/app/gateway/routers/skills.py @@ -1,7 +1,5 @@ -import errno import json import logging -import shutil from pathlib import Path from fastapi import APIRouter, Depends, HTTPException @@ -12,21 +10,11 @@ from app.gateway.path_utils import resolve_thread_virtual_path from deerflow.agents.lead_agent.prompt import refresh_skills_system_prompt_cache_async from deerflow.config.app_config import AppConfig from deerflow.config.extensions_config import ExtensionsConfig, SkillStateConfig, get_extensions_config, reload_extensions_config -from deerflow.skills import Skill, load_skills -from deerflow.skills.installer import SkillAlreadyExistsError, ainstall_skill_from_archive -from deerflow.skills.manager import ( - append_history, - atomic_write, - custom_skill_exists, - ensure_custom_skill_is_editable, - get_custom_skill_dir, - get_custom_skill_file, - get_skill_history_file, - read_custom_skill_content, - read_history, - validate_skill_markdown_content, -) +from deerflow.skills import Skill +from deerflow.skills.installer import SkillAlreadyExistsError from deerflow.skills.security_scanner import scan_skill_content +from deerflow.skills.storage import get_or_new_skill_storage +from deerflow.skills.types import SKILL_MD_FILE, SkillCategory logger = logging.getLogger(__name__) @@ -39,7 +27,7 @@ class SkillResponse(BaseModel): name: str = Field(..., description="Name of the skill") description: str = Field(..., description="Description of what the skill does") license: str | None = Field(None, description="License information") - category: str = Field(..., description="Category of the skill (public or custom)") + category: SkillCategory = Field(..., description="Category of the skill (public or custom)") enabled: bool = Field(default=True, description="Whether this skill is enabled") @@ -105,7 +93,7 @@ def _skill_to_response(skill: Skill) -> SkillResponse: ) async def list_skills(config: AppConfig = Depends(get_config)) -> SkillsListResponse: try: - skills = load_skills(enabled_only=False, app_config=config) + skills = get_or_new_skill_storage(app_config=config).load_skills(enabled_only=False) return SkillsListResponse(skills=[_skill_to_response(skill) for skill in skills]) except Exception as e: logger.error(f"Failed to load skills: {e}", exc_info=True) @@ -118,10 +106,10 @@ async def list_skills(config: AppConfig = Depends(get_config)) -> SkillsListResp summary="Install Skill", description="Install a skill from a .skill file (ZIP archive) located in the thread's user-data directory.", ) -async def install_skill(request: SkillInstallRequest) -> SkillInstallResponse: +async def install_skill(request: SkillInstallRequest, config: AppConfig = Depends(get_config)) -> SkillInstallResponse: try: skill_file_path = resolve_thread_virtual_path(request.thread_id, request.path) - result = await ainstall_skill_from_archive(skill_file_path) + result = await get_or_new_skill_storage(app_config=config).ainstall_skill_from_archive(skill_file_path) await refresh_skills_system_prompt_cache_async() return SkillInstallResponse(**result) except FileNotFoundError as e: @@ -140,7 +128,7 @@ async def install_skill(request: SkillInstallRequest) -> SkillInstallResponse: @router.get("/skills/custom", response_model=SkillsListResponse, summary="List Custom Skills") async def list_custom_skills(config: AppConfig = Depends(get_config)) -> SkillsListResponse: try: - skills = [skill for skill in load_skills(enabled_only=False, app_config=config) if skill.category == "custom"] + skills = [skill for skill in get_or_new_skill_storage(app_config=config).load_skills(enabled_only=False) if skill.category == SkillCategory.CUSTOM] return SkillsListResponse(skills=[_skill_to_response(skill) for skill in skills]) except Exception as e: logger.error("Failed to list custom skills: %s", e, exc_info=True) @@ -151,11 +139,11 @@ async def list_custom_skills(config: AppConfig = Depends(get_config)) -> SkillsL async def get_custom_skill(skill_name: str, config: AppConfig = Depends(get_config)) -> CustomSkillContentResponse: try: skill_name = skill_name.replace("\r\n", "").replace("\n", "") - skills = load_skills(enabled_only=False, app_config=config) - skill = next((s for s in skills if s.name == skill_name and s.category == "custom"), None) + skills = get_or_new_skill_storage(app_config=config).load_skills(enabled_only=False) + skill = next((s for s in skills if s.name == skill_name and s.category == SkillCategory.CUSTOM), None) if skill is None: raise HTTPException(status_code=404, detail=f"Custom skill '{skill_name}' not found") - return CustomSkillContentResponse(**_skill_to_response(skill).model_dump(), content=read_custom_skill_content(skill_name, app_config=config)) + return CustomSkillContentResponse(**_skill_to_response(skill).model_dump(), content=get_or_new_skill_storage(app_config=config).read_custom_skill(skill_name)) except HTTPException: raise except Exception as e: @@ -167,26 +155,25 @@ async def get_custom_skill(skill_name: str, config: AppConfig = Depends(get_conf async def update_custom_skill(skill_name: str, request: CustomSkillUpdateRequest, config: AppConfig = Depends(get_config)) -> CustomSkillContentResponse: try: skill_name = skill_name.replace("\r\n", "").replace("\n", "") - ensure_custom_skill_is_editable(skill_name, app_config=config) - validate_skill_markdown_content(skill_name, request.content) - scan = await scan_skill_content(request.content, executable=False, location=f"{skill_name}/SKILL.md", app_config=config) + storage = get_or_new_skill_storage(app_config=config) + storage.ensure_custom_skill_is_editable(skill_name) + storage.validate_skill_markdown_content(skill_name, request.content) + scan = await scan_skill_content(request.content, executable=False, location=f"{skill_name}/{SKILL_MD_FILE}", app_config=config) if scan.decision == "block": raise HTTPException(status_code=400, detail=f"Security scan blocked the edit: {scan.reason}") - skill_file = get_custom_skill_dir(skill_name, app_config=config) / "SKILL.md" - prev_content = skill_file.read_text(encoding="utf-8") - atomic_write(skill_file, request.content) - append_history( + prev_content = storage.read_custom_skill(skill_name) + storage.write_custom_skill(skill_name, SKILL_MD_FILE, request.content) + storage.append_history( skill_name, { "action": "human_edit", "author": "human", "thread_id": None, - "file_path": "SKILL.md", + "file_path": SKILL_MD_FILE, "prev_content": prev_content, "new_content": request.content, "scanner": {"decision": scan.decision, "reason": scan.reason}, }, - app_config=config, ) await refresh_skills_system_prompt_cache_async() return await get_custom_skill(skill_name, config) @@ -205,28 +192,19 @@ async def update_custom_skill(skill_name: str, request: CustomSkillUpdateRequest async def delete_custom_skill(skill_name: str, config: AppConfig = Depends(get_config)) -> dict[str, bool]: try: skill_name = skill_name.replace("\r\n", "").replace("\n", "") - ensure_custom_skill_is_editable(skill_name, app_config=config) - skill_dir = get_custom_skill_dir(skill_name, app_config=config) - prev_content = read_custom_skill_content(skill_name, app_config=config) - try: - append_history( - skill_name, - { - "action": "human_delete", - "author": "human", - "thread_id": None, - "file_path": "SKILL.md", - "prev_content": prev_content, - "new_content": None, - "scanner": {"decision": "allow", "reason": "Deletion requested."}, - }, - app_config=config, - ) - except OSError as e: - if not isinstance(e, PermissionError) and e.errno not in {errno.EACCES, errno.EPERM, errno.EROFS}: - raise - logger.warning("Skipping delete history write for custom skill %s due to readonly/permission failure; continuing with skill directory removal: %s", skill_name, e) - shutil.rmtree(skill_dir) + storage = get_or_new_skill_storage(app_config=config) + storage.delete_custom_skill( + skill_name, + history_meta={ + "action": "human_delete", + "author": "human", + "thread_id": None, + "file_path": SKILL_MD_FILE, + "prev_content": None, + "new_content": None, + "scanner": {"decision": "allow", "reason": "Deletion requested."}, + }, + ) await refresh_skills_system_prompt_cache_async() return {"success": True} except FileNotFoundError as e: @@ -242,9 +220,10 @@ async def delete_custom_skill(skill_name: str, config: AppConfig = Depends(get_c async def get_custom_skill_history(skill_name: str, config: AppConfig = Depends(get_config)) -> CustomSkillHistoryResponse: try: skill_name = skill_name.replace("\r\n", "").replace("\n", "") - if not custom_skill_exists(skill_name, app_config=config) and not get_skill_history_file(skill_name, app_config=config).exists(): + storage = get_or_new_skill_storage(app_config=config) + if not storage.custom_skill_exists(skill_name) and not storage.get_skill_history_file(skill_name).exists(): raise HTTPException(status_code=404, detail=f"Custom skill '{skill_name}' not found") - return CustomSkillHistoryResponse(history=read_history(skill_name, app_config=config)) + return CustomSkillHistoryResponse(history=storage.read_history(skill_name)) except HTTPException: raise except Exception as e: @@ -255,34 +234,35 @@ async def get_custom_skill_history(skill_name: str, config: AppConfig = Depends( @router.post("/skills/custom/{skill_name}/rollback", response_model=CustomSkillContentResponse, summary="Rollback Custom Skill") async def rollback_custom_skill(skill_name: str, request: SkillRollbackRequest, config: AppConfig = Depends(get_config)) -> CustomSkillContentResponse: try: - if not custom_skill_exists(skill_name, app_config=config) and not get_skill_history_file(skill_name, app_config=config).exists(): + storage = get_or_new_skill_storage(app_config=config) + if not storage.custom_skill_exists(skill_name) and not storage.get_skill_history_file(skill_name).exists(): raise HTTPException(status_code=404, detail=f"Custom skill '{skill_name}' not found") - history = read_history(skill_name, app_config=config) + history = storage.read_history(skill_name) if not history: raise HTTPException(status_code=400, detail=f"Custom skill '{skill_name}' has no history") record = history[request.history_index] target_content = record.get("prev_content") if target_content is None: raise HTTPException(status_code=400, detail="Selected history entry has no previous content to roll back to") - validate_skill_markdown_content(skill_name, target_content) - scan = await scan_skill_content(target_content, executable=False, location=f"{skill_name}/SKILL.md", app_config=config) - skill_file = get_custom_skill_file(skill_name, app_config=config) + storage.validate_skill_markdown_content(skill_name, target_content) + scan = await scan_skill_content(target_content, executable=False, location=f"{skill_name}/{SKILL_MD_FILE}", app_config=config) + skill_file = storage.get_custom_skill_file(skill_name) current_content = skill_file.read_text(encoding="utf-8") if skill_file.exists() else None history_entry = { "action": "rollback", "author": "human", "thread_id": None, - "file_path": "SKILL.md", + "file_path": SKILL_MD_FILE, "prev_content": current_content, "new_content": target_content, "rollback_from_ts": record.get("ts"), "scanner": {"decision": scan.decision, "reason": scan.reason}, } if scan.decision == "block": - append_history(skill_name, history_entry, app_config=config) + storage.append_history(skill_name, history_entry) raise HTTPException(status_code=400, detail=f"Rollback blocked by security scanner: {scan.reason}") - atomic_write(skill_file, target_content) - append_history(skill_name, history_entry, app_config=config) + storage.write_custom_skill(skill_name, SKILL_MD_FILE, target_content) + storage.append_history(skill_name, history_entry) await refresh_skills_system_prompt_cache_async() return await get_custom_skill(skill_name, config) except HTTPException: @@ -307,7 +287,7 @@ async def rollback_custom_skill(skill_name: str, request: SkillRollbackRequest, async def get_skill(skill_name: str, config: AppConfig = Depends(get_config)) -> SkillResponse: try: skill_name = skill_name.replace("\r\n", "").replace("\n", "") - skills = load_skills(enabled_only=False, app_config=config) + skills = get_or_new_skill_storage(app_config=config).load_skills(enabled_only=False) skill = next((s for s in skills if s.name == skill_name), None) if skill is None: @@ -330,7 +310,7 @@ async def get_skill(skill_name: str, config: AppConfig = Depends(get_config)) -> async def update_skill(skill_name: str, request: SkillUpdateRequest, config: AppConfig = Depends(get_config)) -> SkillResponse: try: skill_name = skill_name.replace("\r\n", "").replace("\n", "") - skills = load_skills(enabled_only=False, app_config=config) + skills = get_or_new_skill_storage(app_config=config).load_skills(enabled_only=False) skill = next((s for s in skills if s.name == skill_name), None) if skill is None: @@ -356,7 +336,7 @@ async def update_skill(skill_name: str, request: SkillUpdateRequest, config: App reload_extensions_config() await refresh_skills_system_prompt_cache_async() - skills = load_skills(enabled_only=False, app_config=config) + skills = get_or_new_skill_storage(app_config=config).load_skills(enabled_only=False) updated_skill = next((s for s in skills if s.name == skill_name), None) if updated_skill is None: diff --git a/backend/docs/HARNESS_APP_SPLIT.md b/backend/docs/HARNESS_APP_SPLIT.md deleted file mode 100644 index 42bf60607..000000000 --- a/backend/docs/HARNESS_APP_SPLIT.md +++ /dev/null @@ -1,343 +0,0 @@ -# DeerFlow 后端拆分设计文档:Harness + App - -> 状态:Draft -> 作者:DeerFlow Team -> 日期:2026-03-13 - -## 1. 背景与动机 - -DeerFlow 后端当前是一个单一 Python 包(`src.*`),包含了从底层 agent 编排到上层用户产品的所有代码。随着项目发展,这种结构带来了几个问题: - -- **复用困难**:其他产品(CLI 工具、Slack bot、第三方集成)想用 agent 能力,必须依赖整个后端,包括 FastAPI、IM SDK 等不需要的依赖 -- **职责模糊**:agent 编排逻辑和用户产品逻辑混在同一个 `src/` 下,边界不清晰 -- **依赖膨胀**:LangGraph Server 运行时不需要 FastAPI/uvicorn/Slack SDK,但当前必须安装全部依赖 - -本文档提出将后端拆分为两部分:**deerflow-harness**(可发布的 agent 框架包)和 **app**(不打包的用户产品代码)。 - -## 2. 核心概念 - -### 2.1 Harness(线束/框架层) - -Harness 是 agent 的构建与编排框架,回答 **"如何构建和运行 agent"** 的问题: - -- Agent 工厂与生命周期管理 -- Middleware pipeline -- 工具系统(内置工具 + MCP + 社区工具) -- 沙箱执行环境 -- 子 agent 委派 -- 记忆系统 -- 技能加载与注入 -- 模型工厂 -- 配置系统 - -**Harness 是一个可发布的 Python 包**(`deerflow-harness`),可以独立安装和使用。 - -**Harness 的设计原则**:对上层应用完全无感知。它不知道也不关心谁在调用它——可以是 Web App、CLI、Slack Bot、或者一个单元测试。 - -### 2.2 App(应用层) - -App 是面向用户的产品代码,回答 **"如何将 agent 呈现给用户"** 的问题: - -- Gateway API(FastAPI REST 接口) -- IM Channels(飞书、Slack、Telegram 集成) -- Custom Agent 的 CRUD 管理 -- 文件上传/下载的 HTTP 接口 - -**App 不打包、不发布**,它是 DeerFlow 项目内部的应用代码,直接运行。 - -**App 依赖 Harness,但 Harness 不依赖 App。** - -### 2.3 边界划分 - -| 模块 | 归属 | 说明 | -|------|------|------| -| `config/` | Harness | 配置系统是基础设施 | -| `reflection/` | Harness | 动态模块加载工具 | -| `utils/` | Harness | 通用工具函数 | -| `agents/` | Harness | Agent 工厂、middleware、state、memory | -| `subagents/` | Harness | 子 agent 委派系统 | -| `sandbox/` | Harness | 沙箱执行环境 | -| `tools/` | Harness | 工具注册与发现 | -| `mcp/` | Harness | MCP 协议集成 | -| `skills/` | Harness | 技能加载、解析、定义 schema | -| `models/` | Harness | LLM 模型工厂 | -| `community/` | Harness | 社区工具(tavily、jina 等) | -| `client.py` | Harness | 嵌入式 Python 客户端 | -| `gateway/` | App | FastAPI REST API | -| `channels/` | App | IM 平台集成 | - -**关于 Custom Agents**:agent 定义格式(`config.yaml` + `SOUL.md` schema)由 Harness 层的 `config/agents_config.py` 定义,但文件的存储、CRUD、发现机制由 App 层的 `gateway/routers/agents.py` 负责。 - -## 3. 目标架构 - -### 3.1 目录结构 - -``` -backend/ -├── packages/ -│ └── harness/ -│ ├── pyproject.toml # deerflow-harness 包定义 -│ └── deerflow/ # Python 包根(import 前缀: deerflow.*) -│ ├── __init__.py -│ ├── config/ -│ ├── reflection/ -│ ├── utils/ -│ ├── agents/ -│ │ ├── lead_agent/ -│ │ ├── middlewares/ -│ │ ├── memory/ -│ │ ├── checkpointer/ -│ │ └── thread_state.py -│ ├── subagents/ -│ ├── sandbox/ -│ ├── tools/ -│ ├── mcp/ -│ ├── skills/ -│ ├── models/ -│ ├── community/ -│ └── client.py -├── app/ # 不打包(import 前缀: app.*) -│ ├── __init__.py -│ ├── gateway/ -│ │ ├── __init__.py -│ │ ├── app.py -│ │ ├── config.py -│ │ ├── path_utils.py -│ │ └── routers/ -│ └── channels/ -│ ├── __init__.py -│ ├── base.py -│ ├── manager.py -│ ├── service.py -│ ├── store.py -│ ├── message_bus.py -│ ├── feishu.py -│ ├── slack.py -│ └── telegram.py -├── pyproject.toml # uv workspace root -├── langgraph.json -├── tests/ -├── docs/ -└── Makefile -``` - -### 3.2 Import 规则 - -两个层使用不同的 import 前缀,职责边界一目了然: - -```python -# --------------------------------------------------------------- -# Harness 内部互相引用(deerflow.* 前缀) -# --------------------------------------------------------------- -from deerflow.agents import make_lead_agent -from deerflow.models import create_chat_model -from deerflow.config import get_app_config -from deerflow.tools import get_available_tools - -# --------------------------------------------------------------- -# App 内部互相引用(app.* 前缀) -# --------------------------------------------------------------- -from app.gateway.app import app -from app.gateway.routers.uploads import upload_files -from app.channels.service import start_channel_service - -# --------------------------------------------------------------- -# App 调用 Harness(单向依赖,Harness 永远不 import app) -# --------------------------------------------------------------- -from deerflow.agents import make_lead_agent -from deerflow.models import create_chat_model -from deerflow.skills import load_skills -from deerflow.config.extensions_config import get_extensions_config -``` - -**App 调用 Harness 示例 — Gateway 中启动 agent**: - -```python -# app/gateway/routers/chat.py -from deerflow.agents.lead_agent.agent import make_lead_agent -from deerflow.models import create_chat_model -from deerflow.config import get_app_config - -async def create_chat_session(thread_id: str, model_name: str): - config = get_app_config() - model = create_chat_model(name=model_name) - agent = make_lead_agent(config=...) - # ... 使用 agent 处理用户消息 -``` - -**App 调用 Harness 示例 — Channel 中查询 skills**: - -```python -# app/channels/manager.py -from deerflow.skills import load_skills -from deerflow.agents.memory.updater import get_memory_data - -def handle_status_command(): - skills = load_skills(enabled_only=True) - memory = get_memory_data() - return f"Skills: {len(skills)}, Memory facts: {len(memory.get('facts', []))}" -``` - -**禁止方向**:Harness 代码中绝不能出现 `from app.` 或 `import app.`。 - -### 3.3 为什么 App 不打包 - -| 方面 | 打包(放 packages/ 下) | 不打包(放 backend/app/) | -|------|------------------------|--------------------------| -| 命名空间 | 需要 pkgutil `extend_path` 合并,或独立前缀 | 天然独立,`app.*` vs `deerflow.*` | -| 发布需求 | 没有——App 是项目内部代码 | 不需要 pyproject.toml | -| 复杂度 | 需要管理两个包的构建、版本、依赖声明 | 直接运行,零额外配置 | -| 运行方式 | `pip install deerflow-app` | `PYTHONPATH=. uvicorn app.gateway.app:app` | - -App 的唯一消费者是 DeerFlow 项目自身,没有独立发布的需求。放在 `backend/app/` 下作为普通 Python 包,通过 `PYTHONPATH` 或 editable install 让 Python 找到即可。 - -### 3.4 依赖关系 - -``` -┌─────────────────────────────────────┐ -│ app/ (不打包,直接运行) │ -│ ├── fastapi, uvicorn │ -│ ├── slack-sdk, lark-oapi, ... │ -│ └── import deerflow.* │ -└──────────────┬──────────────────────┘ - │ - ▼ -┌─────────────────────────────────────┐ -│ deerflow-harness (可发布的包) │ -│ ├── langgraph, langchain │ -│ ├── markitdown, pydantic, ... │ -│ └── 零 app 依赖 │ -└─────────────────────────────────────┘ -``` - -**依赖分类**: - -| 分类 | 依赖包 | -|------|--------| -| Harness only | agent-sandbox, langchain*, langgraph*, markdownify, markitdown, pydantic, pyyaml, readabilipy, tavily-python, firecrawl-py, tiktoken, ddgs, duckdb, httpx, kubernetes, dotenv | -| App only | fastapi, uvicorn, sse-starlette, python-multipart, lark-oapi, slack-sdk, python-telegram-bot, markdown-to-mrkdwn | -| Shared | langgraph-sdk(channels 用 HTTP client), pydantic, httpx | - -### 3.5 Workspace 配置 - -`backend/pyproject.toml`(workspace root): - -```toml -[project] -name = "deer-flow" -version = "0.1.0" -requires-python = ">=3.12" -dependencies = ["deerflow-harness"] - -[dependency-groups] -dev = ["pytest>=8.0.0", "ruff>=0.14.11"] -# App 的额外依赖(fastapi 等)也声明在 workspace root,因为 app 不打包 -app = ["fastapi", "uvicorn", "sse-starlette", "python-multipart"] -channels = ["lark-oapi", "slack-sdk", "python-telegram-bot"] - -[tool.uv.workspace] -members = ["packages/harness"] - -[tool.uv.sources] -deerflow-harness = { workspace = true } -``` - -## 4. 当前的跨层依赖问题 - -在拆分之前,需要先解决 `client.py` 中两处从 harness 到 app 的反向依赖: - -### 4.1 `_validate_skill_frontmatter` - -```python -# client.py — harness 导入了 app 层代码 -from src.gateway.routers.skills import _validate_skill_frontmatter -``` - -**解决方案**:将该函数提取到 `deerflow/skills/validation.py`。这是一个纯逻辑函数(解析 YAML frontmatter、校验字段),与 FastAPI 无关。 - -### 4.2 `CONVERTIBLE_EXTENSIONS` + `convert_file_to_markdown` - -```python -# client.py — harness 导入了 app 层代码 -from src.gateway.routers.uploads import CONVERTIBLE_EXTENSIONS, convert_file_to_markdown -``` - -**解决方案**:将它们提取到 `deerflow/utils/file_conversion.py`。仅依赖 `markitdown` + `pathlib`,是通用工具函数。 - -## 5. 基础设施变更 - -### 5.1 LangGraph Server - -LangGraph Server 只需要 harness 包。`langgraph.json` 更新: - -```json -{ - "dependencies": ["./packages/harness"], - "graphs": { - "lead_agent": "deerflow.agents:make_lead_agent" - }, - "checkpointer": { - "path": "./packages/harness/deerflow/runtime/checkpointer/async_provider.py:make_checkpointer" - } -} -``` - -### 5.2 Gateway API - -```bash -# serve.sh / Makefile -# PYTHONPATH 包含 backend/ 根目录,使 app.* 和 deerflow.* 都能被找到 -PYTHONPATH=. uvicorn app.gateway.app:app --host 0.0.0.0 --port 8001 -``` - -### 5.3 Nginx - -无需变更(只做 URL 路由,不涉及 Python 模块路径)。 - -### 5.4 Docker - -Dockerfile 中的 module 引用从 `src.` 改为 `deerflow.` / `app.`,`COPY` 命令需覆盖 `packages/` 和 `app/` 目录。 - -## 6. 实施计划 - -分 3 个 PR 递进执行: - -### PR 1:提取共享工具函数(Low Risk) - -1. 创建 `src/skills/validation.py`,从 `gateway/routers/skills.py` 提取 `_validate_skill_frontmatter` -2. 创建 `src/utils/file_conversion.py`,从 `gateway/routers/uploads.py` 提取文件转换逻辑 -3. 更新 `client.py`、`gateway/routers/skills.py`、`gateway/routers/uploads.py` 的 import -4. 运行全部测试确认无回归 - -### PR 2:Rename + 物理拆分(High Risk,原子操作) - -1. 创建 `packages/harness/` 目录,创建 `pyproject.toml` -2. `git mv` 将 harness 相关模块从 `src/` 移入 `packages/harness/deerflow/` -3. `git mv` 将 app 相关模块从 `src/` 移入 `app/` -4. 全局替换 import: - - harness 模块:`src.*` → `deerflow.*`(所有 `.py` 文件、`langgraph.json`、测试、文档) - - app 模块:`src.gateway.*` → `app.gateway.*`、`src.channels.*` → `app.channels.*` -5. 更新 workspace root `pyproject.toml` -6. 更新 `langgraph.json`、`Makefile`、`Dockerfile` -7. `uv sync` + 全部测试 + 手动验证服务启动 - -### PR 3:边界检查 + 文档(Low Risk) - -1. 添加 lint 规则:检查 harness 不 import app 模块 -2. 更新 `CLAUDE.md`、`README.md` - -## 7. 风险与缓解 - -| 风险 | 影响 | 缓解措施 | -|------|------|----------| -| 全局 rename 误伤 | 字符串中的 `src` 被错误替换 | 正则精确匹配 `\bsrc\.`,review diff | -| LangGraph Server 找不到模块 | 服务启动失败 | `langgraph.json` 的 `dependencies` 指向正确的 harness 包路径 | -| App 的 `PYTHONPATH` 缺失 | Gateway/Channel 启动 import 报错 | Makefile/Docker 统一设置 `PYTHONPATH=.` | -| `config.yaml` 中的 `use` 字段引用旧路径 | 运行时模块解析失败 | `config.yaml` 中的 `use` 字段同步更新为 `deerflow.*` | -| 测试中 `sys.path` 混乱 | 测试失败 | 用 editable install(`uv sync`)确保 deerflow 可导入,`conftest.py` 中添加 `app/` 到 `sys.path` | - -## 8. 未来演进 - -- **独立发布**:harness 可以发布到内部 PyPI,让其他项目直接 `pip install deerflow-harness` -- **插件化 App**:不同的 app(web、CLI、bot)可以各自独立,都依赖同一个 harness -- **更细粒度拆分**:如果 harness 内部模块继续增长,可以进一步拆分(如 `deerflow-sandbox`、`deerflow-mcp`) diff --git a/backend/packages/harness/deerflow/agents/lead_agent/prompt.py b/backend/packages/harness/deerflow/agents/lead_agent/prompt.py index b7487115a..9b6fd9cd4 100644 --- a/backend/packages/harness/deerflow/agents/lead_agent/prompt.py +++ b/backend/packages/harness/deerflow/agents/lead_agent/prompt.py @@ -8,8 +8,8 @@ from functools import lru_cache from typing import TYPE_CHECKING from deerflow.config.agents_config import load_agent_soul -from deerflow.skills import load_skills -from deerflow.skills.types import Skill +from deerflow.skills.storage import get_or_new_skill_storage +from deerflow.skills.types import Skill, SkillCategory from deerflow.subagents import get_available_subagent_names if TYPE_CHECKING: @@ -26,7 +26,7 @@ _enabled_skills_refresh_event = threading.Event() def _load_enabled_skills_sync() -> list[Skill]: - return list(load_skills(enabled_only=True)) + return list(get_or_new_skill_storage().load_skills(enabled_only=True)) def _start_enabled_skills_refresh_thread() -> None: @@ -127,11 +127,11 @@ def _get_enabled_skills_for_config(app_config: AppConfig | None = None) -> list[ """ if app_config is None: return _get_enabled_skills() - return list(load_skills(enabled_only=True, app_config=app_config)) + return list(get_or_new_skill_storage(app_config=app_config).load_skills(enabled_only=True)) -def _skill_mutability_label(category: str) -> str: - return "[custom, editable]" if category == "custom" else "[built-in]" +def _skill_mutability_label(category: SkillCategory | str) -> str: + return "[custom, editable]" if category == SkillCategory.CUSTOM else "[built-in]" def clear_skills_system_prompt_cache() -> None: diff --git a/backend/packages/harness/deerflow/client.py b/backend/packages/harness/deerflow/client.py index d47e8998e..2ba9302cc 100644 --- a/backend/packages/harness/deerflow/client.py +++ b/backend/packages/harness/deerflow/client.py @@ -41,7 +41,7 @@ from deerflow.config.extensions_config import ExtensionsConfig, SkillStateConfig from deerflow.config.paths import get_paths from deerflow.models import create_chat_model from deerflow.runtime.user_context import get_effective_user_id -from deerflow.skills.installer import install_skill_from_archive +from deerflow.skills.storage import get_or_new_skill_storage from deerflow.uploads.manager import ( claim_unique_filename, delete_file_safe, @@ -752,8 +752,6 @@ class DeerFlowClient: Dict with "skills" key containing list of skill info dicts, matching the Gateway API ``SkillsListResponse`` schema. """ - from deerflow.skills.loader import load_skills - return { "skills": [ { @@ -763,7 +761,7 @@ class DeerFlowClient: "category": s.category, "enabled": s.enabled, } - for s in load_skills(enabled_only=enabled_only) + for s in get_or_new_skill_storage().load_skills(enabled_only=enabled_only) ] } @@ -872,9 +870,9 @@ class DeerFlowClient: Returns: Skill info dict, or None if not found. """ - from deerflow.skills.loader import load_skills + from deerflow.skills.storage import get_or_new_skill_storage - skill = next((s for s in load_skills(enabled_only=False) if s.name == name), None) + skill = next((s for s in get_or_new_skill_storage().load_skills(enabled_only=False) if s.name == name), None) if skill is None: return None return { @@ -899,9 +897,9 @@ class DeerFlowClient: ValueError: If the skill is not found. OSError: If the config file cannot be written. """ - from deerflow.skills.loader import load_skills + from deerflow.skills.storage import get_or_new_skill_storage - skills = load_skills(enabled_only=False) + skills = get_or_new_skill_storage().load_skills(enabled_only=False) skill = next((s for s in skills if s.name == name), None) if skill is None: raise ValueError(f"Skill '{name}' not found") @@ -924,7 +922,7 @@ class DeerFlowClient: self._agent_config_key = None reload_extensions_config() - updated = next((s for s in load_skills(enabled_only=False) if s.name == name), None) + updated = next((s for s in get_or_new_skill_storage().load_skills(enabled_only=False) if s.name == name), None) if updated is None: raise RuntimeError(f"Skill '{name}' disappeared after update") return { @@ -948,7 +946,7 @@ class DeerFlowClient: FileNotFoundError: If the file does not exist. ValueError: If the file is invalid. """ - return install_skill_from_archive(skill_path) + return get_or_new_skill_storage().install_skill_from_archive(skill_path) # ------------------------------------------------------------------ # Public API — memory management diff --git a/backend/packages/harness/deerflow/config/skills_config.py b/backend/packages/harness/deerflow/config/skills_config.py index 31a6ca902..266a98b91 100644 --- a/backend/packages/harness/deerflow/config/skills_config.py +++ b/backend/packages/harness/deerflow/config/skills_config.py @@ -11,6 +11,10 @@ def _default_repo_root() -> Path: class SkillsConfig(BaseModel): """Configuration for skills system""" + use: str = Field( + default="deerflow.skills.storage.local_skill_storage:LocalSkillStorage", + description="Class path of the SkillStorage implementation.", + ) path: str | None = Field( default=None, description="Path to skills directory. If not specified, defaults to ../skills relative to backend directory", @@ -35,10 +39,8 @@ class SkillsConfig(BaseModel): path = _default_repo_root() / path return path.resolve() else: - # Default: ../skills relative to backend directory - from deerflow.skills.loader import get_skills_root_path - - return get_skills_root_path() + # Default: /skills + return _default_repo_root() / "skills" def get_skill_container_path(self, skill_name: str, category: str = "public") -> str: """ diff --git a/backend/packages/harness/deerflow/skills/__init__.py b/backend/packages/harness/deerflow/skills/__init__.py index 4fcb7cc0d..77910d5c2 100644 --- a/backend/packages/harness/deerflow/skills/__init__.py +++ b/backend/packages/harness/deerflow/skills/__init__.py @@ -1,16 +1,17 @@ -from .installer import SkillAlreadyExistsError, SkillSecurityScanError, ainstall_skill_from_archive, install_skill_from_archive -from .loader import get_skills_root_path, load_skills +from __future__ import annotations + +from .installer import SkillAlreadyExistsError, SkillSecurityScanError +from .storage import LocalSkillStorage, SkillStorage, get_or_new_skill_storage from .types import Skill from .validation import ALLOWED_FRONTMATTER_PROPERTIES, _validate_skill_frontmatter __all__ = [ - "load_skills", - "get_skills_root_path", "Skill", "ALLOWED_FRONTMATTER_PROPERTIES", "_validate_skill_frontmatter", - "install_skill_from_archive", - "ainstall_skill_from_archive", "SkillAlreadyExistsError", "SkillSecurityScanError", + "SkillStorage", + "LocalSkillStorage", + "get_or_new_skill_storage", ] diff --git a/backend/packages/harness/deerflow/skills/installer.py b/backend/packages/harness/deerflow/skills/installer.py index e58367817..536ba0f88 100644 --- a/backend/packages/harness/deerflow/skills/installer.py +++ b/backend/packages/harness/deerflow/skills/installer.py @@ -10,13 +10,10 @@ import logging import posixpath import shutil import stat -import tempfile import zipfile from pathlib import Path, PurePosixPath, PureWindowsPath -from deerflow.skills.loader import get_skills_root_path from deerflow.skills.security_scanner import scan_skill_content -from deerflow.skills.validation import _validate_skill_frontmatter logger = logging.getLogger(__name__) @@ -195,80 +192,6 @@ async def _scan_skill_archive_contents_or_raise(skill_dir: Path, skill_name: str await _scan_skill_file_or_raise(skill_dir, path, skill_name, executable=_is_script_support_file(rel_path)) -async def ainstall_skill_from_archive( - zip_path: str | Path, - *, - skills_root: Path | None = None, -) -> dict: - """Install a skill from a .skill archive (ZIP). - - Args: - zip_path: Path to the .skill file. - skills_root: Override the skills root directory. If None, uses - the default from config. - - Returns: - Dict with success, skill_name, message. - - Raises: - FileNotFoundError: If the file does not exist. - ValueError: If the file is invalid (wrong extension, bad ZIP, - invalid frontmatter, duplicate name). - """ - logger.info("Installing skill from %s", zip_path) - path = Path(zip_path) - if not path.is_file(): - if not path.exists(): - raise FileNotFoundError(f"Skill file not found: {zip_path}") - raise ValueError(f"Path is not a file: {zip_path}") - if path.suffix != ".skill": - raise ValueError("File must have .skill extension") - - if skills_root is None: - skills_root = get_skills_root_path() - custom_dir = skills_root / "custom" - custom_dir.mkdir(parents=True, exist_ok=True) - - with tempfile.TemporaryDirectory() as tmp: - tmp_path = Path(tmp) - - try: - zf = zipfile.ZipFile(path, "r") - except FileNotFoundError: - raise FileNotFoundError(f"Skill file not found: {zip_path}") from None - except (zipfile.BadZipFile, IsADirectoryError): - raise ValueError("File is not a valid ZIP archive") from None - - with zf: - safe_extract_skill_archive(zf, tmp_path) - - skill_dir = resolve_skill_dir_from_archive(tmp_path) - - is_valid, message, skill_name = _validate_skill_frontmatter(skill_dir) - if not is_valid: - raise ValueError(f"Invalid skill: {message}") - if not skill_name or "/" in skill_name or "\\" in skill_name or ".." in skill_name: - raise ValueError(f"Invalid skill name: {skill_name}") - - target = custom_dir / skill_name - if target.exists(): - raise SkillAlreadyExistsError(f"Skill '{skill_name}' already exists") - - await _scan_skill_archive_contents_or_raise(skill_dir, skill_name) - - with tempfile.TemporaryDirectory(prefix=f".installing-{skill_name}-", dir=custom_dir) as staging_root: - staging_target = Path(staging_root) / skill_name - shutil.copytree(skill_dir, staging_target) - _move_staged_skill_into_reserved_target(staging_target, target) - logger.info("Skill %r installed to %s", skill_name, target) - - return { - "success": True, - "skill_name": skill_name, - "message": f"Skill '{skill_name}' installed successfully", - } - - def _run_async_install(coro): try: loop = asyncio.get_running_loop() @@ -279,12 +202,3 @@ def _run_async_install(coro): with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: return executor.submit(asyncio.run, coro).result() return asyncio.run(coro) - - -def install_skill_from_archive( - zip_path: str | Path, - *, - skills_root: Path | None = None, -) -> dict: - """Install a skill from a .skill archive (ZIP).""" - return _run_async_install(ainstall_skill_from_archive(zip_path, skills_root=skills_root)) diff --git a/backend/packages/harness/deerflow/skills/loader.py b/backend/packages/harness/deerflow/skills/loader.py deleted file mode 100644 index 6597fd0fd..000000000 --- a/backend/packages/harness/deerflow/skills/loader.py +++ /dev/null @@ -1,105 +0,0 @@ -import logging -import os -from pathlib import Path - -from deerflow.config.app_config import AppConfig - -from .parser import parse_skill_file -from .types import Skill - -logger = logging.getLogger(__name__) - - -def get_skills_root_path() -> Path: - """ - Get the root path of the skills directory. - - Returns: - Path to the skills directory (deer-flow/skills) - """ - # loader.py lives at packages/harness/deerflow/skills/loader.py — 5 parents up reaches backend/ - backend_dir = Path(__file__).resolve().parent.parent.parent.parent.parent - # skills directory is sibling to backend directory - skills_dir = backend_dir.parent / "skills" - return skills_dir - - -def load_skills(skills_path: Path | None = None, use_config: bool = True, enabled_only: bool = False, *, app_config: AppConfig | None = None) -> list[Skill]: - """ - Load all skills from the skills directory. - - Scans both public and custom skill directories, parsing SKILL.md files - to extract metadata. The enabled state is determined by the skills_state_config.json file. - - Args: - skills_path: Optional custom path to skills directory. - If not provided and use_config is True, uses path from config. - Otherwise defaults to deer-flow/skills - use_config: Whether to load skills path from config (default: True) - enabled_only: If True, only return enabled skills (default: False) - - Returns: - List of Skill objects, sorted by name - """ - if skills_path is None: - if use_config: - try: - from deerflow.config import get_app_config - - config = app_config or get_app_config() - skills_path = config.skills.get_skills_path() - except Exception: - # Fallback to default if config fails - skills_path = get_skills_root_path() - else: - skills_path = get_skills_root_path() - - if not skills_path.exists(): - return [] - - skills_by_name: dict[str, Skill] = {} - - # Scan public and custom directories - for category in ["public", "custom"]: - category_path = skills_path / category - if not category_path.exists() or not category_path.is_dir(): - continue - - for current_root, dir_names, file_names in os.walk(category_path, followlinks=True): - # Keep traversal deterministic and skip hidden directories. - dir_names[:] = sorted(name for name in dir_names if not name.startswith(".")) - if "SKILL.md" not in file_names: - continue - - skill_file = Path(current_root) / "SKILL.md" - relative_path = skill_file.parent.relative_to(category_path) - - skill = parse_skill_file(skill_file, category=category, relative_path=relative_path) - if skill: - skills_by_name[skill.name] = skill - - skills = list(skills_by_name.values()) - - # Load skills state configuration and update enabled status - # NOTE: We use ExtensionsConfig.from_file() instead of get_extensions_config() - # to always read the latest configuration from disk. This ensures that changes - # made through the Gateway API (which runs in a separate process) are immediately - # reflected in the LangGraph Server when loading skills. - try: - from deerflow.config.extensions_config import ExtensionsConfig - - extensions_config = ExtensionsConfig.from_file() - for skill in skills: - skill.enabled = extensions_config.is_skill_enabled(skill.name, skill.category) - except Exception as e: - # If config loading fails, default to all enabled - logger.warning("Failed to load extensions config: %s", e) - - # Filter by enabled status if requested - if enabled_only: - skills = [skill for skill in skills if skill.enabled] - - # Sort by name for consistent ordering - skills.sort(key=lambda s: s.name) - - return skills diff --git a/backend/packages/harness/deerflow/skills/manager.py b/backend/packages/harness/deerflow/skills/manager.py deleted file mode 100644 index d7d81353d..000000000 --- a/backend/packages/harness/deerflow/skills/manager.py +++ /dev/null @@ -1,161 +0,0 @@ -"""Utilities for managing custom skills and their history.""" - -from __future__ import annotations - -import json -import re -import tempfile -from datetime import UTC, datetime -from pathlib import Path -from typing import Any - -from deerflow.config import get_app_config -from deerflow.config.app_config import AppConfig -from deerflow.skills.loader import load_skills -from deerflow.skills.validation import _validate_skill_frontmatter - -SKILL_FILE_NAME = "SKILL.md" -HISTORY_FILE_NAME = "HISTORY.jsonl" -HISTORY_DIR_NAME = ".history" -ALLOWED_SUPPORT_SUBDIRS = {"references", "templates", "scripts", "assets"} -_SKILL_NAME_PATTERN = re.compile(r"^[a-z0-9]+(?:-[a-z0-9]+)*$") - - -def get_skills_root_dir(*, app_config: AppConfig | None = None) -> Path: - config = app_config or get_app_config() - return config.skills.get_skills_path() - - -def get_public_skills_dir(*, app_config: AppConfig | None = None) -> Path: - return get_skills_root_dir(app_config=app_config) / "public" - - -def get_custom_skills_dir(*, app_config: AppConfig | None = None) -> Path: - path = get_skills_root_dir(app_config=app_config) / "custom" - path.mkdir(parents=True, exist_ok=True) - return path - - -def validate_skill_name(name: str) -> str: - normalized = name.strip() - if not _SKILL_NAME_PATTERN.fullmatch(normalized): - raise ValueError("Skill name must be hyphen-case using lowercase letters, digits, and hyphens only.") - if len(normalized) > 64: - raise ValueError("Skill name must be 64 characters or fewer.") - return normalized - - -def get_custom_skill_dir(name: str, *, app_config: AppConfig | None = None) -> Path: - return get_custom_skills_dir(app_config=app_config) / validate_skill_name(name) - - -def get_custom_skill_file(name: str, *, app_config: AppConfig | None = None) -> Path: - return get_custom_skill_dir(name, app_config=app_config) / SKILL_FILE_NAME - - -def get_custom_skill_history_dir(*, app_config: AppConfig | None = None) -> Path: - path = get_custom_skills_dir(app_config=app_config) / HISTORY_DIR_NAME - path.mkdir(parents=True, exist_ok=True) - return path - - -def get_skill_history_file(name: str, *, app_config: AppConfig | None = None) -> Path: - return get_custom_skill_history_dir(app_config=app_config) / f"{validate_skill_name(name)}.jsonl" - - -def get_public_skill_dir(name: str, *, app_config: AppConfig | None = None) -> Path: - return get_public_skills_dir(app_config=app_config) / validate_skill_name(name) - - -def custom_skill_exists(name: str, *, app_config: AppConfig | None = None) -> bool: - return get_custom_skill_file(name, app_config=app_config).exists() - - -def public_skill_exists(name: str, *, app_config: AppConfig | None = None) -> bool: - return (get_public_skill_dir(name, app_config=app_config) / SKILL_FILE_NAME).exists() - - -def ensure_custom_skill_is_editable(name: str, *, app_config: AppConfig | None = None) -> None: - if custom_skill_exists(name, app_config=app_config): - return - if public_skill_exists(name, app_config=app_config): - raise ValueError(f"'{name}' is a built-in skill. To customise it, create a new skill with the same name under skills/custom/.") - raise FileNotFoundError(f"Custom skill '{name}' not found.") - - -def ensure_safe_support_path(name: str, relative_path: str, *, app_config: AppConfig | None = None) -> Path: - skill_dir = get_custom_skill_dir(name, app_config=app_config).resolve() - if not relative_path or relative_path.endswith("/"): - raise ValueError("Supporting file path must include a filename.") - relative = Path(relative_path) - if relative.is_absolute(): - raise ValueError("Supporting file path must be relative.") - if any(part in {"..", ""} for part in relative.parts): - raise ValueError("Supporting file path must not contain parent-directory traversal.") - - top_level = relative.parts[0] if relative.parts else "" - if top_level not in ALLOWED_SUPPORT_SUBDIRS: - raise ValueError(f"Supporting files must live under one of: {', '.join(sorted(ALLOWED_SUPPORT_SUBDIRS))}.") - - target = (skill_dir / relative).resolve() - allowed_root = (skill_dir / top_level).resolve() - try: - target.relative_to(allowed_root) - except ValueError as exc: - raise ValueError("Supporting file path must stay within the selected support directory.") from exc - return target - - -def validate_skill_markdown_content(name: str, content: str) -> None: - with tempfile.TemporaryDirectory() as tmp_dir: - temp_skill_dir = Path(tmp_dir) / validate_skill_name(name) - temp_skill_dir.mkdir(parents=True, exist_ok=True) - (temp_skill_dir / SKILL_FILE_NAME).write_text(content, encoding="utf-8") - is_valid, message, parsed_name = _validate_skill_frontmatter(temp_skill_dir) - if not is_valid: - raise ValueError(message) - if parsed_name != name: - raise ValueError(f"Frontmatter name '{parsed_name}' must match requested skill name '{name}'.") - - -def atomic_write(path: Path, content: str) -> None: - path.parent.mkdir(parents=True, exist_ok=True) - with tempfile.NamedTemporaryFile("w", encoding="utf-8", delete=False, dir=str(path.parent)) as tmp_file: - tmp_file.write(content) - tmp_path = Path(tmp_file.name) - tmp_path.replace(path) - - -def append_history(name: str, record: dict[str, Any], *, app_config: AppConfig | None = None) -> None: - history_path = get_skill_history_file(name, app_config=app_config) - history_path.parent.mkdir(parents=True, exist_ok=True) - payload = { - "ts": datetime.now(UTC).isoformat(), - **record, - } - with history_path.open("a", encoding="utf-8") as f: - f.write(json.dumps(payload, ensure_ascii=False)) - f.write("\n") - - -def read_history(name: str, *, app_config: AppConfig | None = None) -> list[dict[str, Any]]: - history_path = get_skill_history_file(name, app_config=app_config) - if not history_path.exists(): - return [] - records: list[dict[str, Any]] = [] - for line in history_path.read_text(encoding="utf-8").splitlines(): - if not line.strip(): - continue - records.append(json.loads(line)) - return records - - -def list_custom_skills(*, app_config: AppConfig | None = None) -> list: - return [skill for skill in load_skills(enabled_only=False, app_config=app_config) if skill.category == "custom"] - - -def read_custom_skill_content(name: str, *, app_config: AppConfig | None = None) -> str: - skill_file = get_custom_skill_file(name, app_config=app_config) - if not skill_file.exists(): - raise FileNotFoundError(f"Custom skill '{name}' not found.") - return skill_file.read_text(encoding="utf-8") diff --git a/backend/packages/harness/deerflow/skills/parser.py b/backend/packages/harness/deerflow/skills/parser.py index 63bcfef7c..b5f56488a 100644 --- a/backend/packages/harness/deerflow/skills/parser.py +++ b/backend/packages/harness/deerflow/skills/parser.py @@ -4,24 +4,24 @@ from pathlib import Path import yaml -from .types import Skill +from .types import SKILL_MD_FILE, Skill, SkillCategory logger = logging.getLogger(__name__) -def parse_skill_file(skill_file: Path, category: str, relative_path: Path | None = None) -> Skill | None: +def parse_skill_file(skill_file: Path, category: SkillCategory, relative_path: Path | None = None) -> Skill | None: """Parse a SKILL.md file and extract metadata. Args: skill_file: Path to the SKILL.md file. - category: Category of the skill ('public' or 'custom'). + category: Category of the skill. relative_path: Relative path from the category root to the skill directory. Defaults to the skill directory name when omitted. Returns: Skill object if parsing succeeds, None otherwise. """ - if not skill_file.exists() or skill_file.name != "SKILL.md": + if not skill_file.exists() or skill_file.name != SKILL_MD_FILE: return None try: diff --git a/backend/packages/harness/deerflow/skills/security_scanner.py b/backend/packages/harness/deerflow/skills/security_scanner.py index d3798be54..3bddb018f 100644 --- a/backend/packages/harness/deerflow/skills/security_scanner.py +++ b/backend/packages/harness/deerflow/skills/security_scanner.py @@ -10,6 +10,7 @@ from dataclasses import dataclass from deerflow.config import get_app_config from deerflow.config.app_config import AppConfig from deerflow.models import create_chat_model +from deerflow.skills.types import SKILL_MD_FILE logger = logging.getLogger(__name__) @@ -36,7 +37,7 @@ def _extract_json_object(raw: str) -> dict | None: return None -async def scan_skill_content(content: str, *, executable: bool = False, location: str = "SKILL.md", app_config: AppConfig | None = None) -> ScanResult: +async def scan_skill_content(content: str, *, executable: bool = False, location: str = SKILL_MD_FILE, app_config: AppConfig | None = None) -> ScanResult: """Screen skill content before it is written to disk.""" rubric = ( "You are a security reviewer for AI agent skills. " diff --git a/backend/packages/harness/deerflow/skills/storage/__init__.py b/backend/packages/harness/deerflow/skills/storage/__init__.py new file mode 100644 index 000000000..044d7adae --- /dev/null +++ b/backend/packages/harness/deerflow/skills/storage/__init__.py @@ -0,0 +1,83 @@ +"""SkillStorage singleton + reflection-based factory. + +Mirrors the pattern used by ``deerflow/sandbox/sandbox_provider.py``. +""" + +from __future__ import annotations + +from deerflow.skills.storage.local_skill_storage import LocalSkillStorage +from deerflow.skills.storage.skill_storage import SkillStorage + +_default_skill_storage: SkillStorage | None = None +_default_skill_storage_config: object | None = None # AppConfig identity the singleton was built from + + +def get_or_new_skill_storage(**kwargs) -> SkillStorage: + """Return a ``SkillStorage`` instance — either a new one or the process singleton. + + **New instance** is created (never cached) when: + - ``skills_path`` is provided — uses it as the ``host_path`` override (class still resolved via config). + - ``app_config`` is provided — constructs a storage from ``app_config.skills`` + so that per-request config (e.g. Gateway ``Depends(get_config)``) is respected + without polluting the process-level singleton. + + **Singleton** is returned (created on first call, then reused) when neither + ``skills_path`` nor ``app_config`` is given — uses ``get_app_config()`` to + resolve the active configuration. + """ + global _default_skill_storage, _default_skill_storage_config + + from deerflow.config import get_app_config + from deerflow.config.skills_config import SkillsConfig + + def _make_storage(skills_config: SkillsConfig, *, host_path: str | None = None, **kwargs) -> SkillStorage: + from deerflow.reflection import resolve_class + + cls = resolve_class(skills_config.use, SkillStorage) + return cls( + host_path=host_path if host_path is not None else str(skills_config.get_skills_path()), + container_path=skills_config.container_path, + **kwargs, + ) + + skills_path = kwargs.pop("skills_path", None) + app_config = kwargs.pop("app_config", None) + + if skills_path is not None: + if app_config is not None: + return _make_storage(app_config.skills, host_path=str(skills_path), **kwargs) + # No app_config: use a default SkillsConfig so we never need to read config.yaml + # when the caller has already supplied an explicit host path. + from deerflow.config.skills_config import SkillsConfig + + return _make_storage(SkillsConfig(), host_path=str(skills_path), **kwargs) + + if app_config is not None: + return _make_storage(app_config.skills, **kwargs) + + # If the singleton was manually injected (e.g. in tests) without a config + # identity (_default_skill_storage_config is None), skip get_app_config() + # entirely to avoid requiring a config.yaml on disk. + if _default_skill_storage is not None and _default_skill_storage_config is None: + return _default_skill_storage + + app_config_now = get_app_config() + if _default_skill_storage is None or _default_skill_storage_config is not app_config_now: + _default_skill_storage = _make_storage(app_config_now.skills, **kwargs) + _default_skill_storage_config = app_config_now + return _default_skill_storage + + +def reset_skill_storage() -> None: + """Clear the cached singleton (used in tests and hot-reload scenarios).""" + global _default_skill_storage, _default_skill_storage_config + _default_skill_storage = None + _default_skill_storage_config = None + + +__all__ = [ + "LocalSkillStorage", + "SkillStorage", + "get_or_new_skill_storage", + "reset_skill_storage", +] diff --git a/backend/packages/harness/deerflow/skills/storage/local_skill_storage.py b/backend/packages/harness/deerflow/skills/storage/local_skill_storage.py new file mode 100644 index 000000000..047cd6163 --- /dev/null +++ b/backend/packages/harness/deerflow/skills/storage/local_skill_storage.py @@ -0,0 +1,198 @@ +"""Local-filesystem implementation of ``SkillStorage``.""" + +from __future__ import annotations + +import errno +import json +import logging +import os +import shutil +import tempfile +from collections.abc import Iterable +from datetime import UTC, datetime +from pathlib import Path + +from deerflow.config.skills_config import _default_repo_root +from deerflow.skills.storage.skill_storage import SKILL_MD_FILE, SkillStorage +from deerflow.skills.types import SkillCategory + +logger = logging.getLogger(__name__) + +DEFAULT_SKILLS_CONTAINER_PATH = "/mnt/skills" + + +class LocalSkillStorage(SkillStorage): + """Skill storage backed by the local filesystem. + + Layout:: + + /public//SKILL.md + /custom//SKILL.md + /custom/.history/.jsonl + """ + + def __init__( + self, + host_path: str | None = None, + container_path: str = DEFAULT_SKILLS_CONTAINER_PATH, + app_config=None, + ) -> None: + super().__init__(container_path=container_path) + if host_path is None: + from deerflow.config import get_app_config + + config = app_config or get_app_config() + self._host_root: Path = config.skills.get_skills_path() + else: + path = Path(host_path) + if not path.is_absolute(): + path = _default_repo_root() / path + self._host_root = path.resolve() + + # ------------------------------------------------------------------ + # Abstract operation implementations + # ------------------------------------------------------------------ + + def get_skills_root_path(self) -> Path: + return self._host_root + + def custom_skill_exists(self, name: str) -> bool: + return self.get_custom_skill_file(name).exists() + + def public_skill_exists(self, name: str) -> bool: + normalized_name = self.validate_skill_name(name) + return (self._host_root / SkillCategory.PUBLIC.value / normalized_name / SKILL_MD_FILE).exists() + + def _iter_skill_files(self) -> Iterable[tuple[SkillCategory, Path, Path]]: + if not self._host_root.exists(): + return + for category in SkillCategory: + category_path = self._host_root / category.value + if not category_path.exists() or not category_path.is_dir(): + continue + for current_root, dir_names, file_names in os.walk(category_path, followlinks=True): + dir_names[:] = sorted(name for name in dir_names if not name.startswith(".")) + if SKILL_MD_FILE not in file_names: + continue + yield category, category_path, Path(current_root) / SKILL_MD_FILE + + def read_custom_skill(self, name: str) -> str: + if not self.custom_skill_exists(name): + raise FileNotFoundError(f"Custom skill '{name}' not found.") + return (self.get_custom_skill_dir(name) / SKILL_MD_FILE).read_text(encoding="utf-8") + + def write_custom_skill(self, name: str, relative_path: str, content: str) -> None: + target = self.validate_relative_path(relative_path, self.get_custom_skill_dir(name)) + target.parent.mkdir(parents=True, exist_ok=True) + with tempfile.NamedTemporaryFile( + "w", + encoding="utf-8", + delete=False, + dir=str(target.parent), + ) as tmp_file: + tmp_file.write(content) + tmp_path = Path(tmp_file.name) + tmp_path.replace(target) + + async def ainstall_skill_from_archive(self, archive_path: str | Path) -> dict: + import zipfile + + from deerflow.skills.installer import ( + SkillAlreadyExistsError, + _move_staged_skill_into_reserved_target, + _scan_skill_archive_contents_or_raise, + resolve_skill_dir_from_archive, + safe_extract_skill_archive, + ) + from deerflow.skills.validation import _validate_skill_frontmatter + + logger.info("Installing skill from %s", archive_path) + path = Path(archive_path) + if not path.is_file(): + if not path.exists(): + raise FileNotFoundError(f"Skill file not found: {archive_path}") + raise ValueError(f"Path is not a file: {archive_path}") + if path.suffix != ".skill": + raise ValueError("File must have .skill extension") + + custom_dir = self._host_root / "custom" + custom_dir.mkdir(parents=True, exist_ok=True) + + with tempfile.TemporaryDirectory() as tmp: + tmp_path = Path(tmp) + + try: + zf = zipfile.ZipFile(path, "r") + except FileNotFoundError: + raise FileNotFoundError(f"Skill file not found: {archive_path}") from None + except (zipfile.BadZipFile, IsADirectoryError): + raise ValueError("File is not a valid ZIP archive") from None + + with zf: + safe_extract_skill_archive(zf, tmp_path) + + skill_dir = resolve_skill_dir_from_archive(tmp_path) + + is_valid, message, skill_name = _validate_skill_frontmatter(skill_dir) + if not is_valid: + raise ValueError(f"Invalid skill: {message}") + if not skill_name or "/" in skill_name or "\\" in skill_name or ".." in skill_name: + raise ValueError(f"Invalid skill name: {skill_name}") + + target = custom_dir / skill_name + if target.exists(): + raise SkillAlreadyExistsError(f"Skill '{skill_name}' already exists") + + await _scan_skill_archive_contents_or_raise(skill_dir, skill_name) + + with tempfile.TemporaryDirectory(prefix=f".installing-{skill_name}-", dir=custom_dir) as staging_root: + staging_target = Path(staging_root) / skill_name + shutil.copytree(skill_dir, staging_target) + _move_staged_skill_into_reserved_target(staging_target, target) + logger.info("Skill %r installed to %s", skill_name, target) + + return { + "success": True, + "skill_name": skill_name, + "message": f"Skill '{skill_name}' installed successfully", + } + + def delete_custom_skill(self, name: str, *, history_meta: dict | None = None) -> None: + self.validate_skill_name(name) + self.ensure_custom_skill_is_editable(name) + target = self.get_custom_skill_dir(name) + if history_meta is not None: + prev_content = self.read_custom_skill(name) + try: + self.append_history(name, {**history_meta, "prev_content": prev_content}) + except OSError as e: + if not isinstance(e, PermissionError) and e.errno not in {errno.EACCES, errno.EPERM, errno.EROFS}: + raise + logger.warning( + "Skipping delete history write for custom skill %s due to readonly/permission failure; continuing with skill directory removal: %s", + name, + e, + ) + if target.exists(): + shutil.rmtree(target) + + def append_history(self, name: str, record: dict) -> None: + self.validate_skill_name(name) + payload = {"ts": datetime.now(UTC).isoformat(), **record} + history_path = self.get_skill_history_file(name) + history_path.parent.mkdir(parents=True, exist_ok=True) + with history_path.open("a", encoding="utf-8") as f: + f.write(json.dumps(payload, ensure_ascii=False)) + f.write("\n") + + def read_history(self, name: str) -> list[dict]: + self.validate_skill_name(name) + history_path = self.get_skill_history_file(name) + if not history_path.exists(): + return [] + records: list[dict] = [] + for line in history_path.read_text(encoding="utf-8").splitlines(): + if not line.strip(): + continue + records.append(json.loads(line)) + return records diff --git a/backend/packages/harness/deerflow/skills/storage/skill_storage.py b/backend/packages/harness/deerflow/skills/storage/skill_storage.py new file mode 100644 index 000000000..15af90102 --- /dev/null +++ b/backend/packages/harness/deerflow/skills/storage/skill_storage.py @@ -0,0 +1,254 @@ +"""Abstract SkillStorage base class with template-method flows.""" + +from __future__ import annotations + +import logging +import re +from abc import ABC, abstractmethod +from collections.abc import Iterable +from pathlib import Path + +from deerflow.skills.types import SKILL_MD_FILE, Skill, SkillCategory # noqa: F401 + +logger = logging.getLogger(__name__) + +_SKILL_NAME_PATTERN = re.compile(r"^[a-z0-9]+(?:-[a-z0-9]+)*$") + + +class SkillStorage(ABC): + """Abstract base for skill storage backends. + + Subclasses implement a small set of storage-medium-specific atomic + operations; this base class provides final template-method flows + (load_skills, history serialisation, path helpers, validation) that + compose them with protocol-level helpers. + """ + + def __init__(self, container_path: str = "/mnt/skills") -> None: + self._container_root = container_path + + # ------------------------------------------------------------------ + # Static protocol helpers (not storage-specific) + # ------------------------------------------------------------------ + + @staticmethod + def validate_skill_name(name: str) -> str: + """Validate and normalise a skill name; return the normalised form.""" + normalized = name.strip() + if not _SKILL_NAME_PATTERN.fullmatch(normalized): + raise ValueError("Skill name must be hyphen-case using lowercase letters, digits, and hyphens only.") + if len(normalized) > 64: + raise ValueError("Skill name must be 64 characters or fewer.") + return normalized + + @staticmethod + def validate_relative_path(relative_path: str, base_dir: Path) -> Path: + """Validate *relative_path* against *base_dir* and return the resolved target. + + Checks that *relative_path* is non-empty, then joins it with *base_dir* + and resolves the result (following symlinks). Raises ``ValueError`` if + the resolved target does not lie within *base_dir*. + """ + if not relative_path: + raise ValueError("relative_path must not be empty.") + resolved_base = base_dir.resolve() + target = (resolved_base / relative_path).resolve() + try: + target.relative_to(resolved_base) + except ValueError as exc: + raise ValueError("relative_path must resolve within the skill directory.") from exc + return target + + @staticmethod + def validate_skill_markdown_content(name: str, content: str) -> None: + """Validate SKILL.md content: parse frontmatter and check name matches.""" + import tempfile + + from deerflow.skills.validation import _validate_skill_frontmatter + + with tempfile.TemporaryDirectory() as tmp_dir: + temp_skill_dir = Path(tmp_dir) / SkillStorage.validate_skill_name(name) + temp_skill_dir.mkdir(parents=True, exist_ok=True) + (temp_skill_dir / SKILL_MD_FILE).write_text(content, encoding="utf-8") + is_valid, message, parsed_name = _validate_skill_frontmatter(temp_skill_dir) + if not is_valid: + raise ValueError(message) + if parsed_name != name: + raise ValueError(f"Frontmatter name '{parsed_name}' must match requested skill name '{name}'.") + + def ensure_safe_support_path(self, name: str, relative_path: str) -> Path: + """Validate and return the resolved absolute path for a support file.""" + _ALLOWED_SUPPORT_SUBDIRS = {"references", "templates", "scripts", "assets"} + skill_dir = self.get_custom_skill_dir(self.validate_skill_name(name)).resolve() + if not relative_path or relative_path.endswith("/"): + raise ValueError("Supporting file path must include a filename.") + relative = Path(relative_path) + if relative.is_absolute(): + raise ValueError("Supporting file path must be relative.") + if any(part in {"..", ""} for part in relative.parts): + raise ValueError("Supporting file path must not contain parent-directory traversal.") + top_level = relative.parts[0] if relative.parts else "" + if top_level not in _ALLOWED_SUPPORT_SUBDIRS: + raise ValueError(f"Supporting files must live under one of: {', '.join(sorted(_ALLOWED_SUPPORT_SUBDIRS))}.") + target = (skill_dir / relative).resolve() + allowed_root = (skill_dir / top_level).resolve() + try: + target.relative_to(allowed_root) + except ValueError as exc: + raise ValueError("Supporting file path must stay within the selected support directory.") from exc + return target + + # ------------------------------------------------------------------ + # Abstract atomic operations (storage-medium specific) + # ------------------------------------------------------------------ + + @abstractmethod + def get_skills_root_path(self) -> Path: + """Absolute host path to the skills root, used for sandbox mounts. + + Origin: ``deerflow.skills.loader.get_skills_root_path``. + """ + + @abstractmethod + def _iter_skill_files(self) -> Iterable[tuple[SkillCategory, Path, Path]]: + """Yield ``(category, category_root, skill_md_path)`` for every SKILL.md. + + Origin: extracted from directory-walk logic inside + ``deerflow.skills.loader.load_skills``. + """ + + @abstractmethod + def read_custom_skill(self, name: str) -> str: + """Read SKILL.md content for a custom skill. + + Origin: ``deerflow.skills.manager.read_custom_skill_content``. + """ + + @abstractmethod + def write_custom_skill(self, name: str, relative_path: str, content: str) -> None: + """Atomically write a text file under ``custom//``. + + Origin: ``deerflow.skills.manager.atomic_write``. + """ + + @abstractmethod + async def ainstall_skill_from_archive(self, archive_path: str | Path) -> dict: + """Async install of a skill from a ``.skill`` ZIP archive. + + Origin: ``deerflow.skills.installer.ainstall_skill_from_archive``. + """ + + def install_skill_from_archive(self, archive_path: str | Path) -> dict: + """Sync wrapper — delegates to :meth:`ainstall_skill_from_archive`.""" + from deerflow.skills.installer import _run_async_install + + return _run_async_install(self.ainstall_skill_from_archive(archive_path)) + + @abstractmethod + def delete_custom_skill(self, name: str, *, history_meta: dict | None = None) -> None: + """Delete a custom skill (validation + optional history + directory removal). + + Origin: ``app.gateway.routers.skills.delete_custom_skill`` + ``skill_manage_tool``. + """ + + @abstractmethod + def custom_skill_exists(self, name: str) -> bool: + """Origin: ``deerflow.skills.manager.custom_skill_exists``.""" + + @abstractmethod + def public_skill_exists(self, name: str) -> bool: + """Origin: ``deerflow.skills.manager.public_skill_exists``.""" + + @abstractmethod + def append_history(self, name: str, record: dict) -> None: + """Append a JSONL history entry for ``name``. + + Origin: ``deerflow.skills.manager.append_history``. + """ + + @abstractmethod + def read_history(self, name: str) -> list[dict]: + """Return all history records for ``name``, oldest first. + + Origin: ``deerflow.skills.manager.read_history``. + """ + + # ------------------------------------------------------------------ + # Concrete path helpers (layout is part of the SKILL.md protocol) + # ------------------------------------------------------------------ + + def get_container_root(self) -> str: + """Origin: ``deerflow.config.skills_config.SkillsConfig.container_path`` accessor.""" + return self._container_root + + def get_custom_skill_dir(self, name: str) -> Path: + """Path to ``custom/``. Does not create the directory. + + Origin: ``deerflow.skills.manager.get_custom_skill_dir``. + """ + normalized_name = self.validate_skill_name(name) + return self.get_skills_root_path() / SkillCategory.CUSTOM.value / normalized_name + + def get_custom_skill_file(self, name: str) -> Path: + """Path to ``custom//SKILL.md``. + + Origin: ``deerflow.skills.manager.get_custom_skill_file``. + """ + normalized_name = self.validate_skill_name(name) + return self.get_custom_skill_dir(normalized_name) / SKILL_MD_FILE + + def get_skill_history_file(self, name: str) -> Path: + """Path to ``custom/.history/.jsonl``. Does not create parents. + + Origin: ``deerflow.skills.manager.get_skill_history_file``. + """ + normalized_name = self.validate_skill_name(name) + return self.get_skills_root_path() / SkillCategory.CUSTOM.value / ".history" / f"{normalized_name}.jsonl" + + # ------------------------------------------------------------------ + # Final template-method flows + # ------------------------------------------------------------------ + + def load_skills(self, *, enabled_only: bool = False) -> list[Skill]: + """Discover all skills, merge enabled state, sort and optionally filter. + + Origin: ``deerflow.skills.loader.load_skills``. + """ + from deerflow.skills.parser import parse_skill_file + + skills_by_name: dict[str, Skill] = {} + for category, category_root, md_path in self._iter_skill_files(): + skill = parse_skill_file( + md_path, + category=category, + relative_path=md_path.parent.relative_to(category_root), + ) + if skill: + skills_by_name[skill.name] = skill + + skills = list(skills_by_name.values()) + + # Merge enabled state from extensions config (re-read every call so + # changes made by another process are picked up immediately). + try: + from deerflow.config.extensions_config import ExtensionsConfig + + extensions_config = ExtensionsConfig.from_file() + for skill in skills: + skill.enabled = extensions_config.is_skill_enabled(skill.name, skill.category) + except Exception as e: + logger.warning("Failed to load extensions config: %s", e) + + if enabled_only: + skills = [s for s in skills if s.enabled] + + skills.sort(key=lambda s: s.name) + return skills + + def ensure_custom_skill_is_editable(self, name: str) -> None: + """Origin: ``deerflow.skills.manager.ensure_custom_skill_is_editable``.""" + if self.custom_skill_exists(name): + return + if self.public_skill_exists(name): + raise ValueError(f"'{name}' is a built-in skill. To customise it, create a new skill with the same name under skills/custom/.") + raise FileNotFoundError(f"Custom skill '{name}' not found.") diff --git a/backend/packages/harness/deerflow/skills/types.py b/backend/packages/harness/deerflow/skills/types.py index 0cdb668f3..fcf37ca2f 100644 --- a/backend/packages/harness/deerflow/skills/types.py +++ b/backend/packages/harness/deerflow/skills/types.py @@ -1,6 +1,20 @@ from dataclasses import dataclass +from enum import StrEnum from pathlib import Path +SKILL_MD_FILE = "SKILL.md" + + +class SkillCategory(StrEnum): + """Source category for a skill. + + - ``PUBLIC``: built-in skill bundled with the platform, read-only. + - ``CUSTOM``: user-authored skill that can be edited or deleted. + """ + + PUBLIC = "public" + CUSTOM = "custom" + @dataclass class Skill: @@ -12,7 +26,7 @@ class Skill: skill_dir: Path skill_file: Path relative_path: Path # Relative path from category root to skill directory - category: str # 'public' or 'custom' + category: SkillCategory # 'public' or 'custom' enabled: bool = False # Whether this skill is enabled @property diff --git a/backend/packages/harness/deerflow/skills/validation.py b/backend/packages/harness/deerflow/skills/validation.py index 4c0f80857..f8af5d3d0 100644 --- a/backend/packages/harness/deerflow/skills/validation.py +++ b/backend/packages/harness/deerflow/skills/validation.py @@ -8,6 +8,8 @@ from pathlib import Path import yaml +from deerflow.skills.types import SKILL_MD_FILE + # Allowed properties in SKILL.md frontmatter ALLOWED_FRONTMATTER_PROPERTIES = {"name", "description", "license", "allowed-tools", "metadata", "compatibility", "version", "author"} @@ -21,9 +23,9 @@ def _validate_skill_frontmatter(skill_dir: Path) -> tuple[bool, str, str | None] Returns: Tuple of (is_valid, message, skill_name). """ - skill_md = skill_dir / "SKILL.md" + skill_md = skill_dir / SKILL_MD_FILE if not skill_md.exists(): - return False, "SKILL.md not found", None + return False, f"{SKILL_MD_FILE} not found", None content = skill_md.read_text(encoding="utf-8") if not content.startswith("---"): diff --git a/backend/packages/harness/deerflow/subagents/executor.py b/backend/packages/harness/deerflow/subagents/executor.py index c8da0789b..f36cfbed5 100644 --- a/backend/packages/harness/deerflow/subagents/executor.py +++ b/backend/packages/harness/deerflow/subagents/executor.py @@ -300,10 +300,10 @@ class SubagentExecutor: return [] try: - from deerflow.skills.loader import load_skills + from deerflow.skills.storage import get_or_new_skill_storage # Use asyncio.to_thread to avoid blocking the event loop (LangGraph ASGI requirement) - all_skills = await asyncio.to_thread(load_skills, enabled_only=True) + all_skills = await asyncio.to_thread(get_or_new_skill_storage().load_skills, enabled_only=True) logger.info(f"[trace={self.trace_id}] Subagent {self.config.name} loaded {len(all_skills)} enabled skills from disk") except Exception: logger.warning(f"[trace={self.trace_id}] Failed to load skills for subagent {self.config.name}", exc_info=True) diff --git a/backend/packages/harness/deerflow/tools/skill_manage_tool.py b/backend/packages/harness/deerflow/tools/skill_manage_tool.py index 3b7a109cc..c0114eb08 100644 --- a/backend/packages/harness/deerflow/tools/skill_manage_tool.py +++ b/backend/packages/harness/deerflow/tools/skill_manage_tool.py @@ -4,7 +4,6 @@ from __future__ import annotations import asyncio import logging -import shutil from typing import Any from weakref import WeakValueDictionary @@ -14,20 +13,10 @@ from langgraph.typing import ContextT from deerflow.agents.lead_agent.prompt import refresh_skills_system_prompt_cache_async from deerflow.agents.thread_state import ThreadState from deerflow.mcp.tools import _make_sync_tool_wrapper -from deerflow.skills.manager import ( - append_history, - atomic_write, - custom_skill_exists, - ensure_custom_skill_is_editable, - ensure_safe_support_path, - get_custom_skill_dir, - get_custom_skill_file, - public_skill_exists, - read_custom_skill_content, - validate_skill_markdown_content, - validate_skill_name, -) from deerflow.skills.security_scanner import scan_skill_content +from deerflow.skills.storage import get_or_new_skill_storage +from deerflow.skills.storage.skill_storage import SkillStorage +from deerflow.skills.types import SKILL_MD_FILE logger = logging.getLogger(__name__) @@ -96,50 +85,50 @@ async def _skill_manage_impl( replace: Replacement text for patch. expected_count: Optional expected number of replacements for patch. """ - name = validate_skill_name(name) + name = SkillStorage.validate_skill_name(name) lock = _get_lock(name) thread_id = _get_thread_id(runtime) + skill_storage = get_or_new_skill_storage() async with lock: if action == "create": - if await _to_thread(custom_skill_exists, name): + if await _to_thread(skill_storage.custom_skill_exists, name): raise ValueError(f"Custom skill '{name}' already exists.") if content is None: raise ValueError("content is required for create.") - await _to_thread(validate_skill_markdown_content, name, content) - scan = await _scan_or_raise(content, executable=False, location=f"{name}/SKILL.md") - skill_file = await _to_thread(get_custom_skill_file, name) - await _to_thread(atomic_write, skill_file, content) + await _to_thread(skill_storage.validate_skill_markdown_content, name, content) + scan = await _scan_or_raise(content, executable=False, location=f"{name}/{SKILL_MD_FILE}") + await _to_thread(skill_storage.write_custom_skill, name, SKILL_MD_FILE, content) await _to_thread( - append_history, + skill_storage.append_history, name, - _history_record(action="create", file_path="SKILL.md", prev_content=None, new_content=content, thread_id=thread_id, scanner=scan), + _history_record(action="create", file_path=SKILL_MD_FILE, prev_content=None, new_content=content, thread_id=thread_id, scanner=scan), ) await refresh_skills_system_prompt_cache_async() return f"Created custom skill '{name}'." if action == "edit": - await _to_thread(ensure_custom_skill_is_editable, name) + await _to_thread(skill_storage.ensure_custom_skill_is_editable, name) if content is None: raise ValueError("content is required for edit.") - await _to_thread(validate_skill_markdown_content, name, content) - scan = await _scan_or_raise(content, executable=False, location=f"{name}/SKILL.md") - skill_file = await _to_thread(get_custom_skill_file, name) + await _to_thread(skill_storage.validate_skill_markdown_content, name, content) + scan = await _scan_or_raise(content, executable=False, location=f"{name}/{SKILL_MD_FILE}") + skill_file = skill_storage.get_custom_skill_file(name) prev_content = await _to_thread(skill_file.read_text, encoding="utf-8") - await _to_thread(atomic_write, skill_file, content) + await _to_thread(skill_storage.write_custom_skill, name, SKILL_MD_FILE, content) await _to_thread( - append_history, + skill_storage.append_history, name, - _history_record(action="edit", file_path="SKILL.md", prev_content=prev_content, new_content=content, thread_id=thread_id, scanner=scan), + _history_record(action="edit", file_path=SKILL_MD_FILE, prev_content=prev_content, new_content=content, thread_id=thread_id, scanner=scan), ) await refresh_skills_system_prompt_cache_async() return f"Updated custom skill '{name}'." if action == "patch": - await _to_thread(ensure_custom_skill_is_editable, name) + await _to_thread(skill_storage.ensure_custom_skill_is_editable, name) if find is None or replace is None: raise ValueError("find and replace are required for patch.") - skill_file = await _to_thread(get_custom_skill_file, name) + skill_file = skill_storage.get_custom_skill_file(name) prev_content = await _to_thread(skill_file.read_text, encoding="utf-8") occurrences = prev_content.count(find) if occurrences == 0: @@ -148,64 +137,67 @@ async def _skill_manage_impl( raise ValueError(f"Expected {expected_count} replacements but found {occurrences}.") replacement_count = expected_count if expected_count is not None else 1 new_content = prev_content.replace(find, replace, replacement_count) - await _to_thread(validate_skill_markdown_content, name, new_content) - scan = await _scan_or_raise(new_content, executable=False, location=f"{name}/SKILL.md") - await _to_thread(atomic_write, skill_file, new_content) + await _to_thread(skill_storage.validate_skill_markdown_content, name, new_content) + scan = await _scan_or_raise(new_content, executable=False, location=f"{name}/{SKILL_MD_FILE}") + await _to_thread(skill_storage.write_custom_skill, name, SKILL_MD_FILE, new_content) await _to_thread( - append_history, + skill_storage.append_history, name, - _history_record(action="patch", file_path="SKILL.md", prev_content=prev_content, new_content=new_content, thread_id=thread_id, scanner=scan), + _history_record(action="patch", file_path=SKILL_MD_FILE, prev_content=prev_content, new_content=new_content, thread_id=thread_id, scanner=scan), ) await refresh_skills_system_prompt_cache_async() return f"Patched custom skill '{name}' ({replacement_count} replacement(s) applied, {occurrences} match(es) found)." if action == "delete": - await _to_thread(ensure_custom_skill_is_editable, name) - skill_dir = await _to_thread(get_custom_skill_dir, name) - prev_content = await _to_thread(read_custom_skill_content, name) await _to_thread( - append_history, + skill_storage.delete_custom_skill, name, - _history_record(action="delete", file_path="SKILL.md", prev_content=prev_content, new_content=None, thread_id=thread_id, scanner={"decision": "allow", "reason": "Deletion requested."}), + history_meta=_history_record( + action="delete", + file_path=SKILL_MD_FILE, + prev_content=None, + new_content=None, + thread_id=thread_id, + scanner={"decision": "allow", "reason": "Deletion requested."}, + ), ) - await _to_thread(shutil.rmtree, skill_dir) await refresh_skills_system_prompt_cache_async() return f"Deleted custom skill '{name}'." if action == "write_file": - await _to_thread(ensure_custom_skill_is_editable, name) + await _to_thread(skill_storage.ensure_custom_skill_is_editable, name) if path is None or content is None: raise ValueError("path and content are required for write_file.") - target = await _to_thread(ensure_safe_support_path, name, path) + target = await _to_thread(skill_storage.ensure_safe_support_path, name, path) exists = await _to_thread(target.exists) prev_content = await _to_thread(target.read_text, encoding="utf-8") if exists else None executable = "scripts/" in path or path.startswith("scripts/") scan = await _scan_or_raise(content, executable=executable, location=f"{name}/{path}") - await _to_thread(atomic_write, target, content) + await _to_thread(skill_storage.write_custom_skill, name, path, content) await _to_thread( - append_history, + skill_storage.append_history, name, _history_record(action="write_file", file_path=path, prev_content=prev_content, new_content=content, thread_id=thread_id, scanner=scan), ) return f"Wrote '{path}' for custom skill '{name}'." if action == "remove_file": - await _to_thread(ensure_custom_skill_is_editable, name) + await _to_thread(skill_storage.ensure_custom_skill_is_editable, name) if path is None: raise ValueError("path is required for remove_file.") - target = await _to_thread(ensure_safe_support_path, name, path) + target = await _to_thread(skill_storage.ensure_safe_support_path, name, path) if not await _to_thread(target.exists): raise FileNotFoundError(f"Supporting file '{path}' not found for skill '{name}'.") prev_content = await _to_thread(target.read_text, encoding="utf-8") await _to_thread(target.unlink) await _to_thread( - append_history, + skill_storage.append_history, name, _history_record(action="remove_file", file_path=path, prev_content=prev_content, new_content=None, thread_id=thread_id, scanner={"decision": "allow", "reason": "Deletion requested."}), ) return f"Removed '{path}' from custom skill '{name}'." - if await _to_thread(public_skill_exists, name): + if await _to_thread(skill_storage.public_skill_exists, name): raise ValueError(f"'{name}' is a built-in skill. To customise it, create a new skill with the same name under skills/custom/.") raise ValueError(f"Unsupported action '{action}'.") diff --git a/backend/tests/conftest.py b/backend/tests/conftest.py index d48630f37..a357a3962 100644 --- a/backend/tests/conftest.py +++ b/backend/tests/conftest.py @@ -68,6 +68,21 @@ def provisioner_module(): # context should mark themselves ``@pytest.mark.no_auto_user``. +@pytest.fixture(autouse=True) +def _reset_skill_storage_singleton(): + """Reset the SkillStorage singleton between tests to prevent cross-test contamination.""" + try: + from deerflow.skills.storage import reset_skill_storage + except ImportError: + yield + return + reset_skill_storage() + try: + yield + finally: + reset_skill_storage() + + @pytest.fixture(autouse=True) def _auto_user_context(request): """Inject a default ``test-user-autouse`` into the contextvar. diff --git a/backend/tests/test_client.py b/backend/tests/test_client.py index 5bc9f2a44..8397af163 100644 --- a/backend/tests/test_client.py +++ b/backend/tests/test_client.py @@ -43,8 +43,12 @@ def mock_app_config(): @pytest.fixture -def client(mock_app_config): +def client(mock_app_config, tmp_path): """Create a DeerFlowClient with mocked config loading.""" + import deerflow.skills.storage as _storage_mod + from deerflow.skills.storage.local_skill_storage import LocalSkillStorage + + _storage_mod._default_skill_storage = LocalSkillStorage(host_path=str(tmp_path)) with patch("deerflow.client.get_app_config", return_value=mock_app_config): return DeerFlowClient() @@ -135,7 +139,7 @@ class TestConfigQueries: skill.category = "public" skill.enabled = True - with patch("deerflow.skills.loader.load_skills", return_value=[skill]) as mock_load: + with patch("deerflow.skills.storage.local_skill_storage.LocalSkillStorage.load_skills", return_value=[skill]) as mock_load: result = client.list_skills() mock_load.assert_called_once_with(enabled_only=False) @@ -150,7 +154,7 @@ class TestConfigQueries: } def test_list_skills_enabled_only(self, client): - with patch("deerflow.skills.loader.load_skills", return_value=[]) as mock_load: + with patch("deerflow.skills.storage.local_skill_storage.LocalSkillStorage.load_skills", return_value=[]) as mock_load: client.list_skills(enabled_only=True) mock_load.assert_called_once_with(enabled_only=True) @@ -1163,13 +1167,13 @@ class TestSkillsManagement: def test_get_skill_found(self, client): skill = self._make_skill() - with patch("deerflow.skills.loader.load_skills", return_value=[skill]): + with patch("deerflow.skills.storage.local_skill_storage.LocalSkillStorage.load_skills", return_value=[skill]): result = client.get_skill("test-skill") assert result is not None assert result["name"] == "test-skill" def test_get_skill_not_found(self, client): - with patch("deerflow.skills.loader.load_skills", return_value=[]): + with patch("deerflow.skills.storage.local_skill_storage.LocalSkillStorage.load_skills", return_value=[]): result = client.get_skill("nonexistent") assert result is None @@ -1190,7 +1194,7 @@ class TestSkillsManagement: client._agent = MagicMock() with ( - patch("deerflow.skills.loader.load_skills", side_effect=[[skill], [updated_skill]]), + patch("deerflow.skills.storage.local_skill_storage.LocalSkillStorage.load_skills", side_effect=[[skill], [updated_skill]]), patch("deerflow.client.ExtensionsConfig.resolve_config_path", return_value=tmp_path), patch("deerflow.client.get_extensions_config", return_value=ext_config), patch("deerflow.client.reload_extensions_config"), @@ -1202,7 +1206,7 @@ class TestSkillsManagement: tmp_path.unlink() def test_update_skill_not_found(self, client): - with patch("deerflow.skills.loader.load_skills", return_value=[]): + with patch("deerflow.skills.storage.local_skill_storage.LocalSkillStorage.load_skills", return_value=[]): with pytest.raises(ValueError, match="not found"): client.update_skill("nonexistent", enabled=True) @@ -1222,7 +1226,9 @@ class TestSkillsManagement: skills_root = tmp_path / "skills" (skills_root / "custom").mkdir(parents=True) - with patch("deerflow.skills.installer.get_skills_root_path", return_value=skills_root): + from deerflow.skills.storage.local_skill_storage import LocalSkillStorage + + with patch("deerflow.skills.storage._default_skill_storage", LocalSkillStorage(host_path=str(skills_root))): result = client.install_skill(archive_path) assert result["success"] is True @@ -1785,12 +1791,12 @@ class TestScenarioConfigManagement: skill.category = "public" skill.enabled = True - with patch("deerflow.skills.loader.load_skills", return_value=[skill]): + with patch("deerflow.skills.storage.local_skill_storage.LocalSkillStorage.load_skills", return_value=[skill]): skills_result = client.list_skills() assert len(skills_result["skills"]) == 1 # Get specific skill - with patch("deerflow.skills.loader.load_skills", return_value=[skill]): + with patch("deerflow.skills.storage.local_skill_storage.LocalSkillStorage.load_skills", return_value=[skill]): detail = client.get_skill("web-search") assert detail is not None assert detail["enabled"] is True @@ -1841,7 +1847,7 @@ class TestScenarioConfigManagement: client._agent = MagicMock() # Simulate re-created agent with ( - patch("deerflow.skills.loader.load_skills", side_effect=[[skill], [toggled]]), + patch("deerflow.skills.storage.local_skill_storage.LocalSkillStorage.load_skills", side_effect=[[skill], [toggled]]), patch("deerflow.client.ExtensionsConfig.resolve_config_path", return_value=config_file), patch("deerflow.client.get_extensions_config", return_value=ext_config), patch("deerflow.client.reload_extensions_config"), @@ -2061,7 +2067,9 @@ class TestScenarioSkillInstallAndUse: (skills_root / "custom").mkdir(parents=True) # Step 1: Install - with patch("deerflow.skills.installer.get_skills_root_path", return_value=skills_root): + from deerflow.skills.storage.local_skill_storage import LocalSkillStorage + + with patch("deerflow.skills.storage._default_skill_storage", LocalSkillStorage(host_path=str(skills_root))): result = client.install_skill(archive) assert result["success"] is True assert (skills_root / "custom" / "my-analyzer" / "SKILL.md").exists() @@ -2074,7 +2082,7 @@ class TestScenarioSkillInstallAndUse: installed_skill.category = "custom" installed_skill.enabled = True - with patch("deerflow.skills.loader.load_skills", return_value=[installed_skill]): + with patch("deerflow.skills.storage.local_skill_storage.LocalSkillStorage.load_skills", return_value=[installed_skill]): skills_result = client.list_skills() assert any(s["name"] == "my-analyzer" for s in skills_result["skills"]) @@ -2094,7 +2102,7 @@ class TestScenarioSkillInstallAndUse: config_file.write_text("{}") with ( - patch("deerflow.skills.loader.load_skills", side_effect=[[installed_skill], [disabled_skill]]), + patch("deerflow.skills.storage.local_skill_storage.LocalSkillStorage.load_skills", side_effect=[[installed_skill], [disabled_skill]]), patch("deerflow.client.ExtensionsConfig.resolve_config_path", return_value=config_file), patch("deerflow.client.get_extensions_config", return_value=ext_config), patch("deerflow.client.reload_extensions_config"), @@ -2268,7 +2276,7 @@ class TestGatewayConformance: skill.category = "public" skill.enabled = True - with patch("deerflow.skills.loader.load_skills", return_value=[skill]): + with patch("deerflow.skills.storage.local_skill_storage.LocalSkillStorage.load_skills", return_value=[skill]): result = client.list_skills() parsed = SkillsListResponse(**result) @@ -2283,7 +2291,7 @@ class TestGatewayConformance: skill.category = "public" skill.enabled = True - with patch("deerflow.skills.loader.load_skills", return_value=[skill]): + with patch("deerflow.skills.storage.local_skill_storage.LocalSkillStorage.load_skills", return_value=[skill]): result = client.get_skill("web-search") assert result is not None @@ -2299,7 +2307,9 @@ class TestGatewayConformance: with zipfile.ZipFile(archive, "w") as zf: zf.write(skill_dir / "SKILL.md", "my-skill/SKILL.md") - with patch("deerflow.skills.installer.get_skills_root_path", return_value=tmp_path): + from deerflow.skills.storage.local_skill_storage import LocalSkillStorage + + with patch("deerflow.skills.storage._default_skill_storage", LocalSkillStorage(host_path=str(tmp_path))): result = client.install_skill(archive) parsed = SkillInstallResponse(**result) @@ -2453,8 +2463,10 @@ class TestInstallSkillSecurity: def patched_extract(zf, dest, max_total_size=100): return orig(zf, dest, max_total_size=100) + from deerflow.skills.storage.local_skill_storage import LocalSkillStorage + with ( - patch("deerflow.skills.installer.get_skills_root_path", return_value=skills_root), + patch("deerflow.skills.storage._default_skill_storage", LocalSkillStorage(host_path=str(skills_root))), patch("deerflow.skills.installer.safe_extract_skill_archive", side_effect=patched_extract), ): with pytest.raises(ValueError, match="too large"): @@ -2470,7 +2482,9 @@ class TestInstallSkillSecurity: skills_root = Path(tmp) / "skills" (skills_root / "custom").mkdir(parents=True) - with patch("deerflow.skills.installer.get_skills_root_path", return_value=skills_root): + from deerflow.skills.storage.local_skill_storage import LocalSkillStorage + + with patch("deerflow.skills.storage._default_skill_storage", LocalSkillStorage(host_path=str(skills_root))): with pytest.raises(ValueError, match="unsafe"): client.install_skill(archive) @@ -2484,7 +2498,9 @@ class TestInstallSkillSecurity: skills_root = Path(tmp) / "skills" (skills_root / "custom").mkdir(parents=True) - with patch("deerflow.skills.installer.get_skills_root_path", return_value=skills_root): + from deerflow.skills.storage.local_skill_storage import LocalSkillStorage + + with patch("deerflow.skills.storage._default_skill_storage", LocalSkillStorage(host_path=str(skills_root))): with pytest.raises(ValueError, match="unsafe"): client.install_skill(archive) @@ -2506,7 +2522,9 @@ class TestInstallSkillSecurity: skills_root = tmp_path / "skills" (skills_root / "custom").mkdir(parents=True) - with patch("deerflow.skills.installer.get_skills_root_path", return_value=skills_root): + from deerflow.skills.storage.local_skill_storage import LocalSkillStorage + + with patch("deerflow.skills.storage._default_skill_storage", LocalSkillStorage(host_path=str(skills_root))): result = client.install_skill(archive) assert result["success"] is True @@ -2530,9 +2548,11 @@ class TestInstallSkillSecurity: skills_root = tmp_path / "skills" (skills_root / "custom").mkdir(parents=True) + from deerflow.skills.storage.local_skill_storage import LocalSkillStorage + with ( - patch("deerflow.skills.installer.get_skills_root_path", return_value=skills_root), - patch("deerflow.skills.installer._validate_skill_frontmatter", return_value=(True, "OK", "../evil")), + patch("deerflow.skills.storage._default_skill_storage", LocalSkillStorage(host_path=str(skills_root))), + patch("deerflow.skills.validation._validate_skill_frontmatter", return_value=(True, "OK", "../evil")), ): with pytest.raises(ValueError, match="Invalid skill name"): client.install_skill(archive) @@ -2553,9 +2573,11 @@ class TestInstallSkillSecurity: skills_root = tmp_path / "skills" (skills_root / "custom" / "dupe-skill").mkdir(parents=True) + from deerflow.skills.storage.local_skill_storage import LocalSkillStorage + with ( - patch("deerflow.skills.installer.get_skills_root_path", return_value=skills_root), - patch("deerflow.skills.installer._validate_skill_frontmatter", return_value=(True, "OK", "dupe-skill")), + patch("deerflow.skills.storage._default_skill_storage", LocalSkillStorage(host_path=str(skills_root))), + patch("deerflow.skills.validation._validate_skill_frontmatter", return_value=(True, "OK", "dupe-skill")), ): with pytest.raises(ValueError, match="already exists"): client.install_skill(archive) @@ -2570,7 +2592,9 @@ class TestInstallSkillSecurity: skills_root = Path(tmp) / "skills" (skills_root / "custom").mkdir(parents=True) - with patch("deerflow.skills.installer.get_skills_root_path", return_value=skills_root): + from deerflow.skills.storage.local_skill_storage import LocalSkillStorage + + with patch("deerflow.skills.storage._default_skill_storage", LocalSkillStorage(host_path=str(skills_root))): with pytest.raises(ValueError, match="empty"): client.install_skill(archive) @@ -2589,9 +2613,11 @@ class TestInstallSkillSecurity: skills_root = tmp_path / "skills" (skills_root / "custom").mkdir(parents=True) + from deerflow.skills.storage.local_skill_storage import LocalSkillStorage + with ( - patch("deerflow.skills.installer.get_skills_root_path", return_value=skills_root), - patch("deerflow.skills.installer._validate_skill_frontmatter", return_value=(False, "Missing name field", "")), + patch("deerflow.skills.storage._default_skill_storage", LocalSkillStorage(host_path=str(skills_root))), + patch("deerflow.skills.validation._validate_skill_frontmatter", return_value=(False, "Missing name field", "")), ): with pytest.raises(ValueError, match="Invalid skill"): client.install_skill(archive) @@ -2683,7 +2709,7 @@ class TestConfigUpdateErrors: skill.name = "some-skill" with ( - patch("deerflow.skills.loader.load_skills", return_value=[skill]), + patch("deerflow.skills.storage.local_skill_storage.LocalSkillStorage.load_skills", return_value=[skill]), patch("deerflow.client.ExtensionsConfig.resolve_config_path", return_value=None), ): with pytest.raises(FileNotFoundError, match="Cannot locate"): @@ -2703,7 +2729,7 @@ class TestConfigUpdateErrors: config_file.write_text("{}") with ( - patch("deerflow.skills.loader.load_skills", side_effect=[[skill], []]), + patch("deerflow.skills.storage.local_skill_storage.LocalSkillStorage.load_skills", side_effect=[[skill], []]), patch("deerflow.client.ExtensionsConfig.resolve_config_path", return_value=config_file), patch("deerflow.client.get_extensions_config", return_value=ext_config), patch("deerflow.client.reload_extensions_config"), @@ -3118,7 +3144,7 @@ class TestBugAgentInvalidationInconsistency: config_file.write_text("{}") with ( - patch("deerflow.skills.loader.load_skills", side_effect=[[skill], [updated]]), + patch("deerflow.skills.storage.local_skill_storage.LocalSkillStorage.load_skills", side_effect=[[skill], [updated]]), patch("deerflow.client.ExtensionsConfig.resolve_config_path", return_value=config_file), patch("deerflow.client.get_extensions_config", return_value=ext_config), patch("deerflow.client.reload_extensions_config"), diff --git a/backend/tests/test_client_e2e.py b/backend/tests/test_client_e2e.py index c4ba36b98..0c3872e41 100644 --- a/backend/tests/test_client_e2e.py +++ b/backend/tests/test_client_e2e.py @@ -23,8 +23,6 @@ from dotenv import load_dotenv from deerflow.client import DeerFlowClient, StreamEvent from deerflow.config.app_config import AppConfig -from deerflow.config.model_config import ModelConfig -from deerflow.config.sandbox_config import SandboxConfig # Load .env from project root (for OPENAI_API_KEY etc.) load_dotenv(os.path.join(os.path.dirname(__file__), "../../.env")) @@ -55,24 +53,34 @@ def _make_e2e_config() -> AppConfig: - ``E2E_MODEL_ID`` (default: ``ep-20251211175242-llcmh``) - ``E2E_BASE_URL`` (default: ``https://ark-cn-beijing.bytedance.net/api/v3``) - ``OPENAI_API_KEY`` (required for LLM tests) + + Note: We use model_validate with a raw dict (not AppConfig(models=[ModelConfig(...)])) + because passing already-validated Pydantic instances triggers a pydantic-core + shortcut that returns stale cached data when another AppConfig was previously + loaded from disk in the same process. Dict-based validation is always correct. """ - return AppConfig( - models=[ - ModelConfig( - name=os.getenv("E2E_MODEL_NAME", "volcengine-ark"), - display_name="E2E Test Model", - use=os.getenv("E2E_MODEL_USE", "langchain_openai:ChatOpenAI"), - model=os.getenv("E2E_MODEL_ID", "ep-20251211175242-llcmh"), - base_url=os.getenv("E2E_BASE_URL", "https://ark-cn-beijing.bytedance.net/api/v3"), - api_key=os.getenv("OPENAI_API_KEY", ""), - max_tokens=512, - temperature=0.7, - supports_thinking=False, - supports_reasoning_effort=False, - supports_vision=False, - ) - ], - sandbox=SandboxConfig(use="deerflow.sandbox.local:LocalSandboxProvider", allow_host_bash=True), + return AppConfig.model_validate( + { + "models": [ + { + "name": os.getenv("E2E_MODEL_NAME", "volcengine-ark"), + "display_name": "E2E Test Model", + "use": os.getenv("E2E_MODEL_USE", "langchain_openai:ChatOpenAI"), + "model": os.getenv("E2E_MODEL_ID", "ep-20251211175242-llcmh"), + "base_url": os.getenv("E2E_BASE_URL", "https://ark-cn-beijing.bytedance.net/api/v3"), + "api_key": os.getenv("OPENAI_API_KEY", ""), + "max_tokens": 512, + "temperature": 0.7, + "supports_thinking": False, + "supports_reasoning_effort": False, + "supports_vision": False, + } + ], + "sandbox": { + "use": "deerflow.sandbox.local:LocalSandboxProvider", + "allow_host_bash": True, + }, + } ) @@ -95,10 +103,16 @@ def e2e_env(tmp_path, monkeypatch): monkeypatch.setattr("deerflow.config.paths._paths", None) monkeypatch.setattr("deerflow.sandbox.sandbox_provider._default_sandbox_provider", None) - # 2. Inject a clean AppConfig via the global singleton. + # 2. Inject a clean AppConfig. We must reset _app_config to None BEFORE + # calling _make_e2e_config() because AppConfig() constructor misbehaves when + # a disk config is already cached: it returns the cached model list instead + # of the provided one. Clearing first ensures the test config is correct. + monkeypatch.setattr("deerflow.config.app_config._app_config", None) + monkeypatch.setattr("deerflow.config.app_config._app_config_is_custom", False) config = _make_e2e_config() monkeypatch.setattr("deerflow.config.app_config._app_config", config) monkeypatch.setattr("deerflow.config.app_config._app_config_is_custom", True) + monkeypatch.setattr("deerflow.client.get_app_config", lambda: config) # 3. Disable title generation (extra LLM call, non-deterministic) from deerflow.config.title_config import TitleConfig @@ -540,9 +554,11 @@ class TestSkillInstallation: skills_root = tmp_path / "skills" (skills_root / "public").mkdir(parents=True) (skills_root / "custom").mkdir(parents=True) + from deerflow.skills.storage.local_skill_storage import LocalSkillStorage + monkeypatch.setattr( - "deerflow.skills.installer.get_skills_root_path", - lambda: skills_root, + "deerflow.skills.storage._default_skill_storage", + LocalSkillStorage(host_path=str(skills_root)), ) self._skills_root = skills_root @@ -617,19 +633,21 @@ class TestConfigManagement: def test_list_models_returns_injected_config(self, e2e_env): """list_models() returns the model from the injected AppConfig.""" + expected_model_name = os.getenv("E2E_MODEL_NAME", "volcengine-ark") c = DeerFlowClient(checkpointer=None, thinking_enabled=False) result = c.list_models() assert "models" in result assert len(result["models"]) == 1 - assert result["models"][0]["name"] == "volcengine-ark" + assert result["models"][0]["name"] == expected_model_name assert result["models"][0]["display_name"] == "E2E Test Model" def test_get_model_found(self, e2e_env): """get_model() returns the model when it exists.""" + expected_model_name = os.getenv("E2E_MODEL_NAME", "volcengine-ark") c = DeerFlowClient(checkpointer=None, thinking_enabled=False) - model = c.get_model("volcengine-ark") + model = c.get_model(expected_model_name) assert model is not None - assert model["name"] == "volcengine-ark" + assert model["name"] == expected_model_name assert model["supports_thinking"] is False def test_get_model_not_found(self, e2e_env): diff --git a/backend/tests/test_lead_agent_prompt.py b/backend/tests/test_lead_agent_prompt.py index c2d28d1ed..edbcd5193 100644 --- a/backend/tests/test_lead_agent_prompt.py +++ b/backend/tests/test_lead_agent_prompt.py @@ -92,7 +92,7 @@ def test_refresh_skills_system_prompt_cache_async_reloads_immediately(monkeypatc ) state = {"skills": [make_skill("first-skill")]} - monkeypatch.setattr(prompt_module, "load_skills", lambda enabled_only=True: list(state["skills"])) + monkeypatch.setattr(prompt_module, "get_or_new_skill_storage", lambda **kwargs: __import__("types").SimpleNamespace(load_skills=lambda *, enabled_only: list(state["skills"]))) _set_skills_cache_state() try: @@ -145,7 +145,7 @@ def test_clear_cache_does_not_spawn_parallel_refresh_workers(monkeypatch, tmp_pa return [make_skill(f"skill-{current_call}")] - monkeypatch.setattr(prompt_module, "load_skills", fake_load_skills) + monkeypatch.setattr(prompt_module, "get_or_new_skill_storage", lambda **kwargs: __import__("types").SimpleNamespace(load_skills=lambda *, enabled_only: fake_load_skills(enabled_only=enabled_only))) _set_skills_cache_state() try: diff --git a/backend/tests/test_lead_agent_skills.py b/backend/tests/test_lead_agent_skills.py index cb52f46e3..fe983d916 100644 --- a/backend/tests/test_lead_agent_skills.py +++ b/backend/tests/test_lead_agent_skills.py @@ -108,8 +108,8 @@ def test_get_skills_prompt_section_uses_explicit_config_for_enabled_skills(monke monkeypatch.setattr("deerflow.agents.lead_agent.prompt._get_enabled_skills", lambda: [_make_skill("global-skill")]) monkeypatch.setattr( - "deerflow.agents.lead_agent.prompt.load_skills", - lambda enabled_only=True, app_config=None: [_make_skill("explicit-skill")] if app_config is explicit_config else [], + "deerflow.agents.lead_agent.prompt.get_or_new_skill_storage", + lambda app_config=None, **kwargs: __import__("types").SimpleNamespace(load_skills=lambda *, enabled_only: [_make_skill("explicit-skill")] if app_config is explicit_config else []), ) result = get_skills_prompt_section(app_config=explicit_config) diff --git a/backend/tests/test_local_sandbox_provider_mounts.py b/backend/tests/test_local_sandbox_provider_mounts.py index 1468e005c..5c50a1aa0 100644 --- a/backend/tests/test_local_sandbox_provider_mounts.py +++ b/backend/tests/test_local_sandbox_provider_mounts.py @@ -469,7 +469,7 @@ class TestLocalSandboxProviderMounts: ], ) config = SimpleNamespace( - skills=SimpleNamespace(container_path="/custom-skills", get_skills_path=lambda: skills_dir), + skills=SimpleNamespace(container_path="/custom-skills", get_skills_path=lambda: skills_dir, use="deerflow.skills.storage.local_skill_storage:LocalSkillStorage"), sandbox=sandbox_config, ) @@ -491,7 +491,7 @@ class TestLocalSandboxProviderMounts: ], ) config = SimpleNamespace( - skills=SimpleNamespace(container_path="/mnt/skills", get_skills_path=lambda: skills_dir), + skills=SimpleNamespace(container_path="/mnt/skills", get_skills_path=lambda: skills_dir, use="deerflow.skills.storage.local_skill_storage:LocalSkillStorage"), sandbox=sandbox_config, ) @@ -515,7 +515,7 @@ class TestLocalSandboxProviderMounts: ], ) config = SimpleNamespace( - skills=SimpleNamespace(container_path="/mnt/skills", get_skills_path=lambda: skills_dir), + skills=SimpleNamespace(container_path="/mnt/skills", get_skills_path=lambda: skills_dir, use="deerflow.skills.storage.local_skill_storage:LocalSkillStorage"), sandbox=sandbox_config, ) @@ -631,7 +631,7 @@ class TestLocalSandboxProviderMounts: ], ) config = SimpleNamespace( - skills=SimpleNamespace(container_path="/mnt/skills", get_skills_path=lambda: skills_dir), + skills=SimpleNamespace(container_path="/mnt/skills", get_skills_path=lambda: skills_dir, use="deerflow.skills.storage.local_skill_storage:LocalSkillStorage"), sandbox=sandbox_config, ) diff --git a/backend/tests/test_local_skill_storage_write.py b/backend/tests/test_local_skill_storage_write.py new file mode 100644 index 000000000..ce68c6e88 --- /dev/null +++ b/backend/tests/test_local_skill_storage_write.py @@ -0,0 +1,162 @@ +"""Tests for LocalSkillStorage.write_custom_skill path-traversal guards.""" + +from __future__ import annotations + +import os + +import pytest + +from deerflow.skills.storage import get_or_new_skill_storage + + +@pytest.fixture() +def storage(tmp_path): + return get_or_new_skill_storage(skills_path=str(tmp_path)) + + +@pytest.fixture() +def skill_dir(tmp_path, storage): + """Pre-create the skill directory so symlink tests can plant files inside.""" + d = tmp_path / "custom" / "demo-skill" + d.mkdir(parents=True, exist_ok=True) + return d + + +# --------------------------------------------------------------------------- +# Happy path +# --------------------------------------------------------------------------- + + +def test_write_creates_file(tmp_path, storage): + storage.write_custom_skill("demo-skill", "SKILL.md", "# hello") + assert (tmp_path / "custom" / "demo-skill" / "SKILL.md").read_text() == "# hello" + + +def test_write_creates_subdirectory(tmp_path, storage): + storage.write_custom_skill("demo-skill", "references/ref.md", "# ref") + assert (tmp_path / "custom" / "demo-skill" / "references" / "ref.md").exists() + + +def test_write_is_atomic_overwrite(tmp_path, storage): + storage.write_custom_skill("demo-skill", "SKILL.md", "first") + storage.write_custom_skill("demo-skill", "SKILL.md", "second") + assert (tmp_path / "custom" / "demo-skill" / "SKILL.md").read_text() == "second" + + +# --------------------------------------------------------------------------- +# Empty / blank path +# --------------------------------------------------------------------------- + + +def test_rejects_empty_string(storage): + with pytest.raises(ValueError, match="empty"): + storage.write_custom_skill("demo-skill", "", "x") + + +# --------------------------------------------------------------------------- +# Absolute paths +# --------------------------------------------------------------------------- + + +def test_rejects_absolute_unix_path(storage): + with pytest.raises(ValueError, match="skill directory"): + storage.write_custom_skill("demo-skill", "/etc/passwd", "x") + + +def test_rejects_absolute_path_with_skill_prefix(tmp_path, storage): + """Absolute path within skill dir: containment check passes (not a security issue). + + Python's Path(base) / "/abs/path" ignores base and returns /abs/path directly. + If that absolute path resolves within skill_dir, the write succeeds. + This is not an escape — the file lands in the correct location. + """ + absolute = str(tmp_path / "custom" / "demo-skill" / "SKILL.md") + # Does not raise; the write goes to the expected place + storage.write_custom_skill("demo-skill", absolute, "# ok") + assert (tmp_path / "custom" / "demo-skill" / "SKILL.md").read_text() == "# ok" + + +# --------------------------------------------------------------------------- +# Parent-directory traversal +# --------------------------------------------------------------------------- + + +def test_rejects_dotdot_escape(storage): + with pytest.raises(ValueError, match="skill directory"): + storage.write_custom_skill("demo-skill", "../../escaped.txt", "x") + + +def test_rejects_dotdot_sibling(storage): + with pytest.raises(ValueError, match="skill directory"): + storage.write_custom_skill("demo-skill", "../sibling/x.txt", "x") + + +def test_rejects_dotdot_in_subpath(storage): + with pytest.raises(ValueError, match="skill directory"): + storage.write_custom_skill("demo-skill", "sub/../../escape.txt", "x") + + +def test_rejects_dotdot_only(storage): + with pytest.raises(ValueError, match="skill directory"): + storage.write_custom_skill("demo-skill", "..", "x") + + +# --------------------------------------------------------------------------- +# Symlink escape +# --------------------------------------------------------------------------- + + +def test_rejects_symlink_pointing_outside(tmp_path, storage, skill_dir): + outside = tmp_path / "outside.txt" + link = skill_dir / "escape_link.txt" + os.symlink(outside, link) + with pytest.raises(ValueError, match="skill directory"): + storage.write_custom_skill("demo-skill", "escape_link.txt", "x") + + +def test_rejects_symlink_dir_pointing_outside(tmp_path, storage, skill_dir): + outside_dir = tmp_path / "outside_dir" + outside_dir.mkdir() + link_dir = skill_dir / "linked_dir" + os.symlink(outside_dir, link_dir) + with pytest.raises(ValueError, match="skill directory"): + storage.write_custom_skill("demo-skill", "linked_dir/file.txt", "x") + + +def test_allows_symlink_within_skill_dir(tmp_path, storage, skill_dir): + """A symlink that resolves inside the skill directory is allowed. + + Because target is resolved before writing, the write goes to the real file + the symlink points to (both the link and the real file end up with the new + content). + """ + real_file = skill_dir / "real.md" + real_file.write_text("real") + link = skill_dir / "alias.md" + os.symlink(real_file, link) + # Should not raise + storage.write_custom_skill("demo-skill", "alias.md", "updated") + # resolve() writes through to the real target file + assert real_file.read_text() == "updated" + assert (skill_dir / "alias.md").read_text() == "updated" + + +# --------------------------------------------------------------------------- +# Invalid skill-name traversal +# --------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "name,method_name", + [ + ("../../escaped", "get_custom_skill_dir"), + ("../../escaped", "get_custom_skill_file"), + ("../../escaped", "get_skill_history_file"), + ("../../escaped", "custom_skill_exists"), + ("../../escaped", "public_skill_exists"), + ], +) +def test_rejects_invalid_skill_name_in_path_helpers(storage, name, method_name): + method = getattr(storage, method_name) + with pytest.raises(ValueError, match="hyphen-case"): + method(name) diff --git a/backend/tests/test_skill_manage_tool.py b/backend/tests/test_skill_manage_tool.py index 1b16fb48f..3933cb208 100644 --- a/backend/tests/test_skill_manage_tool.py +++ b/backend/tests/test_skill_manage_tool.py @@ -20,11 +20,10 @@ async def _async_result(decision: str, reason: str): def test_skill_manage_create_and_patch(monkeypatch, tmp_path): skills_root = tmp_path / "skills" config = SimpleNamespace( - skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"), + skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills", use="deerflow.skills.storage.local_skill_storage:LocalSkillStorage"), skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None), ) monkeypatch.setattr("deerflow.config.get_app_config", lambda: config) - monkeypatch.setattr("deerflow.skills.manager.get_app_config", lambda: config) monkeypatch.setattr("deerflow.skills.security_scanner.get_app_config", lambda: config) refresh_calls = [] @@ -64,11 +63,10 @@ def test_skill_manage_create_and_patch(monkeypatch, tmp_path): def test_skill_manage_patch_replaces_single_occurrence_by_default(monkeypatch, tmp_path): skills_root = tmp_path / "skills" config = SimpleNamespace( - skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"), + skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills", use="deerflow.skills.storage.local_skill_storage:LocalSkillStorage"), skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None), ) monkeypatch.setattr("deerflow.config.get_app_config", lambda: config) - monkeypatch.setattr("deerflow.skills.manager.get_app_config", lambda: config) monkeypatch.setattr("deerflow.skills.security_scanner.get_app_config", lambda: config) async def _refresh(): @@ -104,11 +102,10 @@ def test_skill_manage_rejects_public_skill_patch(monkeypatch, tmp_path): public_dir.mkdir(parents=True, exist_ok=True) (public_dir / "SKILL.md").write_text(_skill_content("deep-research"), encoding="utf-8") config = SimpleNamespace( - skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"), + skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills", use="deerflow.skills.storage.local_skill_storage:LocalSkillStorage"), skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None), ) monkeypatch.setattr("deerflow.config.get_app_config", lambda: config) - monkeypatch.setattr("deerflow.skills.manager.get_app_config", lambda: config) runtime = SimpleNamespace(context={}, config={"configurable": {}}) @@ -128,11 +125,10 @@ def test_skill_manage_rejects_public_skill_patch(monkeypatch, tmp_path): def test_skill_manage_sync_wrapper_supported(monkeypatch, tmp_path): skills_root = tmp_path / "skills" config = SimpleNamespace( - skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"), + skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills", use="deerflow.skills.storage.local_skill_storage:LocalSkillStorage"), skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None), ) monkeypatch.setattr("deerflow.config.get_app_config", lambda: config) - monkeypatch.setattr("deerflow.skills.manager.get_app_config", lambda: config) refresh_calls = [] async def _refresh(): @@ -156,11 +152,10 @@ def test_skill_manage_sync_wrapper_supported(monkeypatch, tmp_path): def test_skill_manage_rejects_support_path_traversal(monkeypatch, tmp_path): skills_root = tmp_path / "skills" config = SimpleNamespace( - skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"), + skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills", use="deerflow.skills.storage.local_skill_storage:LocalSkillStorage"), skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None), ) monkeypatch.setattr("deerflow.config.get_app_config", lambda: config) - monkeypatch.setattr("deerflow.skills.manager.get_app_config", lambda: config) monkeypatch.setattr("deerflow.skills.security_scanner.get_app_config", lambda: config) async def _refresh(): diff --git a/backend/tests/test_skills_custom_router.py b/backend/tests/test_skills_custom_router.py index b4d101d35..ed93e5510 100644 --- a/backend/tests/test_skills_custom_router.py +++ b/backend/tests/test_skills_custom_router.py @@ -8,7 +8,7 @@ from fastapi import FastAPI from fastapi.testclient import TestClient from app.gateway.routers import skills as skills_router -from deerflow.skills.manager import get_skill_history_file +from deerflow.skills.storage import get_or_new_skill_storage from deerflow.skills.types import Skill @@ -58,7 +58,7 @@ def test_install_skill_archive_runs_security_scan(monkeypatch, tmp_path): scan_calls = [] refresh_calls = [] - async def _scan(content, *, executable, location): + async def _scan(content, *, executable, location, app_config=None): from deerflow.skills.security_scanner import ScanResult scan_calls.append({"content": content, "executable": executable, "location": location}) @@ -67,13 +67,21 @@ def test_install_skill_archive_runs_security_scan(monkeypatch, tmp_path): async def _refresh(): refresh_calls.append("refresh") + from types import SimpleNamespace + + from deerflow.skills.storage.local_skill_storage import LocalSkillStorage + + storage = LocalSkillStorage(host_path=str(skills_root)) + config = SimpleNamespace( + skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills", use="deerflow.skills.storage.local_skill_storage:LocalSkillStorage"), + skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None), + ) monkeypatch.setattr(skills_router, "resolve_thread_virtual_path", lambda thread_id, path: archive) - monkeypatch.setattr("deerflow.skills.installer.get_skills_root_path", lambda: skills_root) + monkeypatch.setattr(skills_router, "get_or_new_skill_storage", lambda **kw: storage) monkeypatch.setattr("deerflow.skills.installer.scan_skill_content", _scan) monkeypatch.setattr(skills_router, "refresh_skills_system_prompt_cache_async", _refresh) - app = FastAPI() - app.include_router(skills_router.router) + app = _make_test_app(config) with TestClient(app) as client: response = client.post("/api/skills/install", json={"thread_id": "thread-1", "path": "mnt/user-data/outputs/archive-skill.skill"}) @@ -105,13 +113,21 @@ def test_install_skill_archive_security_scan_block_returns_400(monkeypatch, tmp_ async def _refresh(): refresh_calls.append("refresh") + from types import SimpleNamespace + + from deerflow.skills.storage.local_skill_storage import LocalSkillStorage + + storage = LocalSkillStorage(host_path=str(skills_root)) + config = SimpleNamespace( + skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills", use="deerflow.skills.storage.local_skill_storage:LocalSkillStorage"), + skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None), + ) monkeypatch.setattr(skills_router, "resolve_thread_virtual_path", lambda thread_id, path: archive) - monkeypatch.setattr("deerflow.skills.installer.get_skills_root_path", lambda: skills_root) + monkeypatch.setattr(skills_router, "get_or_new_skill_storage", lambda **kw: storage) monkeypatch.setattr("deerflow.skills.installer.scan_skill_content", _scan) monkeypatch.setattr(skills_router, "refresh_skills_system_prompt_cache_async", _refresh) - app = FastAPI() - app.include_router(skills_router.router) + app = _make_test_app(config) with TestClient(app) as client: response = client.post("/api/skills/install", json={"thread_id": "thread-1", "path": "mnt/user-data/outputs/blocked-skill.skill"}) @@ -128,11 +144,10 @@ def test_custom_skills_router_lifecycle(monkeypatch, tmp_path): custom_dir.mkdir(parents=True, exist_ok=True) (custom_dir / "SKILL.md").write_text(_skill_content("demo-skill"), encoding="utf-8") config = SimpleNamespace( - skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"), + skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills", use="deerflow.skills.storage.local_skill_storage:LocalSkillStorage"), skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None), ) monkeypatch.setattr("deerflow.config.get_app_config", lambda: config) - monkeypatch.setattr("deerflow.skills.manager.get_app_config", lambda: config) monkeypatch.setattr("app.gateway.routers.skills.scan_skill_content", lambda *args, **kwargs: _async_scan("allow", "ok")) refresh_calls = [] @@ -177,12 +192,13 @@ def test_custom_skill_rollback_blocked_by_scanner(monkeypatch, tmp_path): edited_content = _skill_content("demo-skill", "Edited skill") (custom_dir / "SKILL.md").write_text(edited_content, encoding="utf-8") config = SimpleNamespace( - skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"), + skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills", use="deerflow.skills.storage.local_skill_storage:LocalSkillStorage"), skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None), ) monkeypatch.setattr("deerflow.config.get_app_config", lambda: config) - monkeypatch.setattr("deerflow.skills.manager.get_app_config", lambda: config) - get_skill_history_file("demo-skill", app_config=config).write_text( + history_file = get_or_new_skill_storage(app_config=config).get_skill_history_file("demo-skill") + history_file.parent.mkdir(parents=True, exist_ok=True) + history_file.write_text( '{"action":"human_edit","prev_content":' + json.dumps(original_content) + ',"new_content":' + json.dumps(edited_content) + "}\n", encoding="utf-8", ) @@ -218,11 +234,10 @@ def test_custom_skill_delete_preserves_history_and_allows_restore(monkeypatch, t original_content = _skill_content("demo-skill") (custom_dir / "SKILL.md").write_text(original_content, encoding="utf-8") config = SimpleNamespace( - skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"), + skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills", use="deerflow.skills.storage.local_skill_storage:LocalSkillStorage"), skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None), ) monkeypatch.setattr("deerflow.config.get_app_config", lambda: config) - monkeypatch.setattr("deerflow.skills.manager.get_app_config", lambda: config) monkeypatch.setattr("app.gateway.routers.skills.scan_skill_content", lambda *args, **kwargs: _async_scan("allow", "ok")) refresh_calls = [] @@ -255,11 +270,10 @@ def test_custom_skill_delete_continues_when_history_write_is_readonly(monkeypatc custom_dir.mkdir(parents=True, exist_ok=True) (custom_dir / "SKILL.md").write_text(_skill_content("demo-skill"), encoding="utf-8") config = SimpleNamespace( - skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"), + skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills", use="deerflow.skills.storage.local_skill_storage:LocalSkillStorage"), skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None), ) monkeypatch.setattr("deerflow.config.get_app_config", lambda: config) - monkeypatch.setattr("deerflow.skills.manager.get_app_config", lambda: config) refresh_calls = [] async def _refresh(): @@ -268,7 +282,7 @@ def test_custom_skill_delete_continues_when_history_write_is_readonly(monkeypatc def _readonly_history(*args, **kwargs): raise OSError(errno.EROFS, "Read-only file system", str(skills_root / "custom" / ".history")) - monkeypatch.setattr("app.gateway.routers.skills.append_history", _readonly_history) + monkeypatch.setattr("deerflow.skills.storage.local_skill_storage.LocalSkillStorage.append_history", _readonly_history) monkeypatch.setattr("app.gateway.routers.skills.refresh_skills_system_prompt_cache_async", _refresh) app = _make_test_app(config) @@ -288,11 +302,10 @@ def test_custom_skill_delete_fails_when_skill_dir_removal_fails(monkeypatch, tmp custom_dir.mkdir(parents=True, exist_ok=True) (custom_dir / "SKILL.md").write_text(_skill_content("demo-skill"), encoding="utf-8") config = SimpleNamespace( - skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills"), + skills=SimpleNamespace(get_skills_path=lambda: skills_root, container_path="/mnt/skills", use="deerflow.skills.storage.local_skill_storage:LocalSkillStorage"), skill_evolution=SimpleNamespace(enabled=True, moderation_model_name=None), ) monkeypatch.setattr("deerflow.config.get_app_config", lambda: config) - monkeypatch.setattr("deerflow.skills.manager.get_app_config", lambda: config) refresh_calls = [] async def _refresh(): @@ -301,7 +314,7 @@ def test_custom_skill_delete_fails_when_skill_dir_removal_fails(monkeypatch, tmp def _fail_rmtree(*args, **kwargs): raise PermissionError(errno.EACCES, "Permission denied", str(custom_dir)) - monkeypatch.setattr("app.gateway.routers.skills.shutil.rmtree", _fail_rmtree) + monkeypatch.setattr("deerflow.skills.storage.local_skill_storage.shutil.rmtree", _fail_rmtree) monkeypatch.setattr("app.gateway.routers.skills.refresh_skills_system_prompt_cache_async", _refresh) app = _make_test_app(config) @@ -320,7 +333,7 @@ def test_update_skill_refreshes_prompt_cache_before_return(monkeypatch, tmp_path enabled_state = {"value": True} refresh_calls = [] - def _load_skills(*, enabled_only: bool, app_config=None): + def _load_skills(*, enabled_only: bool): skill = _make_skill("demo-skill", enabled=enabled_state["value"]) if enabled_only and not skill.enabled: return [] @@ -330,7 +343,8 @@ def test_update_skill_refreshes_prompt_cache_before_return(monkeypatch, tmp_path refresh_calls.append("refresh") enabled_state["value"] = False - monkeypatch.setattr("app.gateway.routers.skills.load_skills", _load_skills) + mock_storage = SimpleNamespace(load_skills=_load_skills) + monkeypatch.setattr("app.gateway.routers.skills.get_or_new_skill_storage", lambda **kwargs: mock_storage) monkeypatch.setattr("app.gateway.routers.skills.get_extensions_config", lambda: SimpleNamespace(mcp_servers={}, skills={})) monkeypatch.setattr("app.gateway.routers.skills.reload_extensions_config", lambda: None) monkeypatch.setattr(skills_router.ExtensionsConfig, "resolve_config_path", staticmethod(lambda: config_path)) diff --git a/backend/tests/test_skills_installer.py b/backend/tests/test_skills_installer.py index a7c9eae56..101f1b2a8 100644 --- a/backend/tests/test_skills_installer.py +++ b/backend/tests/test_skills_installer.py @@ -9,7 +9,6 @@ import pytest from deerflow.skills.installer import ( SkillSecurityScanError, - install_skill_from_archive, is_symlink_member, is_unsafe_zip_member, resolve_skill_dir_from_archive, @@ -17,6 +16,7 @@ from deerflow.skills.installer import ( should_ignore_archive_entry, ) from deerflow.skills.security_scanner import ScanResult +from deerflow.skills.storage import get_or_new_skill_storage # --------------------------------------------------------------------------- # is_unsafe_zip_member @@ -193,7 +193,7 @@ class TestInstallSkillFromArchive: zip_path = self._make_skill_zip(tmp_path) skills_root = tmp_path / "skills" skills_root.mkdir() - result = install_skill_from_archive(zip_path, skills_root=skills_root) + result = get_or_new_skill_storage(skills_path=skills_root).install_skill_from_archive(zip_path) assert result["success"] is True assert result["skill_name"] == "test-skill" assert (skills_root / "custom" / "test-skill" / "SKILL.md").exists() @@ -210,7 +210,7 @@ class TestInstallSkillFromArchive: monkeypatch.setattr("deerflow.skills.installer.scan_skill_content", _scan) - install_skill_from_archive(zip_path, skills_root=skills_root) + get_or_new_skill_storage(skills_path=skills_root).install_skill_from_archive(zip_path) assert calls == [ { @@ -240,7 +240,7 @@ class TestInstallSkillFromArchive: monkeypatch.setattr("deerflow.skills.installer.scan_skill_content", _scan) - install_skill_from_archive(zip_path, skills_root=skills_root) + get_or_new_skill_storage(skills_path=skills_root).install_skill_from_archive(zip_path) assert calls == [ { @@ -275,7 +275,7 @@ class TestInstallSkillFromArchive: skills_root.mkdir() with pytest.raises(SkillSecurityScanError, match="nested SKILL.md"): - install_skill_from_archive(zip_path, skills_root=skills_root) + get_or_new_skill_storage(skills_path=skills_root).install_skill_from_archive(zip_path) assert not (skills_root / "custom" / "test-skill").exists() @@ -295,7 +295,7 @@ class TestInstallSkillFromArchive: monkeypatch.setattr("deerflow.skills.installer.scan_skill_content", _scan) with pytest.raises(SkillSecurityScanError, match="rejected executable.*script needs review"): - install_skill_from_archive(zip_path, skills_root=skills_root) + get_or_new_skill_storage(skills_path=skills_root).install_skill_from_archive(zip_path) assert not (skills_root / "custom" / "test-skill").exists() @@ -310,7 +310,7 @@ class TestInstallSkillFromArchive: monkeypatch.setattr("deerflow.skills.installer.scan_skill_content", _scan) with pytest.raises(SkillSecurityScanError, match="Security scan blocked.*prompt injection"): - install_skill_from_archive(zip_path, skills_root=skills_root) + get_or_new_skill_storage(skills_path=skills_root).install_skill_from_archive(zip_path) assert not (skills_root / "custom" / "blocked-skill").exists() @@ -328,7 +328,7 @@ class TestInstallSkillFromArchive: monkeypatch.setattr("deerflow.skills.installer.shutil.copytree", _copytree) with pytest.raises(OSError, match="copy failed"): - install_skill_from_archive(zip_path, skills_root=skills_root) + get_or_new_skill_storage(skills_path=skills_root).install_skill_from_archive(zip_path) custom_dir = skills_root / "custom" assert not (custom_dir / "test-skill").exists() @@ -349,7 +349,7 @@ class TestInstallSkillFromArchive: monkeypatch.setattr("deerflow.skills.installer.shutil.copytree", _copytree) with pytest.raises(ValueError, match="already exists"): - install_skill_from_archive(zip_path, skills_root=skills_root) + get_or_new_skill_storage(skills_path=skills_root).install_skill_from_archive(zip_path) assert (target / "marker.txt").read_text(encoding="utf-8") == "external" assert not (target / "SKILL.md").exists() @@ -366,7 +366,7 @@ class TestInstallSkillFromArchive: monkeypatch.setattr("deerflow.skills.installer.shutil.move", _move) with pytest.raises(OSError, match="move failed"): - install_skill_from_archive(zip_path, skills_root=skills_root) + get_or_new_skill_storage(skills_path=skills_root).install_skill_from_archive(zip_path) assert not (skills_root / "custom" / "test-skill").exists() @@ -375,13 +375,13 @@ class TestInstallSkillFromArchive: skills_root = tmp_path / "skills" (skills_root / "custom" / "test-skill").mkdir(parents=True) with pytest.raises(ValueError, match="already exists"): - install_skill_from_archive(zip_path, skills_root=skills_root) + get_or_new_skill_storage(skills_path=skills_root).install_skill_from_archive(zip_path) def test_invalid_extension(self, tmp_path): bad_path = tmp_path / "bad.zip" bad_path.write_text("not a skill") with pytest.raises(ValueError, match=".skill"): - install_skill_from_archive(bad_path) + get_or_new_skill_storage(skills_path=tmp_path).install_skill_from_archive(bad_path) def test_bad_frontmatter(self, tmp_path): zip_path = tmp_path / "bad.skill" @@ -390,11 +390,11 @@ class TestInstallSkillFromArchive: skills_root = tmp_path / "skills" skills_root.mkdir() with pytest.raises(ValueError, match="Invalid skill"): - install_skill_from_archive(zip_path, skills_root=skills_root) + get_or_new_skill_storage(skills_path=skills_root).install_skill_from_archive(zip_path) - def test_nonexistent_file(self): + def test_nonexistent_file(self, tmp_path): with pytest.raises(FileNotFoundError): - install_skill_from_archive(Path("/nonexistent/path.skill")) + get_or_new_skill_storage(skills_path=tmp_path).install_skill_from_archive(Path("/nonexistent/path.skill")) def test_macosx_filtered_during_resolve(self, tmp_path): """Archive with __MACOSX dir still installs correctly.""" @@ -404,6 +404,6 @@ class TestInstallSkillFromArchive: zf.writestr("__MACOSX/._my-skill", "meta") skills_root = tmp_path / "skills" skills_root.mkdir() - result = install_skill_from_archive(zip_path, skills_root=skills_root) + result = get_or_new_skill_storage(skills_path=skills_root).install_skill_from_archive(zip_path) assert result["success"] is True assert result["skill_name"] == "my-skill" diff --git a/backend/tests/test_skills_loader.py b/backend/tests/test_skills_loader.py index 7d885444d..5a03532c6 100644 --- a/backend/tests/test_skills_loader.py +++ b/backend/tests/test_skills_loader.py @@ -1,8 +1,10 @@ """Tests for recursive skills loading.""" from pathlib import Path +from types import SimpleNamespace -from deerflow.skills.loader import get_skills_root_path, load_skills +from deerflow.config.skills_config import SkillsConfig +from deerflow.skills.storage import get_or_new_skill_storage def _write_skill(skill_dir: Path, name: str, description: str) -> None: @@ -14,7 +16,8 @@ def _write_skill(skill_dir: Path, name: str, description: str) -> None: def test_get_skills_root_path_points_to_project_root_skills(): """get_skills_root_path() should point to deer-flow/skills (sibling of backend/), not backend/packages/skills.""" - path = get_skills_root_path() + app_config = SimpleNamespace(skills=SkillsConfig()) + path = get_or_new_skill_storage(app_config=app_config).get_skills_root_path() assert path.name == "skills", f"Expected 'skills', got '{path.name}'" assert (path.parent / "backend").is_dir(), f"Expected skills path's parent to be project root containing 'backend/', but got {path}" @@ -27,7 +30,7 @@ def test_load_skills_discovers_nested_skills_and_sets_container_paths(tmp_path: _write_skill(skills_root / "public" / "parent" / "child-skill", "child-skill", "Child skill") _write_skill(skills_root / "custom" / "team" / "helper", "team-helper", "Team helper") - skills = load_skills(skills_path=skills_root, use_config=False, enabled_only=False) + skills = get_or_new_skill_storage(skills_path=skills_root).load_skills(enabled_only=False) by_name = {skill.name: skill for skill in skills} assert {"root-skill", "child-skill", "team-helper"} <= set(by_name) @@ -57,7 +60,7 @@ def test_load_skills_skips_hidden_directories(tmp_path: Path): "Hidden skill", ) - skills = load_skills(skills_path=skills_root, use_config=False, enabled_only=False) + skills = get_or_new_skill_storage(skills_path=skills_root).load_skills(enabled_only=False) names = {skill.name for skill in skills} assert "ok-skill" in names @@ -69,7 +72,7 @@ def test_load_skills_prefers_custom_over_public_with_same_name(tmp_path: Path): _write_skill(skills_root / "public" / "shared-skill", "shared-skill", "Public version") _write_skill(skills_root / "custom" / "shared-skill", "shared-skill", "Custom version") - skills = load_skills(skills_path=skills_root, use_config=False, enabled_only=False) + skills = get_or_new_skill_storage(skills_path=skills_root).load_skills(enabled_only=False) shared = next(skill for skill in skills if skill.name == "shared-skill") assert shared.category == "custom"