diff --git a/backend/app/gateway/routers/skills.py b/backend/app/gateway/routers/skills.py index ed07b708b..85eb77cc0 100644 --- a/backend/app/gateway/routers/skills.py +++ b/backend/app/gateway/routers/skills.py @@ -13,7 +13,7 @@ from deerflow.agents.lead_agent.prompt import refresh_skills_system_prompt_cache from deerflow.config.app_config import AppConfig from deerflow.config.extensions_config import ExtensionsConfig, SkillStateConfig, get_extensions_config, reload_extensions_config from deerflow.skills import Skill, load_skills -from deerflow.skills.installer import SkillAlreadyExistsError, install_skill_from_archive +from deerflow.skills.installer import SkillAlreadyExistsError, ainstall_skill_from_archive from deerflow.skills.manager import ( append_history, atomic_write, @@ -121,7 +121,7 @@ async def list_skills(config: AppConfig = Depends(get_config)) -> SkillsListResp async def install_skill(request: SkillInstallRequest) -> SkillInstallResponse: try: skill_file_path = resolve_thread_virtual_path(request.thread_id, request.path) - result = install_skill_from_archive(skill_file_path) + result = await ainstall_skill_from_archive(skill_file_path) await refresh_skills_system_prompt_cache_async() return SkillInstallResponse(**result) except FileNotFoundError as e: diff --git a/backend/packages/harness/deerflow/agents/factory.py b/backend/packages/harness/deerflow/agents/factory.py index 57361edba..bd57d733d 100644 --- a/backend/packages/harness/deerflow/agents/factory.py +++ b/backend/packages/harness/deerflow/agents/factory.py @@ -254,9 +254,11 @@ def _assemble_from_features( from deerflow.agents.middlewares.view_image_middleware import ViewImageMiddleware chain.append(ViewImageMiddleware()) - from deerflow.tools.builtins import view_image_tool - extra_tools.append(view_image_tool) + if feat.sandbox is not False: + from deerflow.tools.builtins import view_image_tool + + extra_tools.append(view_image_tool) # --- [11] Subagent --- if feat.subagent is not False: diff --git a/backend/packages/harness/deerflow/community/aio_sandbox/local_backend.py b/backend/packages/harness/deerflow/community/aio_sandbox/local_backend.py index 4b680df2d..15cbe3b78 100644 --- a/backend/packages/harness/deerflow/community/aio_sandbox/local_backend.py +++ b/backend/packages/harness/deerflow/community/aio_sandbox/local_backend.py @@ -9,6 +9,7 @@ from __future__ import annotations import json import logging import os +import shlex import subprocess from datetime import datetime @@ -86,6 +87,46 @@ def _format_container_mount(runtime: str, host_path: str, container_path: str, r return ["-v", mount_spec] +def _redact_container_command_for_log(cmd: list[str]) -> list[str]: + """Return a Docker/Container command with environment values redacted.""" + redacted: list[str] = [] + redact_next_env = False + + for arg in cmd: + if redact_next_env: + if "=" in arg: + key = arg.split("=", 1)[0] + redacted.append(f"{key}=" if key else "") + else: + redacted.append(arg) + redact_next_env = False + continue + + if arg in {"-e", "--env"}: + redacted.append(arg) + redact_next_env = True + continue + + if arg.startswith("--env="): + value = arg.removeprefix("--env=") + if "=" in value: + key = value.split("=", 1)[0] + redacted.append(f"--env={key}=" if key else "--env=") + else: + redacted.append(arg) + continue + + redacted.append(arg) + + return redacted + + +def _format_container_command_for_log(cmd: list[str]) -> str: + if os.name == "nt": + return subprocess.list2cmdline(cmd) + return shlex.join(cmd) + + class LocalContainerBackend(SandboxBackend): """Backend that manages sandbox containers locally using Docker or Apple Container. @@ -464,7 +505,8 @@ class LocalContainerBackend(SandboxBackend): cmd.append(self._image) - logger.info(f"Starting container using {self._runtime}: {' '.join(cmd)}") + log_cmd = _format_container_command_for_log(_redact_container_command_for_log(cmd)) + logger.info(f"Starting container using {self._runtime}: {log_cmd}") try: result = subprocess.run(cmd, capture_output=True, text=True, check=True) diff --git a/backend/packages/harness/deerflow/models/mindie_provider.py b/backend/packages/harness/deerflow/models/mindie_provider.py index 5f0d12e83..a75ae0aba 100644 --- a/backend/packages/harness/deerflow/models/mindie_provider.py +++ b/backend/packages/harness/deerflow/models/mindie_provider.py @@ -1,4 +1,5 @@ import ast +import html import json import re import uuid @@ -36,8 +37,8 @@ def _fix_messages(messages: list) -> list: if isinstance(msg, AIMessage) and getattr(msg, "tool_calls", []): xml_parts = [] for tool in msg.tool_calls: - args_xml = " ".join(f"{json.dumps(v, ensure_ascii=False)}" for k, v in tool.get("args", {}).items()) - xml_parts.append(f" {args_xml} ") + args_xml = " ".join(f"{html.escape(v if isinstance(v, str) else json.dumps(v, ensure_ascii=False), quote=False)}" for k, v in tool.get("args", {}).items()) + xml_parts.append(f" {args_xml} ") full_text = f"{text}\n" + "\n".join(xml_parts) if text else "\n".join(xml_parts) fixed.append(AIMessage(content=full_text.strip() or " ")) continue @@ -80,13 +81,24 @@ def _parse_xml_tool_call_to_dict(content: str) -> tuple[str, list[dict]]: func_match = re.search(r"]+)>", inner_content) if not func_match: continue - function_name = func_match.group(1).strip() + function_name = html.unescape(func_match.group(1).strip()) + + # Ignore nested tool blocks when extracting parameters for this call. + # Nested `` sections represent separate invocations and + # their `` tags must not leak into the current call args. + param_source_parts: list[str] = [] + nested_cursor = 0 + for nested_start, nested_end, _ in _iter_tool_call_blocks(inner_content): + param_source_parts.append(inner_content[nested_cursor:nested_start]) + nested_cursor = nested_end + param_source_parts.append(inner_content[nested_cursor:]) + param_source = "".join(param_source_parts) args = {} param_pattern = re.compile(r"]+)>(.*?)", re.DOTALL) - for param_match in param_pattern.finditer(inner_content): - key = param_match.group(1).strip() - raw_value = param_match.group(2).strip() + for param_match in param_pattern.finditer(param_source): + key = html.unescape(param_match.group(1).strip()) + raw_value = html.unescape(param_match.group(2).strip()) # Attempt to deserialize string values into native Python types # to satisfy downstream Pydantic validation. diff --git a/backend/packages/harness/deerflow/sandbox/local/list_dir.py b/backend/packages/harness/deerflow/sandbox/local/list_dir.py index b1031d340..35e51f848 100644 --- a/backend/packages/harness/deerflow/sandbox/local/list_dir.py +++ b/backend/packages/harness/deerflow/sandbox/local/list_dir.py @@ -22,6 +22,13 @@ def list_dir(path: str, max_depth: int = 2) -> list[str]: if not root_path.is_dir(): return result + def _is_within_root(candidate: Path) -> bool: + try: + candidate.relative_to(root_path) + return True + except ValueError: + return False + def _traverse(current_path: Path, current_depth: int) -> None: """Recursively traverse directories up to max_depth.""" if current_depth > max_depth: @@ -32,8 +39,23 @@ def list_dir(path: str, max_depth: int = 2) -> list[str]: if should_ignore_name(item.name): continue + if item.is_symlink(): + try: + item_resolved = item.resolve() + if not _is_within_root(item_resolved): + continue + except OSError: + continue + post_fix = "/" if item_resolved.is_dir() else "" + result.append(str(item_resolved) + post_fix) + continue + + item_resolved = item.resolve() + if not _is_within_root(item_resolved): + continue + post_fix = "/" if item.is_dir() else "" - result.append(str(item.resolve()) + post_fix) + result.append(str(item_resolved) + post_fix) # Recurse into subdirectories if not at max depth if item.is_dir() and current_depth < max_depth: diff --git a/backend/packages/harness/deerflow/sandbox/local/local_sandbox.py b/backend/packages/harness/deerflow/sandbox/local/local_sandbox.py index ae8c948b0..116a62159 100644 --- a/backend/packages/harness/deerflow/sandbox/local/local_sandbox.py +++ b/backend/packages/harness/deerflow/sandbox/local/local_sandbox.py @@ -5,6 +5,7 @@ import shutil import subprocess from dataclasses import dataclass from pathlib import Path +from typing import NamedTuple from deerflow.sandbox.local.list_dir import list_dir from deerflow.sandbox.sandbox import Sandbox @@ -20,6 +21,11 @@ class PathMapping: read_only: bool = False +class ResolvedPath(NamedTuple): + path: str + mapping: PathMapping | None + + class LocalSandbox(Sandbox): @staticmethod def _shell_name(shell: str) -> str: @@ -91,7 +97,23 @@ class LocalSandbox(Sandbox): return best_mapping.read_only - def _resolve_path(self, path: str) -> str: + def _find_path_mapping(self, path: str) -> tuple[PathMapping, str] | None: + path_str = str(path) + + for mapping in sorted(self.path_mappings, key=lambda m: len(m.container_path.rstrip("/") or "/"), reverse=True): + container_path = mapping.container_path.rstrip("/") or "/" + if container_path == "/": + if path_str.startswith("/"): + return mapping, path_str.lstrip("/") + continue + + if path_str == container_path or path_str.startswith(container_path + "/"): + relative = path_str[len(container_path) :].lstrip("/") + return mapping, relative + + return None + + def _resolve_path_with_mapping(self, path: str) -> ResolvedPath: """ Resolve container path to actual local path using mappings. @@ -99,22 +121,30 @@ class LocalSandbox(Sandbox): path: Path that might be a container path Returns: - Resolved local path + Resolved local path and the matched mapping, if any """ path_str = str(path) - # Try each mapping (longest prefix first for more specific matches) - for mapping in sorted(self.path_mappings, key=lambda m: len(m.container_path), reverse=True): - container_path = mapping.container_path - local_path = mapping.local_path - if path_str == container_path or path_str.startswith(container_path + "/"): - # Replace the container path prefix with local path - relative = path_str[len(container_path) :].lstrip("/") - resolved = str(Path(local_path) / relative) if relative else local_path - return resolved + mapping_match = self._find_path_mapping(path_str) + if mapping_match is None: + return ResolvedPath(path_str, None) - # No mapping found, return original path - return path_str + mapping, relative = mapping_match + local_root = Path(mapping.local_path).resolve() + resolved_path = (local_root / relative).resolve() if relative else local_root + + try: + resolved_path.relative_to(local_root) + except ValueError as exc: + raise PermissionError(errno.EACCES, "Access denied: path escapes mounted directory", path_str) from exc + + return ResolvedPath(str(resolved_path), mapping) + + def _resolve_path(self, path: str) -> str: + return self._resolve_path_with_mapping(path).path + + def _is_resolved_path_read_only(self, resolved: ResolvedPath) -> bool: + return bool(resolved.mapping and resolved.mapping.read_only) or self._is_read_only_path(resolved.path) def _reverse_resolve_path(self, path: str) -> str: """ @@ -309,8 +339,14 @@ class LocalSandbox(Sandbox): def list_dir(self, path: str, max_depth=2) -> list[str]: resolved_path = self._resolve_path(path) entries = list_dir(resolved_path, max_depth) - # Reverse resolve local paths back to container paths in output - return [self._reverse_resolve_paths_in_output(entry) for entry in entries] + # Reverse resolve local paths back to container paths and preserve + # list_dir's trailing "/" marker for directories. + result: list[str] = [] + for entry in entries: + is_dir = entry.endswith(("/", "\\")) + reversed_entry = self._reverse_resolve_path(entry.rstrip("/\\")) if is_dir else self._reverse_resolve_path(entry) + result.append(f"{reversed_entry}/" if is_dir and not reversed_entry.endswith("/") else reversed_entry) + return result def read_file(self, path: str) -> str: resolved_path = self._resolve_path(path) @@ -329,8 +365,9 @@ class LocalSandbox(Sandbox): raise type(e)(e.errno, e.strerror, path) from None def write_file(self, path: str, content: str, append: bool = False) -> None: - resolved_path = self._resolve_path(path) - if self._is_read_only_path(resolved_path): + resolved = self._resolve_path_with_mapping(path) + resolved_path = resolved.path + if self._is_resolved_path_read_only(resolved): raise OSError(errno.EROFS, "Read-only file system", path) try: dir_path = os.path.dirname(resolved_path) @@ -384,8 +421,9 @@ class LocalSandbox(Sandbox): ], truncated def update_file(self, path: str, content: bytes) -> None: - resolved_path = self._resolve_path(path) - if self._is_read_only_path(resolved_path): + resolved = self._resolve_path_with_mapping(path) + resolved_path = resolved.path + if self._is_resolved_path_read_only(resolved): raise OSError(errno.EROFS, "Read-only file system", path) try: dir_path = os.path.dirname(resolved_path) diff --git a/backend/packages/harness/deerflow/sandbox/tools.py b/backend/packages/harness/deerflow/sandbox/tools.py index 7cee00ae1..32ee7d646 100644 --- a/backend/packages/harness/deerflow/sandbox/tools.py +++ b/backend/packages/harness/deerflow/sandbox/tools.py @@ -22,6 +22,9 @@ from deerflow.sandbox.security import LOCAL_HOST_BASH_DISABLED_MESSAGE, is_host_ _ABSOLUTE_PATH_PATTERN = re.compile(r"(?()]+)") _FILE_URL_PATTERN = re.compile(r"\bfile://\S+", re.IGNORECASE) +_URL_WITH_SCHEME_PATTERN = re.compile(r"^[a-z][a-z0-9+.-]*://", re.IGNORECASE) +_URL_IN_COMMAND_PATTERN = re.compile(r"\b[a-z][a-z0-9+.-]*://[^\s\"'`;&|<>()]+", re.IGNORECASE) +_DOTDOT_PATH_SEGMENT_PATTERN = re.compile(r"(?:^|[/\\=])\.\.(?:$|[/\\])") _LOCAL_BASH_SYSTEM_PATH_PREFIXES = ( "/bin/", "/usr/bin/", @@ -37,6 +40,42 @@ _DEFAULT_GLOB_MAX_RESULTS = 200 _MAX_GLOB_MAX_RESULTS = 1000 _DEFAULT_GREP_MAX_RESULTS = 100 _MAX_GREP_MAX_RESULTS = 500 +_LOCAL_BASH_CWD_COMMANDS = {"cd", "pushd"} +_LOCAL_BASH_COMMAND_WRAPPERS = {"command", "builtin"} +_LOCAL_BASH_COMMAND_PREFIX_KEYWORDS = {"!", "{", "case", "do", "elif", "else", "for", "if", "select", "then", "time", "until", "while"} +_LOCAL_BASH_COMMAND_END_KEYWORDS = {"}", "done", "esac", "fi"} +_LOCAL_BASH_ROOT_PATH_COMMANDS = { + "awk", + "cat", + "cp", + "du", + "find", + "grep", + "head", + "less", + "ln", + "ls", + "more", + "mv", + "rm", + "sed", + "tail", + "tar", +} +_SHELL_COMMAND_SEPARATORS = {";", "&&", "||", "|", "|&", "&", "(", ")"} +_SHELL_REDIRECTION_OPERATORS = { + "<", + ">", + "<<", + ">>", + "<<<", + "<>", + ">&", + "<&", + "&>", + "&>>", + ">|", +} def _get_skills_container_path() -> str: @@ -549,7 +588,7 @@ def validate_local_tool_path(path: str, thread_data: ThreadDataState | None, *, This function is a security gate — it checks whether *path* may be accessed and raises on violation. It does **not** resolve the virtual path to a host path; callers are responsible for resolution via - ``_resolve_and_validate_user_data_path`` or ``_resolve_skills_path``. + ``resolve_and_validate_user_data_path`` or ``_resolve_skills_path``. Allowed virtual-path families: - ``/mnt/user-data/*`` — always allowed (read + write) @@ -636,6 +675,219 @@ def _resolve_and_validate_user_data_path(path: str, thread_data: ThreadDataState return str(resolved) +def _is_non_file_url_token(token: str) -> bool: + """Return True for URL tokens that should not be interpreted as paths.""" + values = [token] + if "=" in token: + values.append(token.split("=", 1)[1]) + + for value in values: + match = _URL_WITH_SCHEME_PATTERN.match(value) + if match and not value.lower().startswith("file://"): + return True + return False + + +def _non_file_url_spans(command: str) -> list[tuple[int, int]]: + spans = [] + for match in _URL_IN_COMMAND_PATTERN.finditer(command): + if not match.group().lower().startswith("file://"): + spans.append(match.span()) + return spans + + +def _is_in_spans(position: int, spans: list[tuple[int, int]]) -> bool: + return any(start <= position < end for start, end in spans) + + +def _has_dotdot_path_segment(token: str) -> bool: + if _is_non_file_url_token(token): + return False + return bool(_DOTDOT_PATH_SEGMENT_PATTERN.search(token)) + + +def _split_shell_tokens(command: str) -> list[str]: + try: + normalized = command.replace("\r\n", "\n").replace("\r", "\n").replace("\n", " ; ") + lexer = shlex.shlex(normalized, posix=True, punctuation_chars=True) + lexer.whitespace_split = True + lexer.commenters = "" + return list(lexer) + except ValueError: + # The shell will reject malformed quoting later; keep validation + # best-effort instead of turning syntax errors into security messages. + return command.split() + + +def _is_shell_command_separator(token: str) -> bool: + return token in _SHELL_COMMAND_SEPARATORS + + +def _is_shell_redirection_operator(token: str) -> bool: + return token in _SHELL_REDIRECTION_OPERATORS + + +def _is_shell_assignment(token: str) -> bool: + name, separator, _ = token.partition("=") + if not separator or not name: + return False + return bool(re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", name)) + + +def _is_allowed_local_bash_absolute_path(path: str, allowed_paths: list[str], *, allow_system_paths: bool) -> bool: + # Check for MCP filesystem server allowed paths + if any(path.startswith(allowed_path) or path == allowed_path.rstrip("/") for allowed_path in allowed_paths): + _reject_path_traversal(path) + return True + + if path == VIRTUAL_PATH_PREFIX or path.startswith(f"{VIRTUAL_PATH_PREFIX}/"): + _reject_path_traversal(path) + return True + + # Allow skills container path (resolved by tools.py before passing to sandbox) + if _is_skills_path(path): + _reject_path_traversal(path) + return True + + # Allow ACP workspace path (path-traversal check only) + if _is_acp_workspace_path(path): + _reject_path_traversal(path) + return True + + # Allow custom mount container paths + if _is_custom_mount_path(path): + _reject_path_traversal(path) + return True + + if allow_system_paths and any(path == prefix.rstrip("/") or path.startswith(prefix) for prefix in _LOCAL_BASH_SYSTEM_PATH_PREFIXES): + return True + + return False + + +def _next_cd_target(tokens: list[str], start_index: int) -> tuple[str | None, int]: + index = start_index + while index < len(tokens): + token = tokens[index] + if _is_shell_command_separator(token): + return None, index + if _is_shell_redirection_operator(token): + index += 2 + continue + if token == "--": + index += 1 + continue + if token in {"-L", "-P", "-e", "-@"}: + index += 1 + continue + if token.startswith("-") and token != "-": + index += 1 + continue + return token, index + 1 + return None, index + + +def _validate_local_bash_cwd_target(command_name: str, target: str | None, allowed_paths: list[str]) -> None: + if target is None or target == "-": + raise PermissionError(f"Unsafe working directory change in command: {command_name}. Use paths under {VIRTUAL_PATH_PREFIX}") + if target.startswith(("$", "`")): + raise PermissionError(f"Unsafe working directory change in command: {command_name} {target}. Use paths under {VIRTUAL_PATH_PREFIX}") + if target.startswith("~"): + raise PermissionError(f"Unsafe working directory change in command: {command_name} {target}. Use paths under {VIRTUAL_PATH_PREFIX}") + if target.startswith("/"): + _reject_path_traversal(target) + if not _is_allowed_local_bash_absolute_path(target, allowed_paths, allow_system_paths=False): + raise PermissionError(f"Unsafe working directory change in command: {command_name} {target}. Use paths under {VIRTUAL_PATH_PREFIX}") + + +def _looks_like_unsafe_cwd_target(target: str | None) -> bool: + if target is None: + return False + return target == "-" or target.startswith(("$", "`", "~", "/", "..")) or _has_dotdot_path_segment(target) + + +def _validate_local_bash_root_path_args(command_name: str, tokens: list[str], start_index: int) -> None: + if command_name not in _LOCAL_BASH_ROOT_PATH_COMMANDS: + return + + index = start_index + while index < len(tokens): + token = tokens[index] + if _is_shell_command_separator(token): + return + if _is_shell_redirection_operator(token): + index += 2 + continue + if token == "/" and not _is_non_file_url_token(token): + raise PermissionError(f"Unsafe absolute paths in command: /. Use paths under {VIRTUAL_PATH_PREFIX}") + index += 1 + + +def _validate_local_bash_shell_tokens(command: str, allowed_paths: list[str]) -> None: + """Conservatively reject relative path escapes missed by absolute-path scanning.""" + if re.search(r"\$\([^)]*\b(?:cd|pushd)\b", command): + raise PermissionError(f"Unsafe working directory change in command substitution. Use paths under {VIRTUAL_PATH_PREFIX}") + + tokens = _split_shell_tokens(command) + + for token in tokens: + if _is_shell_command_separator(token) or _is_shell_redirection_operator(token): + continue + if _has_dotdot_path_segment(token): + raise PermissionError("Access denied: path traversal detected") + + at_command_start = True + index = 0 + while index < len(tokens): + token = tokens[index] + + if _is_shell_command_separator(token): + at_command_start = True + index += 1 + continue + + if _is_shell_redirection_operator(token): + index += 1 + continue + + if at_command_start and _is_shell_assignment(token): + index += 1 + continue + + command_name = token.rsplit("/", 1)[-1] + if at_command_start and command_name in _LOCAL_BASH_COMMAND_PREFIX_KEYWORDS | _LOCAL_BASH_COMMAND_END_KEYWORDS: + index += 1 + continue + + if not at_command_start: + index += 1 + continue + + at_command_start = False + if command_name in _LOCAL_BASH_COMMAND_WRAPPERS and index + 1 < len(tokens): + wrapped_name = tokens[index + 1].rsplit("/", 1)[-1] + if wrapped_name in _LOCAL_BASH_CWD_COMMANDS: + target, next_index = _next_cd_target(tokens, index + 2) + _validate_local_bash_cwd_target(wrapped_name, target, allowed_paths) + index = next_index + continue + _validate_local_bash_root_path_args(wrapped_name, tokens, index + 2) + + if command_name not in _LOCAL_BASH_CWD_COMMANDS: + _validate_local_bash_root_path_args(command_name, tokens, index + 1) + index += 1 + continue + + target, next_index = _next_cd_target(tokens, index + 1) + _validate_local_bash_cwd_target(command_name, target, allowed_paths) + index = next_index + + +def resolve_and_validate_user_data_path(path: str, thread_data: ThreadDataState) -> str: + """Resolve a /mnt/user-data virtual path and validate it stays in bounds.""" + return _resolve_and_validate_user_data_path(path, thread_data) + + def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState | None) -> None: """Validate absolute paths in local-sandbox bash commands. @@ -661,33 +913,14 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState unsafe_paths: list[str] = [] allowed_paths = _get_mcp_allowed_paths() + _validate_local_bash_shell_tokens(command, allowed_paths) + url_spans = _non_file_url_spans(command) - for absolute_path in _ABSOLUTE_PATH_PATTERN.findall(command): - # Check for MCP filesystem server allowed paths - if any(absolute_path.startswith(path) or absolute_path == path.rstrip("/") for path in allowed_paths): - _reject_path_traversal(absolute_path) + for match in _ABSOLUTE_PATH_PATTERN.finditer(command): + if _is_in_spans(match.start(), url_spans): continue - - if absolute_path == VIRTUAL_PATH_PREFIX or absolute_path.startswith(f"{VIRTUAL_PATH_PREFIX}/"): - _reject_path_traversal(absolute_path) - continue - - # Allow skills container path (resolved by tools.py before passing to sandbox) - if _is_skills_path(absolute_path): - _reject_path_traversal(absolute_path) - continue - - # Allow ACP workspace path (path-traversal check only) - if _is_acp_workspace_path(absolute_path): - _reject_path_traversal(absolute_path) - continue - - # Allow custom mount container paths - if _is_custom_mount_path(absolute_path): - _reject_path_traversal(absolute_path) - continue - - if any(absolute_path == prefix.rstrip("/") or absolute_path.startswith(prefix) for prefix in _LOCAL_BASH_SYSTEM_PATH_PREFIXES): + absolute_path = match.group() + if _is_allowed_local_bash_absolute_path(absolute_path, allowed_paths, allow_system_paths=True): continue unsafe_paths.append(absolute_path) diff --git a/backend/packages/harness/deerflow/skills/__init__.py b/backend/packages/harness/deerflow/skills/__init__.py index bbdca0650..4fcb7cc0d 100644 --- a/backend/packages/harness/deerflow/skills/__init__.py +++ b/backend/packages/harness/deerflow/skills/__init__.py @@ -1,4 +1,4 @@ -from .installer import SkillAlreadyExistsError, install_skill_from_archive +from .installer import SkillAlreadyExistsError, SkillSecurityScanError, ainstall_skill_from_archive, install_skill_from_archive from .loader import get_skills_root_path, load_skills from .types import Skill from .validation import ALLOWED_FRONTMATTER_PROPERTIES, _validate_skill_frontmatter @@ -10,5 +10,7 @@ __all__ = [ "ALLOWED_FRONTMATTER_PROPERTIES", "_validate_skill_frontmatter", "install_skill_from_archive", + "ainstall_skill_from_archive", "SkillAlreadyExistsError", + "SkillSecurityScanError", ] diff --git a/backend/packages/harness/deerflow/skills/installer.py b/backend/packages/harness/deerflow/skills/installer.py index f7234336e..e58367817 100644 --- a/backend/packages/harness/deerflow/skills/installer.py +++ b/backend/packages/harness/deerflow/skills/installer.py @@ -4,6 +4,8 @@ Pure business logic — no FastAPI/HTTP dependencies. Both Gateway and Client delegate to these functions. """ +import asyncio +import concurrent.futures import logging import posixpath import shutil @@ -13,15 +15,23 @@ import zipfile from pathlib import Path, PurePosixPath, PureWindowsPath from deerflow.skills.loader import get_skills_root_path +from deerflow.skills.security_scanner import scan_skill_content from deerflow.skills.validation import _validate_skill_frontmatter logger = logging.getLogger(__name__) +_PROMPT_INPUT_DIRS = {"references", "templates"} +_PROMPT_INPUT_SUFFIXES = frozenset({".json", ".markdown", ".md", ".rst", ".txt", ".yaml", ".yml"}) + class SkillAlreadyExistsError(ValueError): """Raised when a skill with the same name is already installed.""" +class SkillSecurityScanError(ValueError): + """Raised when a skill archive fails security scanning.""" + + def is_unsafe_zip_member(info: zipfile.ZipInfo) -> bool: """Return True if the zip member path is absolute or attempts directory traversal.""" name = info.filename @@ -114,7 +124,78 @@ def safe_extract_skill_archive( dst.write(chunk) -def install_skill_from_archive( +def _is_script_support_file(rel_path: Path) -> bool: + return bool(rel_path.parts) and rel_path.parts[0] == "scripts" + + +def _should_scan_support_file(rel_path: Path) -> bool: + if _is_script_support_file(rel_path): + return True + return bool(rel_path.parts) and rel_path.parts[0] in _PROMPT_INPUT_DIRS and rel_path.suffix.lower() in _PROMPT_INPUT_SUFFIXES + + +def _move_staged_skill_into_reserved_target(staging_target: Path, target: Path) -> None: + installed = False + reserved = False + try: + target.mkdir(mode=0o700) + reserved = True + for child in staging_target.iterdir(): + shutil.move(str(child), target / child.name) + installed = True + except FileExistsError as e: + raise SkillAlreadyExistsError(f"Skill '{target.name}' already exists") from e + finally: + if reserved and not installed and target.exists(): + shutil.rmtree(target) + + +async def _scan_skill_file_or_raise(skill_dir: Path, path: Path, skill_name: str, *, executable: bool) -> None: + rel_path = path.relative_to(skill_dir).as_posix() + location = f"{skill_name}/{rel_path}" + try: + content = path.read_text(encoding="utf-8") + except UnicodeDecodeError as e: + raise SkillSecurityScanError(f"Security scan failed for skill '{skill_name}': {location} must be valid UTF-8") from e + + try: + result = await scan_skill_content(content, executable=executable, location=location) + except Exception as e: + raise SkillSecurityScanError(f"Security scan failed for {location}: {e}") from e + + decision = getattr(result, "decision", None) + reason = str(getattr(result, "reason", "") or "No reason provided.") + if decision == "block": + if rel_path == "SKILL.md": + raise SkillSecurityScanError(f"Security scan blocked skill '{skill_name}': {reason}") + raise SkillSecurityScanError(f"Security scan blocked {location}: {reason}") + if executable and decision != "allow": + raise SkillSecurityScanError(f"Security scan rejected executable {location}: {reason}") + if decision not in {"allow", "warn"}: + raise SkillSecurityScanError(f"Security scan failed for {location}: invalid scanner decision {decision!r}") + + +async def _scan_skill_archive_contents_or_raise(skill_dir: Path, skill_name: str) -> None: + """Run the skill security scanner against all installable text and script files.""" + skill_md = skill_dir / "SKILL.md" + await _scan_skill_file_or_raise(skill_dir, skill_md, skill_name, executable=False) + + for path in sorted(skill_dir.rglob("*")): + if not path.is_file(): + continue + + rel_path = path.relative_to(skill_dir) + if rel_path == Path("SKILL.md"): + continue + if path.name == "SKILL.md": + raise SkillSecurityScanError(f"Security scan failed for skill '{skill_name}': nested SKILL.md is not allowed at {skill_name}/{rel_path.as_posix()}") + if not _should_scan_support_file(rel_path): + continue + + await _scan_skill_file_or_raise(skill_dir, path, skill_name, executable=_is_script_support_file(rel_path)) + + +async def ainstall_skill_from_archive( zip_path: str | Path, *, skills_root: Path | None = None, @@ -173,7 +254,12 @@ def install_skill_from_archive( if target.exists(): raise SkillAlreadyExistsError(f"Skill '{skill_name}' already exists") - shutil.copytree(skill_dir, target) + await _scan_skill_archive_contents_or_raise(skill_dir, skill_name) + + with tempfile.TemporaryDirectory(prefix=f".installing-{skill_name}-", dir=custom_dir) as staging_root: + staging_target = Path(staging_root) / skill_name + shutil.copytree(skill_dir, staging_target) + _move_staged_skill_into_reserved_target(staging_target, target) logger.info("Skill %r installed to %s", skill_name, target) return { @@ -181,3 +267,24 @@ def install_skill_from_archive( "skill_name": skill_name, "message": f"Skill '{skill_name}' installed successfully", } + + +def _run_async_install(coro): + try: + loop = asyncio.get_running_loop() + except RuntimeError: + loop = None + + if loop is not None and loop.is_running(): + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: + return executor.submit(asyncio.run, coro).result() + return asyncio.run(coro) + + +def install_skill_from_archive( + zip_path: str | Path, + *, + skills_root: Path | None = None, +) -> dict: + """Install a skill from a .skill archive (ZIP).""" + return _run_async_install(ainstall_skill_from_archive(zip_path, skills_root=skills_root)) diff --git a/backend/packages/harness/deerflow/tools/builtins/view_image_tool.py b/backend/packages/harness/deerflow/tools/builtins/view_image_tool.py index e47ab1938..3dedcab70 100644 --- a/backend/packages/harness/deerflow/tools/builtins/view_image_tool.py +++ b/backend/packages/harness/deerflow/tools/builtins/view_image_tool.py @@ -8,7 +8,42 @@ from langchain_core.messages import ToolMessage from langgraph.types import Command from langgraph.typing import ContextT -from deerflow.agents.thread_state import ThreadState +from deerflow.agents.thread_state import ThreadDataState, ThreadState +from deerflow.config.paths import VIRTUAL_PATH_PREFIX + +_ALLOWED_IMAGE_VIRTUAL_ROOTS = ( + f"{VIRTUAL_PATH_PREFIX}/workspace", + f"{VIRTUAL_PATH_PREFIX}/uploads", + f"{VIRTUAL_PATH_PREFIX}/outputs", +) +_ALLOWED_IMAGE_VIRTUAL_ROOTS_TEXT = ", ".join(_ALLOWED_IMAGE_VIRTUAL_ROOTS) +_MAX_IMAGE_BYTES = 20 * 1024 * 1024 +_EXTENSION_TO_MIME = { + ".jpg": "image/jpeg", + ".jpeg": "image/jpeg", + ".png": "image/png", + ".webp": "image/webp", +} + + +def _is_allowed_image_virtual_path(image_path: str) -> bool: + return any(image_path == root or image_path.startswith(f"{root}/") for root in _ALLOWED_IMAGE_VIRTUAL_ROOTS) + + +def _detect_image_mime(image_data: bytes) -> str | None: + if image_data.startswith(b"\xff\xd8\xff"): + return "image/jpeg" + if image_data.startswith(b"\x89PNG\r\n\x1a\n"): + return "image/png" + if len(image_data) >= 12 and image_data.startswith(b"RIFF") and image_data[8:12] == b"WEBP": + return "image/webp" + return None + + +def _sanitize_image_error(error: Exception, thread_data: ThreadDataState | None) -> str: + from deerflow.sandbox.tools import mask_local_paths_in_output + + return mask_local_paths_in_output(f"{type(error).__name__}: {error}", thread_data) @tool("view_image", parse_docstring=True) @@ -29,22 +64,39 @@ def view_image_tool( - For multiple files at once (use present_files instead) Args: - image_path: Absolute path to the image file. Common formats supported: jpg, jpeg, png, webp. + image_path: Absolute /mnt/user-data virtual path to the image file. Common formats supported: jpg, jpeg, png, webp. """ - from deerflow.sandbox.tools import get_thread_data, replace_virtual_path + from deerflow.sandbox.exceptions import SandboxRuntimeError + from deerflow.sandbox.tools import ( + get_thread_data, + resolve_and_validate_user_data_path, + validate_local_tool_path, + ) - # Replace virtual path with actual path - # /mnt/user-data/* paths are mapped to thread-specific directories thread_data = get_thread_data(runtime) - actual_path = replace_virtual_path(image_path, thread_data) - # Validate that the path is absolute - path = Path(actual_path) - if not path.is_absolute(): + if not _is_allowed_image_virtual_path(image_path): return Command( - update={"messages": [ToolMessage(f"Error: Path must be absolute, got: {image_path}", tool_call_id=tool_call_id)]}, + update={ + "messages": [ + ToolMessage( + f"Error: Only image paths under {_ALLOWED_IMAGE_VIRTUAL_ROOTS_TEXT} are allowed", + tool_call_id=tool_call_id, + ) + ] + }, ) + try: + validate_local_tool_path(image_path, thread_data, read_only=True) + actual_path = resolve_and_validate_user_data_path(image_path, thread_data) + except (PermissionError, SandboxRuntimeError) as e: + return Command( + update={"messages": [ToolMessage(f"Error: {str(e)}", tool_call_id=tool_call_id)]}, + ) + + path = Path(actual_path) + # Validate that the file exists if not path.exists(): return Command( @@ -58,34 +110,49 @@ def view_image_tool( ) # Validate image extension - valid_extensions = {".jpg", ".jpeg", ".png", ".webp"} - if path.suffix.lower() not in valid_extensions: + expected_mime_type = _EXTENSION_TO_MIME.get(path.suffix.lower()) + if expected_mime_type is None: return Command( - update={"messages": [ToolMessage(f"Error: Unsupported image format: {path.suffix}. Supported formats: {', '.join(valid_extensions)}", tool_call_id=tool_call_id)]}, + update={"messages": [ToolMessage(f"Error: Unsupported image format: {path.suffix}. Supported formats: {', '.join(_EXTENSION_TO_MIME)}", tool_call_id=tool_call_id)]}, ) # Detect MIME type from file extension mime_type, _ = mimetypes.guess_type(actual_path) if mime_type is None: - # Fallback to default MIME types for common image formats - extension_to_mime = { - ".jpg": "image/jpeg", - ".jpeg": "image/jpeg", - ".png": "image/png", - ".webp": "image/webp", - } - mime_type = extension_to_mime.get(path.suffix.lower(), "application/octet-stream") + mime_type = expected_mime_type + + try: + image_size = path.stat().st_size + except OSError as e: + return Command( + update={"messages": [ToolMessage(f"Error reading image metadata: {_sanitize_image_error(e, thread_data)}", tool_call_id=tool_call_id)]}, + ) + if image_size > _MAX_IMAGE_BYTES: + return Command( + update={"messages": [ToolMessage(f"Error: Image file is too large: {image_size} bytes. Maximum supported size is {_MAX_IMAGE_BYTES} bytes", tool_call_id=tool_call_id)]}, + ) # Read image file and convert to base64 try: with open(actual_path, "rb") as f: image_data = f.read() - image_base64 = base64.b64encode(image_data).decode("utf-8") except Exception as e: return Command( - update={"messages": [ToolMessage(f"Error reading image file: {str(e)}", tool_call_id=tool_call_id)]}, + update={"messages": [ToolMessage(f"Error reading image file: {_sanitize_image_error(e, thread_data)}", tool_call_id=tool_call_id)]}, ) + detected_mime_type = _detect_image_mime(image_data) + if detected_mime_type is None: + return Command( + update={"messages": [ToolMessage("Error: File contents do not match a supported image format", tool_call_id=tool_call_id)]}, + ) + if detected_mime_type != expected_mime_type: + return Command( + update={"messages": [ToolMessage(f"Error: Image contents are {detected_mime_type}, but file extension indicates {expected_mime_type}", tool_call_id=tool_call_id)]}, + ) + mime_type = detected_mime_type + image_base64 = base64.b64encode(image_data).decode("utf-8") + # Update viewed_images in state # The merge_viewed_images reducer will handle merging with existing images new_viewed_images = {image_path: {"base64": image_base64, "mime_type": mime_type}} diff --git a/backend/tests/test_aio_sandbox_local_backend.py b/backend/tests/test_aio_sandbox_local_backend.py index d0b99bec1..d74786682 100644 --- a/backend/tests/test_aio_sandbox_local_backend.py +++ b/backend/tests/test_aio_sandbox_local_backend.py @@ -1,4 +1,8 @@ -from deerflow.community.aio_sandbox.local_backend import _format_container_mount +import logging +import os +from types import SimpleNamespace + +from deerflow.community.aio_sandbox.local_backend import LocalContainerBackend, _format_container_command_for_log, _format_container_mount, _redact_container_command_for_log def test_format_container_mount_uses_mount_syntax_for_docker_windows_paths(): @@ -26,3 +30,89 @@ def test_format_container_mount_keeps_volume_syntax_for_apple_container(): "-v", "/host/path:/mnt/path:ro", ] + + +def test_redact_container_command_for_log_redacts_env_values(): + redacted = _redact_container_command_for_log( + [ + "docker", + "run", + "-e", + "API_KEY=secret-value", + "--env=TOKEN=token-value", + "--name", + "sandbox", + "image", + ] + ) + + assert "API_KEY=" in redacted + assert "--env=TOKEN=" in redacted + assert "secret-value" not in " ".join(redacted) + assert "token-value" not in " ".join(redacted) + + +def test_redact_container_command_for_log_keeps_inherited_env_names(): + redacted = _redact_container_command_for_log( + [ + "docker", + "run", + "-e", + "API_KEY", + "--env=TOKEN", + "--name", + "sandbox", + "image", + ] + ) + + assert redacted == [ + "docker", + "run", + "-e", + "API_KEY", + "--env=TOKEN", + "--name", + "sandbox", + "image", + ] + + +def test_format_container_command_for_log_uses_windows_quoting(monkeypatch): + monkeypatch.setattr(os, "name", "nt") + + command = _format_container_command_for_log(["docker", "run", "--name", "sandbox one", "image"]) + + assert command == 'docker run --name "sandbox one" image' + + +def test_start_container_logs_redacted_env_values(monkeypatch, caplog): + backend = LocalContainerBackend( + image="sandbox:latest", + base_port=8080, + container_prefix="sandbox", + config_mounts=[], + environment={"API_KEY": "secret-value", "NORMAL": "visible-value"}, + ) + monkeypatch.setattr(backend, "_runtime", "docker") + + captured_cmd: list[str] = [] + + def fake_run(cmd, **kwargs): + captured_cmd.extend(cmd) + return SimpleNamespace(stdout="container-id\n", stderr="", returncode=0) + + monkeypatch.setattr("subprocess.run", fake_run) + + with caplog.at_level(logging.INFO, logger="deerflow.community.aio_sandbox.local_backend"): + backend._start_container("sandbox-test", 18080) + + joined_cmd = " ".join(captured_cmd) + assert "API_KEY=secret-value" in joined_cmd + assert "NORMAL=visible-value" in joined_cmd + + log_output = "\n".join(record.getMessage() for record in caplog.records) + assert "API_KEY=" in log_output + assert "NORMAL=" in log_output + assert "secret-value" not in log_output + assert "visible-value" not in log_output diff --git a/backend/tests/test_client.py b/backend/tests/test_client.py index 3d8d0a9d5..5bc9f2a44 100644 --- a/backend/tests/test_client.py +++ b/backend/tests/test_client.py @@ -49,6 +49,17 @@ def client(mock_app_config): return DeerFlowClient() +@pytest.fixture +def allow_skill_security_scan(): + async def _scan(*args, **kwargs): + from deerflow.skills.security_scanner import ScanResult + + return ScanResult(decision="allow", reason="ok") + + with patch("deerflow.skills.installer.scan_skill_content", _scan): + yield + + # --------------------------------------------------------------------------- # __init__ # --------------------------------------------------------------------------- @@ -1195,7 +1206,7 @@ class TestSkillsManagement: with pytest.raises(ValueError, match="not found"): client.update_skill("nonexistent", enabled=True) - def test_install_skill(self, client): + def test_install_skill(self, client, allow_skill_security_scan): with tempfile.TemporaryDirectory() as tmp: tmp_path = Path(tmp) @@ -2033,7 +2044,7 @@ class TestScenarioMemoryWorkflow: class TestScenarioSkillInstallAndUse: """Scenario: Install a skill → verify it appears → toggle it.""" - def test_install_then_toggle(self, client): + def test_install_then_toggle(self, client, allow_skill_security_scan): """Install .skill archive → list to verify → disable → verify disabled.""" with tempfile.TemporaryDirectory() as tmp: tmp_path = Path(tmp) @@ -2279,7 +2290,7 @@ class TestGatewayConformance: parsed = SkillResponse(**result) assert parsed.name == "web-search" - def test_install_skill(self, client, tmp_path): + def test_install_skill(self, client, tmp_path, allow_skill_security_scan): skill_dir = tmp_path / "my-skill" skill_dir.mkdir() (skill_dir / "SKILL.md").write_text("---\nname: my-skill\ndescription: A test skill\n---\nBody\n") @@ -2477,7 +2488,7 @@ class TestInstallSkillSecurity: with pytest.raises(ValueError, match="unsafe"): client.install_skill(archive) - def test_symlinks_skipped_during_extraction(self, client): + def test_symlinks_skipped_during_extraction(self, client, allow_skill_security_scan): """Symlink entries in the archive are skipped (never written to disk).""" import stat as stat_mod diff --git a/backend/tests/test_client_e2e.py b/backend/tests/test_client_e2e.py index 6c688933a..c4ba36b98 100644 --- a/backend/tests/test_client_e2e.py +++ b/backend/tests/test_client_e2e.py @@ -525,6 +525,15 @@ class TestArtifactAccess: class TestSkillInstallation: """install_skill() with real ZIP handling and filesystem.""" + @pytest.fixture(autouse=True) + def _allow_skill_security_scan(self, monkeypatch): + async def _scan(*args, **kwargs): + from deerflow.skills.security_scanner import ScanResult + + return ScanResult(decision="allow", reason="ok") + + monkeypatch.setattr("deerflow.skills.installer.scan_skill_content", _scan) + @pytest.fixture(autouse=True) def _isolate_skills_dir(self, tmp_path, monkeypatch): """Redirect skill installation to a temp directory.""" diff --git a/backend/tests/test_create_deerflow_agent.py b/backend/tests/test_create_deerflow_agent.py index 03fee2055..fb403ed7f 100644 --- a/backend/tests/test_create_deerflow_agent.py +++ b/backend/tests/test_create_deerflow_agent.py @@ -116,10 +116,22 @@ def test_middleware_and_features_conflict(): # --------------------------------------------------------------------------- -# 7. Vision feature auto-injects view_image_tool +# 7. Vision feature auto-injects view_image_tool when thread data is available # --------------------------------------------------------------------------- @patch("deerflow.agents.factory.create_agent") def test_vision_injects_view_image_tool(mock_create_agent): + mock_create_agent.return_value = MagicMock() + feat = RuntimeFeatures(vision=True, sandbox=True) + + create_deerflow_agent(_make_mock_model(), features=feat) + + call_kwargs = mock_create_agent.call_args[1] + tool_names = [t.name for t in call_kwargs["tools"]] + assert "view_image" in tool_names + + +@patch("deerflow.agents.factory.create_agent") +def test_vision_without_sandbox_does_not_inject_view_image_tool(mock_create_agent): mock_create_agent.return_value = MagicMock() feat = RuntimeFeatures(vision=True, sandbox=False) @@ -127,7 +139,7 @@ def test_vision_injects_view_image_tool(mock_create_agent): call_kwargs = mock_create_agent.call_args[1] tool_names = [t.name for t in call_kwargs["tools"]] - assert "view_image" in tool_names + assert "view_image" not in tool_names def test_view_image_middleware_preserves_viewed_images_reducer(): @@ -301,11 +313,11 @@ def test_always_on_error_handling(mock_create_agent): # --------------------------------------------------------------------------- -# 17. Vision with custom middleware still injects tool +# 17. Vision with custom middleware follows thread-data availability # --------------------------------------------------------------------------- @patch("deerflow.agents.factory.create_agent") -def test_vision_custom_middleware_still_injects_tool(mock_create_agent): - """Custom vision middleware still gets the view_image_tool auto-injected.""" +def test_vision_custom_middleware_without_sandbox_does_not_inject_tool(mock_create_agent): + """Custom vision middleware without thread data does not get view_image_tool auto-injected.""" from langchain.agents.middleware import AgentMiddleware mock_create_agent.return_value = MagicMock() @@ -319,7 +331,7 @@ def test_vision_custom_middleware_still_injects_tool(mock_create_agent): call_kwargs = mock_create_agent.call_args[1] tool_names = [t.name for t in call_kwargs["tools"]] - assert "view_image" in tool_names + assert "view_image" not in tool_names # =========================================================================== diff --git a/backend/tests/test_local_sandbox_provider_mounts.py b/backend/tests/test_local_sandbox_provider_mounts.py index 328b1d48d..1468e005c 100644 --- a/backend/tests/test_local_sandbox_provider_mounts.py +++ b/backend/tests/test_local_sandbox_provider_mounts.py @@ -1,4 +1,5 @@ import errno +from pathlib import Path from types import SimpleNamespace from unittest.mock import patch @@ -8,6 +9,13 @@ from deerflow.sandbox.local.local_sandbox import LocalSandbox, PathMapping from deerflow.sandbox.local.local_sandbox_provider import LocalSandboxProvider +def _symlink_to(target, link, *, target_is_directory=False): + try: + link.symlink_to(target, target_is_directory=target_is_directory) + except (NotImplementedError, OSError) as exc: + pytest.skip(f"symlinks are not available: {exc}") + + class TestPathMapping: def test_path_mapping_dataclass(self): mapping = PathMapping(container_path="/mnt/skills", local_path="/home/user/skills", read_only=True) @@ -29,7 +37,7 @@ class TestLocalSandboxPathResolution: ], ) resolved = sandbox._resolve_path("/mnt/skills") - assert resolved == "/home/user/skills" + assert resolved == str(Path("/home/user/skills").resolve()) def test_resolve_path_nested_path(self): sandbox = LocalSandbox( @@ -39,7 +47,7 @@ class TestLocalSandboxPathResolution: ], ) resolved = sandbox._resolve_path("/mnt/skills/agent/prompt.py") - assert resolved == "/home/user/skills/agent/prompt.py" + assert resolved == str(Path("/home/user/skills/agent/prompt.py").resolve()) def test_resolve_path_no_mapping(self): sandbox = LocalSandbox( @@ -61,7 +69,7 @@ class TestLocalSandboxPathResolution: ) resolved = sandbox._resolve_path("/mnt/skills/file.py") # Should match /mnt/skills first (longer prefix) - assert resolved == "/home/user/skills/file.py" + assert resolved == str(Path("/home/user/skills/file.py").resolve()) def test_reverse_resolve_path_exact_match(self, tmp_path): skills_dir = tmp_path / "skills" @@ -175,6 +183,157 @@ class TestReadOnlyPath: assert exc_info.value.errno == errno.EROFS +class TestSymlinkEscapes: + def test_read_file_blocks_symlink_escape_from_mount(self, tmp_path): + mount_dir = tmp_path / "mount" + mount_dir.mkdir() + outside_dir = tmp_path / "outside" + outside_dir.mkdir() + (outside_dir / "secret.txt").write_text("secret") + _symlink_to(outside_dir, mount_dir / "escape", target_is_directory=True) + + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/data", local_path=str(mount_dir), read_only=False), + ], + ) + + with pytest.raises(PermissionError) as exc_info: + sandbox.read_file("/mnt/data/escape/secret.txt") + + assert exc_info.value.errno == errno.EACCES + + def test_write_file_blocks_symlink_escape_from_mount(self, tmp_path): + mount_dir = tmp_path / "mount" + mount_dir.mkdir() + outside_dir = tmp_path / "outside" + outside_dir.mkdir() + victim = outside_dir / "victim.txt" + victim.write_text("original") + _symlink_to(outside_dir, mount_dir / "escape", target_is_directory=True) + + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/data", local_path=str(mount_dir), read_only=False), + ], + ) + + with pytest.raises(PermissionError) as exc_info: + sandbox.write_file("/mnt/data/escape/victim.txt", "changed") + + assert exc_info.value.errno == errno.EACCES + assert victim.read_text() == "original" + + def test_write_file_uses_matched_read_only_mount_for_symlink_target(self, tmp_path): + repo_dir = tmp_path / "repo" + repo_dir.mkdir() + writable_dir = repo_dir / "writable" + writable_dir.mkdir() + _symlink_to(writable_dir, repo_dir / "link-to-writable", target_is_directory=True) + + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/repo", local_path=str(repo_dir), read_only=True), + PathMapping(container_path="/mnt/repo/writable", local_path=str(writable_dir), read_only=False), + ], + ) + + with pytest.raises(OSError) as exc_info: + sandbox.write_file("/mnt/repo/link-to-writable/file.txt", "bypass") + + assert exc_info.value.errno == errno.EROFS + assert not (writable_dir / "file.txt").exists() + + def test_list_dir_does_not_follow_symlink_escape_from_mount(self, tmp_path): + mount_dir = tmp_path / "mount" + mount_dir.mkdir() + outside_dir = tmp_path / "outside" + outside_dir.mkdir() + (outside_dir / "secret.txt").write_text("secret") + _symlink_to(outside_dir, mount_dir / "escape", target_is_directory=True) + (mount_dir / "visible.txt").write_text("visible") + + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/data", local_path=str(mount_dir), read_only=False), + ], + ) + + entries = sandbox.list_dir("/mnt/data", max_depth=2) + + assert "/mnt/data/visible.txt" in entries + assert all("secret.txt" not in entry for entry in entries) + assert all("outside" not in entry for entry in entries) + + def test_list_dir_formats_internal_directory_symlink_like_directory(self, tmp_path): + mount_dir = tmp_path / "mount" + nested_dir = mount_dir / "nested" + linked_dir = nested_dir / "linked-dir" + linked_dir.mkdir(parents=True) + _symlink_to(linked_dir, mount_dir / "dir-link", target_is_directory=True) + + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/data", local_path=str(mount_dir), read_only=False), + ], + ) + + entries = sandbox.list_dir("/mnt/data", max_depth=1) + + assert "/mnt/data/nested/" in entries + assert "/mnt/data/nested/linked-dir/" in entries + assert "/mnt/data/dir-link" not in entries + + def test_write_file_blocks_symlink_into_nested_read_only_mount(self, tmp_path): + repo_dir = tmp_path / "repo" + repo_dir.mkdir() + protected_dir = repo_dir / "protected" + protected_dir.mkdir() + _symlink_to(protected_dir, repo_dir / "link-to-protected", target_is_directory=True) + + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/repo", local_path=str(repo_dir), read_only=False), + PathMapping(container_path="/mnt/repo/protected", local_path=str(protected_dir), read_only=True), + ], + ) + + with pytest.raises(OSError) as exc_info: + sandbox.write_file("/mnt/repo/link-to-protected/file.txt", "bypass") + + assert exc_info.value.errno == errno.EROFS + assert not (protected_dir / "file.txt").exists() + + def test_update_file_blocks_symlink_into_nested_read_only_mount(self, tmp_path): + repo_dir = tmp_path / "repo" + repo_dir.mkdir() + protected_dir = repo_dir / "protected" + protected_dir.mkdir() + existing = protected_dir / "file.txt" + existing.write_bytes(b"original") + _symlink_to(protected_dir, repo_dir / "link-to-protected", target_is_directory=True) + + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/repo", local_path=str(repo_dir), read_only=False), + PathMapping(container_path="/mnt/repo/protected", local_path=str(protected_dir), read_only=True), + ], + ) + + with pytest.raises(OSError) as exc_info: + sandbox.update_file("/mnt/repo/link-to-protected/file.txt", b"changed") + + assert exc_info.value.errno == errno.EROFS + assert existing.read_bytes() == b"original" + + class TestMultipleMounts: def test_multiple_read_write_mounts(self, tmp_path): skills_dir = tmp_path / "skills" diff --git a/backend/tests/test_mindie_provider.py b/backend/tests/test_mindie_provider.py index 552966c37..78bc0d972 100644 --- a/backend/tests/test_mindie_provider.py +++ b/backend/tests/test_mindie_provider.py @@ -91,7 +91,7 @@ class TestFixMessages: assert isinstance(out, AIMessage) assert "" in out.content assert "" in out.content - assert '"London"' in out.content + assert "London" in out.content assert not getattr(out, "tool_calls", []) def test_ai_message_text_preserved_before_xml(self): @@ -116,6 +116,22 @@ class TestFixMessages: assert "" in content assert "" in content + def test_ai_message_tool_args_are_xml_escaped(self): + msg = AIMessage( + content="", + tool_calls=[ + { + "name": "fn<&>", + "args": {"k<&>": "v<&>"}, + "id": "id1", + } + ], + ) + result = _fix_messages([msg]) + content = result[0].content + assert "" in content + assert "v<&>" in content + # ── ToolMessage → HumanMessage ──────────────────────────────────────────── def test_tool_message_becomes_human_message(self): @@ -185,6 +201,15 @@ class TestParseXmlToolCalls: assert calls[0]["name"] == "a" assert calls[1]["name"] == "b" + def test_nested_tool_call_blocks_do_not_break_parsing(self): + content = "12" + clean, calls = _parse_xml_tool_call_to_dict(content) + assert clean == "" + assert len(calls) == 1 + assert calls[0]["name"] == "outer" + assert calls[0]["args"] == {"q": 1} + assert "x" not in calls[0]["args"] + def test_text_before_tool_call_preserved(self): content = "Here is the answer.\nv" clean, calls = _parse_xml_tool_call_to_dict(content) @@ -226,6 +251,12 @@ class TestParseXmlToolCalls: _, c2 = _parse_xml_tool_call_to_dict(block) assert c1[0]["id"] != c2[0]["id"] + def test_escaped_entities_are_unescaped(self): + content = "v<&>" + _, calls = _parse_xml_tool_call_to_dict(content) + assert calls[0]["name"] == "fn<&>" + assert calls[0]["args"]["k<&>"] == "v<&>" + # ═════════════════════════════════════════════════════════════════════════════ # 3. MindIEChatModel._patch_result_with_tools @@ -244,6 +275,12 @@ class TestPatchResult: patched = model._patch_result_with_tools(result) assert patched.generations[0].message.content == "line1\nline2" + def test_escaped_newlines_inside_code_fence_preserved(self): + model = self._model() + result = _make_chat_result('text\\n```json\n{"k":"a\\\\nb"}\n```\\nend') + patched = model._patch_result_with_tools(result) + assert patched.generations[0].message.content == 'text\n```json\n{"k":"a\\\\nb"}\n```\nend' + def test_xml_tool_calls_extracted(self): model = self._model() content = "1+1" @@ -281,6 +318,50 @@ class TestPatchResult: assert patched is not None +class TestMindIEInit: + def test_timeout_kwargs_are_normalized(self): + captured = {} + + def fake_init(self, **kwargs): + captured.update(kwargs) + + with patch("deerflow.models.mindie_provider.ChatOpenAI.__init__", new=fake_init): + MindIEChatModel( + model="mindie-test", + api_key="test-key", + connect_timeout=1.0, + read_timeout=2.0, + write_timeout=3.0, + pool_timeout=4.0, + ) + + timeout = captured.get("timeout") + assert timeout is not None + assert timeout.connect == 1.0 + assert timeout.read == 2.0 + assert timeout.write == 3.0 + assert timeout.pool == 4.0 + + def test_explicit_timeout_takes_precedence(self): + captured = {} + + def fake_init(self, **kwargs): + captured.update(kwargs) + + with patch("deerflow.models.mindie_provider.ChatOpenAI.__init__", new=fake_init): + MindIEChatModel( + model="mindie-test", + api_key="test-key", + timeout=9.0, + connect_timeout=1.0, + read_timeout=2.0, + write_timeout=3.0, + pool_timeout=4.0, + ) + + assert captured.get("timeout") == 9.0 + + # ═════════════════════════════════════════════════════════════════════════════ # 4. MindIEChatModel._generate (sync) # ═════════════════════════════════════════════════════════════════════════════ diff --git a/backend/tests/test_sandbox_tools_security.py b/backend/tests/test_sandbox_tools_security.py index 8c67cd50a..57466a0fe 100644 --- a/backend/tests/test_sandbox_tools_security.py +++ b/backend/tests/test_sandbox_tools_security.py @@ -346,6 +346,104 @@ def test_validate_local_bash_command_paths_blocks_traversal_in_skills() -> None: ) +@pytest.mark.parametrize( + "command", + [ + "cat ../uploads/secret.txt", + "cat subdir/../../secret.txt", + "python script.py --input=../secret.txt", + "echo ok > ../outputs/result.txt", + ], +) +def test_validate_local_bash_command_paths_blocks_relative_dotdot_segments(command: str) -> None: + with pytest.raises(PermissionError, match="path traversal"): + validate_local_bash_command_paths(command, _THREAD_DATA) + + +def test_validate_local_bash_command_paths_blocks_cd_root_escape() -> None: + with pytest.raises(PermissionError, match="Unsafe working directory"): + validate_local_bash_command_paths("cd / && cat etc/passwd", _THREAD_DATA) + + +def test_validate_local_bash_command_paths_blocks_cd_parent_escape() -> None: + with pytest.raises(PermissionError, match="path traversal"): + validate_local_bash_command_paths("cd .. && cat etc/passwd", _THREAD_DATA) + + +def test_validate_local_bash_command_paths_blocks_cd_env_var_escape() -> None: + with pytest.raises(PermissionError, match="Unsafe working directory"): + validate_local_bash_command_paths("cd $HOME && cat .ssh/id_rsa", _THREAD_DATA) + + +def test_validate_local_bash_command_paths_blocks_multiline_cd_escape() -> None: + with pytest.raises(PermissionError, match="Unsafe working directory"): + validate_local_bash_command_paths("echo ok\ncd $HOME && cat .ssh/id_rsa", _THREAD_DATA) + + +@pytest.mark.parametrize( + "command", + [ + "command cd / && cat etc/passwd", + "builtin cd $HOME && cat .ssh/id_rsa", + "if cd $HOME; then cat .ssh/id_rsa; fi", + "{ cd /; cat etc/passwd; }", + 'echo "$(cd $HOME && cat .ssh/id_rsa)"', + ], +) +def test_validate_local_bash_command_paths_blocks_complex_cd_escapes(command: str) -> None: + with pytest.raises(PermissionError, match="Unsafe working directory"): + validate_local_bash_command_paths(command, _THREAD_DATA) + + +@pytest.mark.parametrize( + "command", + [ + "ls /", + "ln -s / root && cat root/etc/passwd", + "command ls /", + ], +) +def test_validate_local_bash_command_paths_blocks_bare_root_path(command: str) -> None: + with pytest.raises(PermissionError, match="Unsafe absolute paths"): + validate_local_bash_command_paths(command, _THREAD_DATA) + + +@pytest.mark.parametrize( + "command", + [ + "echo cd /", + "printf '%s\\n' pushd /", + ], +) +def test_validate_local_bash_command_paths_allows_cd_words_as_arguments(command: str) -> None: + validate_local_bash_command_paths(command, _THREAD_DATA) + + +def test_validate_local_bash_command_paths_allows_workspace_relative_paths() -> None: + validate_local_bash_command_paths( + "mkdir -p reports && python script.py data/input.csv > reports/out.txt", + _THREAD_DATA, + ) + + +def test_validate_local_bash_command_paths_allows_cd_virtual_workspace_with_relative_paths() -> None: + validate_local_bash_command_paths( + "cd /mnt/user-data/workspace && cat data/input.csv > reports/out.txt", + _THREAD_DATA, + ) + + +def test_validate_local_bash_command_paths_allows_http_url_dotdot_segments() -> None: + validate_local_bash_command_paths( + "curl https://example.com/packages/../archive.tar.gz -o /mnt/user-data/workspace/archive.tar.gz", + _THREAD_DATA, + ) + validate_local_bash_command_paths( + "curl http://example.com/packages/../archive.tar.gz -o /mnt/user-data/workspace/archive.tar.gz", + _THREAD_DATA, + ) + + def test_bash_tool_rejects_host_bash_when_local_sandbox_default(monkeypatch) -> None: runtime = SimpleNamespace( state={"sandbox": {"sandbox_id": "local"}, "thread_data": _THREAD_DATA.copy()}, @@ -367,6 +465,28 @@ def test_bash_tool_rejects_host_bash_when_local_sandbox_default(monkeypatch) -> assert "Host bash execution is disabled" in result +def test_bash_tool_blocks_relative_traversal_before_host_execution(monkeypatch) -> None: + runtime = SimpleNamespace( + state={"sandbox": {"sandbox_id": "local"}, "thread_data": _THREAD_DATA.copy()}, + context={"thread_id": "thread-1"}, + ) + + monkeypatch.setattr( + "deerflow.sandbox.tools.ensure_sandbox_initialized", + lambda runtime: SimpleNamespace(execute_command=lambda command: pytest.fail("unsafe command should not execute")), + ) + monkeypatch.setattr("deerflow.sandbox.tools.ensure_thread_directories_exist", lambda runtime: None) + monkeypatch.setattr("deerflow.sandbox.tools.is_host_bash_allowed", lambda: True) + + result = bash_tool.func( + runtime=runtime, + description="run command", + command="cat ../uploads/secret.txt", + ) + + assert "path traversal" in result + + # ---------- Skills path tests ---------- diff --git a/backend/tests/test_skills_custom_router.py b/backend/tests/test_skills_custom_router.py index 44bf9ff7c..3c6b50849 100644 --- a/backend/tests/test_skills_custom_router.py +++ b/backend/tests/test_skills_custom_router.py @@ -1,5 +1,6 @@ import errno import json +import zipfile from pathlib import Path from types import SimpleNamespace @@ -40,6 +41,85 @@ def _make_test_app(config) -> FastAPI: app.state.config = config app.include_router(skills_router.router) return app + + +def _make_skill_archive(tmp_path: Path, name: str, content: str | None = None) -> Path: + archive = tmp_path / f"{name}.skill" + skill_content = content or _skill_content(name) + with zipfile.ZipFile(archive, "w") as zf: + zf.writestr(f"{name}/SKILL.md", skill_content) + return archive + + +def test_install_skill_archive_runs_security_scan(monkeypatch, tmp_path): + skills_root = tmp_path / "skills" + (skills_root / "custom").mkdir(parents=True) + archive = _make_skill_archive(tmp_path, "archive-skill") + scan_calls = [] + refresh_calls = [] + + async def _scan(content, *, executable, location): + from deerflow.skills.security_scanner import ScanResult + + scan_calls.append({"content": content, "executable": executable, "location": location}) + return ScanResult(decision="allow", reason="ok") + + async def _refresh(): + refresh_calls.append("refresh") + + monkeypatch.setattr(skills_router, "resolve_thread_virtual_path", lambda thread_id, path: archive) + monkeypatch.setattr("deerflow.skills.installer.get_skills_root_path", lambda: skills_root) + monkeypatch.setattr("deerflow.skills.installer.scan_skill_content", _scan) + monkeypatch.setattr(skills_router, "refresh_skills_system_prompt_cache_async", _refresh) + + app = FastAPI() + app.include_router(skills_router.router) + + with TestClient(app) as client: + response = client.post("/api/skills/install", json={"thread_id": "thread-1", "path": "mnt/user-data/outputs/archive-skill.skill"}) + + assert response.status_code == 200 + assert response.json()["skill_name"] == "archive-skill" + assert (skills_root / "custom" / "archive-skill" / "SKILL.md").exists() + assert scan_calls == [ + { + "content": _skill_content("archive-skill"), + "executable": False, + "location": "archive-skill/SKILL.md", + } + ] + assert refresh_calls == ["refresh"] + + +def test_install_skill_archive_security_scan_block_returns_400(monkeypatch, tmp_path): + skills_root = tmp_path / "skills" + (skills_root / "custom").mkdir(parents=True) + archive = _make_skill_archive(tmp_path, "blocked-skill") + refresh_calls = [] + + async def _scan(*args, **kwargs): + from deerflow.skills.security_scanner import ScanResult + + return ScanResult(decision="block", reason="prompt injection") + + async def _refresh(): + refresh_calls.append("refresh") + + monkeypatch.setattr(skills_router, "resolve_thread_virtual_path", lambda thread_id, path: archive) + monkeypatch.setattr("deerflow.skills.installer.get_skills_root_path", lambda: skills_root) + monkeypatch.setattr("deerflow.skills.installer.scan_skill_content", _scan) + monkeypatch.setattr(skills_router, "refresh_skills_system_prompt_cache_async", _refresh) + + app = FastAPI() + app.include_router(skills_router.router) + + with TestClient(app) as client: + response = client.post("/api/skills/install", json={"thread_id": "thread-1", "path": "mnt/user-data/outputs/blocked-skill.skill"}) + + assert response.status_code == 400 + assert "Security scan blocked skill 'blocked-skill': prompt injection" in response.json()["detail"] + assert not (skills_root / "custom" / "blocked-skill").exists() + assert refresh_calls == [] def test_custom_skills_router_lifecycle(monkeypatch, tmp_path): diff --git a/backend/tests/test_skills_installer.py b/backend/tests/test_skills_installer.py index c5da4b070..a7c9eae56 100644 --- a/backend/tests/test_skills_installer.py +++ b/backend/tests/test_skills_installer.py @@ -1,5 +1,6 @@ """Tests for deerflow.skills.installer — shared skill installation logic.""" +import shutil import stat import zipfile from pathlib import Path @@ -7,6 +8,7 @@ from pathlib import Path import pytest from deerflow.skills.installer import ( + SkillSecurityScanError, install_skill_from_archive, is_symlink_member, is_unsafe_zip_member, @@ -14,6 +16,7 @@ from deerflow.skills.installer import ( safe_extract_skill_archive, should_ignore_archive_entry, ) +from deerflow.skills.security_scanner import ScanResult # --------------------------------------------------------------------------- # is_unsafe_zip_member @@ -169,6 +172,13 @@ class TestSafeExtract: class TestInstallSkillFromArchive: + @pytest.fixture(autouse=True) + def _allow_security_scan(self, monkeypatch): + async def _scan(*args, **kwargs): + return ScanResult(decision="allow", reason="ok") + + monkeypatch.setattr("deerflow.skills.installer.scan_skill_content", _scan) + def _make_skill_zip(self, tmp_path: Path, skill_name: str = "test-skill") -> Path: """Create a valid .skill archive.""" zip_path = tmp_path / f"{skill_name}.skill" @@ -188,6 +198,178 @@ class TestInstallSkillFromArchive: assert result["skill_name"] == "test-skill" assert (skills_root / "custom" / "test-skill" / "SKILL.md").exists() + def test_scans_skill_markdown_before_install(self, tmp_path, monkeypatch): + zip_path = self._make_skill_zip(tmp_path) + skills_root = tmp_path / "skills" + skills_root.mkdir() + calls = [] + + async def _scan(content, *, executable, location): + calls.append({"content": content, "executable": executable, "location": location}) + return ScanResult(decision="allow", reason="ok") + + monkeypatch.setattr("deerflow.skills.installer.scan_skill_content", _scan) + + install_skill_from_archive(zip_path, skills_root=skills_root) + + assert calls == [ + { + "content": "---\nname: test-skill\ndescription: A test skill\n---\n\n# test-skill\n", + "executable": False, + "location": "test-skill/SKILL.md", + } + ] + + def test_scans_support_files_and_scripts_before_install(self, tmp_path, monkeypatch): + zip_path = tmp_path / "test-skill.skill" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("test-skill/SKILL.md", "---\nname: test-skill\ndescription: A test skill\n---\n\n# test-skill\n") + zf.writestr("test-skill/references/guide.md", "# Guide\n") + zf.writestr("test-skill/templates/prompt.txt", "Use care.\n") + zf.writestr("test-skill/scripts/run.sh", "#!/bin/sh\necho ok\n") + zf.writestr("test-skill/assets/logo.png", b"\x89PNG\r\n\x1a\n") + zf.writestr("test-skill/references/.env", "TOKEN=secret\n") + zf.writestr("test-skill/templates/config.cfg", "TOKEN=secret\n") + skills_root = tmp_path / "skills" + skills_root.mkdir() + calls = [] + + async def _scan(content, *, executable, location): + calls.append({"content": content, "executable": executable, "location": location}) + return ScanResult(decision="allow", reason="ok") + + monkeypatch.setattr("deerflow.skills.installer.scan_skill_content", _scan) + + install_skill_from_archive(zip_path, skills_root=skills_root) + + assert calls == [ + { + "content": "---\nname: test-skill\ndescription: A test skill\n---\n\n# test-skill\n", + "executable": False, + "location": "test-skill/SKILL.md", + }, + { + "content": "# Guide\n", + "executable": False, + "location": "test-skill/references/guide.md", + }, + { + "content": "#!/bin/sh\necho ok\n", + "executable": True, + "location": "test-skill/scripts/run.sh", + }, + { + "content": "Use care.\n", + "executable": False, + "location": "test-skill/templates/prompt.txt", + }, + ] + assert all("secret" not in call["content"] for call in calls) + + def test_nested_skill_markdown_prevents_install(self, tmp_path): + zip_path = tmp_path / "test-skill.skill" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("test-skill/SKILL.md", "---\nname: test-skill\ndescription: A test skill\n---\n\n# test-skill\n") + zf.writestr("test-skill/references/other/SKILL.md", "# Nested skill\n") + skills_root = tmp_path / "skills" + skills_root.mkdir() + + with pytest.raises(SkillSecurityScanError, match="nested SKILL.md"): + install_skill_from_archive(zip_path, skills_root=skills_root) + + assert not (skills_root / "custom" / "test-skill").exists() + + def test_script_warn_prevents_install(self, tmp_path, monkeypatch): + zip_path = tmp_path / "test-skill.skill" + with zipfile.ZipFile(zip_path, "w") as zf: + zf.writestr("test-skill/SKILL.md", "---\nname: test-skill\ndescription: A test skill\n---\n\n# test-skill\n") + zf.writestr("test-skill/scripts/run.sh", "#!/bin/sh\necho ok\n") + skills_root = tmp_path / "skills" + skills_root.mkdir() + + async def _scan(*args, executable, **kwargs): + if executable: + return ScanResult(decision="warn", reason="script needs review") + return ScanResult(decision="allow", reason="ok") + + monkeypatch.setattr("deerflow.skills.installer.scan_skill_content", _scan) + + with pytest.raises(SkillSecurityScanError, match="rejected executable.*script needs review"): + install_skill_from_archive(zip_path, skills_root=skills_root) + + assert not (skills_root / "custom" / "test-skill").exists() + + def test_security_scan_block_prevents_install(self, tmp_path, monkeypatch): + zip_path = self._make_skill_zip(tmp_path, skill_name="blocked-skill") + skills_root = tmp_path / "skills" + skills_root.mkdir() + + async def _scan(*args, **kwargs): + return ScanResult(decision="block", reason="prompt injection") + + monkeypatch.setattr("deerflow.skills.installer.scan_skill_content", _scan) + + with pytest.raises(SkillSecurityScanError, match="Security scan blocked.*prompt injection"): + install_skill_from_archive(zip_path, skills_root=skills_root) + + assert not (skills_root / "custom" / "blocked-skill").exists() + + def test_copy_failure_does_not_leave_partial_install(self, tmp_path, monkeypatch): + zip_path = self._make_skill_zip(tmp_path) + skills_root = tmp_path / "skills" + skills_root.mkdir() + + def _copytree(src, dst): + partial = Path(dst) + partial.mkdir(parents=True) + (partial / "partial.txt").write_text("partial", encoding="utf-8") + raise OSError("copy failed") + + monkeypatch.setattr("deerflow.skills.installer.shutil.copytree", _copytree) + + with pytest.raises(OSError, match="copy failed"): + install_skill_from_archive(zip_path, skills_root=skills_root) + + custom_dir = skills_root / "custom" + assert not (custom_dir / "test-skill").exists() + assert not [path for path in custom_dir.iterdir() if path.name.startswith(".installing-test-skill-")] + + def test_concurrent_target_creation_does_not_get_clobbered(self, tmp_path, monkeypatch): + zip_path = self._make_skill_zip(tmp_path) + skills_root = tmp_path / "skills" + skills_root.mkdir() + target = skills_root / "custom" / "test-skill" + original_copytree = shutil.copytree + + def _copytree(src, dst): + target.mkdir(parents=True) + (target / "marker.txt").write_text("external", encoding="utf-8") + return original_copytree(src, dst) + + monkeypatch.setattr("deerflow.skills.installer.shutil.copytree", _copytree) + + with pytest.raises(ValueError, match="already exists"): + install_skill_from_archive(zip_path, skills_root=skills_root) + + assert (target / "marker.txt").read_text(encoding="utf-8") == "external" + assert not (target / "SKILL.md").exists() + + def test_move_failure_cleans_reserved_target(self, tmp_path, monkeypatch): + zip_path = self._make_skill_zip(tmp_path) + skills_root = tmp_path / "skills" + skills_root.mkdir() + + def _move(src, dst): + Path(dst).write_text("partial", encoding="utf-8") + raise OSError("move failed") + + monkeypatch.setattr("deerflow.skills.installer.shutil.move", _move) + + with pytest.raises(OSError, match="move failed"): + install_skill_from_archive(zip_path, skills_root=skills_root) + + assert not (skills_root / "custom" / "test-skill").exists() + def test_duplicate_raises(self, tmp_path): zip_path = self._make_skill_zip(tmp_path) skills_root = tmp_path / "skills" diff --git a/backend/tests/test_view_image_tool.py b/backend/tests/test_view_image_tool.py new file mode 100644 index 000000000..eb7db890c --- /dev/null +++ b/backend/tests/test_view_image_tool.py @@ -0,0 +1,164 @@ +import base64 +import importlib +import os +from pathlib import Path +from types import SimpleNamespace + +import pytest + +from deerflow.tools.builtins.view_image_tool import view_image_tool + +view_image_module = importlib.import_module("deerflow.tools.builtins.view_image_tool") + +PNG_BYTES = base64.b64decode("iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==") + + +def _make_thread_data(tmp_path: Path) -> dict[str, str]: + user_data = tmp_path / "threads" / "thread-1" / "user-data" + workspace = user_data / "workspace" + uploads = user_data / "uploads" + outputs = user_data / "outputs" + for directory in (workspace, uploads, outputs): + directory.mkdir(parents=True) + + return { + "workspace_path": str(workspace), + "uploads_path": str(uploads), + "outputs_path": str(outputs), + } + + +def _make_runtime(thread_data: dict[str, str]) -> SimpleNamespace: + return SimpleNamespace( + state={"thread_data": thread_data}, + context={"thread_id": "thread-1"}, + config={}, + ) + + +def _message_content(result) -> str: + return result.update["messages"][0].content + + +def test_view_image_rejects_external_absolute_path(tmp_path: Path) -> None: + thread_data = _make_thread_data(tmp_path) + outside_image = tmp_path / "outside.png" + outside_image.write_bytes(PNG_BYTES) + + result = view_image_tool.func( + runtime=_make_runtime(thread_data), + image_path=str(outside_image), + tool_call_id="tc-external", + ) + + assert "Only image paths under /mnt/user-data" in _message_content(result) + assert "viewed_images" not in result.update + + +def test_view_image_reads_virtual_uploads_path(tmp_path: Path) -> None: + thread_data = _make_thread_data(tmp_path) + image_path = Path(thread_data["uploads_path"]) / "sample.png" + image_path.write_bytes(PNG_BYTES) + + result = view_image_tool.func( + runtime=_make_runtime(thread_data), + image_path="/mnt/user-data/uploads/sample.png", + tool_call_id="tc-uploads", + ) + + assert _message_content(result) == "Successfully read image" + viewed_image = result.update["viewed_images"]["/mnt/user-data/uploads/sample.png"] + assert viewed_image["base64"] == base64.b64encode(PNG_BYTES).decode("utf-8") + assert viewed_image["mime_type"] == "image/png" + + +def test_view_image_rejects_spoofed_extension(tmp_path: Path) -> None: + thread_data = _make_thread_data(tmp_path) + image_path = Path(thread_data["uploads_path"]) / "not-really.png" + image_path.write_bytes(b"not an image") + + result = view_image_tool.func( + runtime=_make_runtime(thread_data), + image_path="/mnt/user-data/uploads/not-really.png", + tool_call_id="tc-spoofed", + ) + + assert "contents do not match" in _message_content(result) + assert "viewed_images" not in result.update + + +def test_view_image_rejects_mismatched_magic_bytes(tmp_path: Path) -> None: + thread_data = _make_thread_data(tmp_path) + image_path = Path(thread_data["uploads_path"]) / "jpeg-named-png.png" + image_path.write_bytes(b"\xff\xd8\xff\xe0fake-jpeg") + + result = view_image_tool.func( + runtime=_make_runtime(thread_data), + image_path="/mnt/user-data/uploads/jpeg-named-png.png", + tool_call_id="tc-mismatch", + ) + + assert "file extension indicates image/png" in _message_content(result) + assert "viewed_images" not in result.update + + +def test_view_image_rejects_oversized_image(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + thread_data = _make_thread_data(tmp_path) + image_path = Path(thread_data["uploads_path"]) / "sample.png" + image_path.write_bytes(PNG_BYTES) + monkeypatch.setattr(view_image_module, "_MAX_IMAGE_BYTES", len(PNG_BYTES) - 1) + + result = view_image_tool.func( + runtime=_make_runtime(thread_data), + image_path="/mnt/user-data/uploads/sample.png", + tool_call_id="tc-oversized", + ) + + assert "Image file is too large" in _message_content(result) + assert "viewed_images" not in result.update + + +def test_view_image_sanitizes_read_errors(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + thread_data = _make_thread_data(tmp_path) + image_path = Path(thread_data["uploads_path"]) / "sample.png" + image_path.write_bytes(PNG_BYTES) + + def _open(*args, **kwargs): + raise PermissionError(f"permission denied: {image_path}") + + monkeypatch.setattr("builtins.open", _open) + + result = view_image_tool.func( + runtime=_make_runtime(thread_data), + image_path="/mnt/user-data/uploads/sample.png", + tool_call_id="tc-read-error", + ) + + message = _message_content(result) + assert "Error reading image file" in message + assert str(image_path) not in message + assert str(Path(thread_data["uploads_path"])) not in message + assert "/mnt/user-data/uploads/sample.png" in message + assert "viewed_images" not in result.update + + +@pytest.mark.skipif(os.name == "nt", reason="symlink semantics differ on Windows") +def test_view_image_rejects_uploads_symlink_escape(tmp_path: Path) -> None: + thread_data = _make_thread_data(tmp_path) + outside_image = tmp_path / "outside-target.png" + outside_image.write_bytes(PNG_BYTES) + + link_path = Path(thread_data["uploads_path"]) / "escape.png" + try: + link_path.symlink_to(outside_image) + except OSError as exc: + pytest.skip(f"symlink creation failed: {exc}") + + result = view_image_tool.func( + runtime=_make_runtime(thread_data), + image_path="/mnt/user-data/uploads/escape.png", + tool_call_id="tc-symlink", + ) + + assert "path traversal" in _message_content(result) + assert "viewed_images" not in result.update