diff --git a/backend/packages/harness/deerflow/sandbox/local/local_sandbox.py b/backend/packages/harness/deerflow/sandbox/local/local_sandbox.py index adcdc37bb..83129724b 100644 --- a/backend/packages/harness/deerflow/sandbox/local/local_sandbox.py +++ b/backend/packages/harness/deerflow/sandbox/local/local_sandbox.py @@ -1,7 +1,9 @@ +import errno import ntpath import os import shutil import subprocess +from dataclasses import dataclass from pathlib import Path from deerflow.sandbox.local.list_dir import list_dir @@ -9,6 +11,15 @@ from deerflow.sandbox.sandbox import Sandbox from deerflow.sandbox.search import GrepMatch, find_glob_matches, find_grep_matches +@dataclass(frozen=True) +class PathMapping: + """A path mapping from a container path to a local path with optional read-only flag.""" + + container_path: str + local_path: str + read_only: bool = False + + class LocalSandbox(Sandbox): @staticmethod def _shell_name(shell: str) -> str: @@ -40,17 +51,42 @@ class LocalSandbox(Sandbox): return None - def __init__(self, id: str, path_mappings: dict[str, str] | None = None): + def __init__(self, id: str, path_mappings: list[PathMapping] | None = None): """ Initialize local sandbox with optional path mappings. Args: id: Sandbox identifier - path_mappings: Dictionary mapping container paths to local paths - Example: {"/mnt/skills": "/absolute/path/to/skills"} + path_mappings: List of path mappings with optional read-only flag. + Skills directory is read-only by default. """ super().__init__(id) - self.path_mappings = path_mappings or {} + self.path_mappings = path_mappings or [] + + def _is_read_only_path(self, resolved_path: str) -> bool: + """Check if a resolved path is under a read-only mount. + + When multiple mappings match (nested mounts), prefer the most specific + mapping (i.e. the one whose local_path is the longest prefix of the + resolved path), similar to how ``_resolve_path`` handles container paths. + """ + resolved = str(Path(resolved_path).resolve()) + + best_mapping: PathMapping | None = None + best_prefix_len = -1 + + for mapping in self.path_mappings: + local_resolved = str(Path(mapping.local_path).resolve()) + if resolved == local_resolved or resolved.startswith(local_resolved + os.sep): + prefix_len = len(local_resolved) + if prefix_len > best_prefix_len: + best_prefix_len = prefix_len + best_mapping = mapping + + if best_mapping is None: + return False + + return best_mapping.read_only def _resolve_path(self, path: str) -> str: """ @@ -65,7 +101,9 @@ class LocalSandbox(Sandbox): path_str = str(path) # Try each mapping (longest prefix first for more specific matches) - for container_path, local_path in sorted(self.path_mappings.items(), key=lambda x: len(x[0]), reverse=True): + for mapping in sorted(self.path_mappings, key=lambda m: len(m.container_path), reverse=True): + container_path = mapping.container_path + local_path = mapping.local_path if path_str == container_path or path_str.startswith(container_path + "/"): # Replace the container path prefix with local path relative = path_str[len(container_path) :].lstrip("/") @@ -85,15 +123,16 @@ class LocalSandbox(Sandbox): Returns: Container path if mapping exists, otherwise original path """ - path_str = str(Path(path).resolve()) + normalized_path = path.replace("\\", "/") + path_str = str(Path(normalized_path).resolve()) # Try each mapping (longest local path first for more specific matches) - for container_path, local_path in sorted(self.path_mappings.items(), key=lambda x: len(x[1]), reverse=True): - local_path_resolved = str(Path(local_path).resolve()) - if path_str.startswith(local_path_resolved): + for mapping in sorted(self.path_mappings, key=lambda m: len(m.local_path), reverse=True): + local_path_resolved = str(Path(mapping.local_path).resolve()) + if path_str == local_path_resolved or path_str.startswith(local_path_resolved + "/"): # Replace the local path prefix with container path relative = path_str[len(local_path_resolved) :].lstrip("/") - resolved = f"{container_path}/{relative}" if relative else container_path + resolved = f"{mapping.container_path}/{relative}" if relative else mapping.container_path return resolved # No mapping found, return original path @@ -112,7 +151,7 @@ class LocalSandbox(Sandbox): import re # Sort mappings by local path length (longest first) for correct prefix matching - sorted_mappings = sorted(self.path_mappings.items(), key=lambda x: len(x[1]), reverse=True) + sorted_mappings = sorted(self.path_mappings, key=lambda m: len(m.local_path), reverse=True) if not sorted_mappings: return output @@ -120,12 +159,11 @@ class LocalSandbox(Sandbox): # Create pattern that matches absolute paths # Match paths like /Users/... or other absolute paths result = output - for container_path, local_path in sorted_mappings: - local_path_resolved = str(Path(local_path).resolve()) + for mapping in sorted_mappings: # Escape the local path for use in regex - escaped_local = re.escape(local_path_resolved) - # Match the local path followed by optional path components - pattern = re.compile(escaped_local + r"(?:/[^\s\"';&|<>()]*)?") + escaped_local = re.escape(str(Path(mapping.local_path).resolve())) + # Match the local path followed by optional path components with either separator + pattern = re.compile(escaped_local + r"(?:[/\\][^\s\"';&|<>()]*)?") def replace_match(match: re.Match) -> str: matched_path = match.group(0) @@ -148,7 +186,7 @@ class LocalSandbox(Sandbox): import re # Sort mappings by length (longest first) for correct prefix matching - sorted_mappings = sorted(self.path_mappings.items(), key=lambda x: len(x[0]), reverse=True) + sorted_mappings = sorted(self.path_mappings, key=lambda m: len(m.container_path), reverse=True) # Build regex pattern to match all container paths # Match container path followed by optional path components @@ -158,7 +196,7 @@ class LocalSandbox(Sandbox): # Create pattern that matches any of the container paths. # The lookahead (?=/|$|...) ensures we only match at a path-segment boundary, # preventing /mnt/skills from matching inside /mnt/skills-extra. - patterns = [re.escape(container_path) + r"(?=/|$|[\s\"';&|<>()])(?:/[^\s\"';&|<>()]*)?" for container_path, _ in sorted_mappings] + patterns = [re.escape(m.container_path) + r"(?=/|$|[\s\"';&|<>()])(?:/[^\s\"';&|<>()]*)?" for m in sorted_mappings] pattern = re.compile("|".join(f"({p})" for p in patterns)) def replace_match(match: re.Match) -> str: @@ -249,6 +287,8 @@ class LocalSandbox(Sandbox): def write_file(self, path: str, content: str, append: bool = False) -> None: resolved_path = self._resolve_path(path) + if self._is_read_only_path(resolved_path): + raise OSError(errno.EROFS, "Read-only file system", path) try: dir_path = os.path.dirname(resolved_path) if dir_path: @@ -295,6 +335,8 @@ class LocalSandbox(Sandbox): def update_file(self, path: str, content: bytes) -> None: resolved_path = self._resolve_path(path) + if self._is_read_only_path(resolved_path): + raise OSError(errno.EROFS, "Read-only file system", path) try: dir_path = os.path.dirname(resolved_path) if dir_path: diff --git a/backend/packages/harness/deerflow/sandbox/local/local_sandbox_provider.py b/backend/packages/harness/deerflow/sandbox/local/local_sandbox_provider.py index 625f80f02..ec4693036 100644 --- a/backend/packages/harness/deerflow/sandbox/local/local_sandbox_provider.py +++ b/backend/packages/harness/deerflow/sandbox/local/local_sandbox_provider.py @@ -1,6 +1,7 @@ import logging +from pathlib import Path -from deerflow.sandbox.local.local_sandbox import LocalSandbox +from deerflow.sandbox.local.local_sandbox import LocalSandbox, PathMapping from deerflow.sandbox.sandbox import Sandbox from deerflow.sandbox.sandbox_provider import SandboxProvider @@ -14,16 +15,17 @@ class LocalSandboxProvider(SandboxProvider): """Initialize the local sandbox provider with path mappings.""" self._path_mappings = self._setup_path_mappings() - def _setup_path_mappings(self) -> dict[str, str]: + def _setup_path_mappings(self) -> list[PathMapping]: """ Setup path mappings for local sandbox. - Maps container paths to actual local paths, including skills directory. + Maps container paths to actual local paths, including skills directory + and any custom mounts configured in config.yaml. Returns: - Dictionary of path mappings + List of path mappings """ - mappings = {} + mappings: list[PathMapping] = [] # Map skills container path to local skills directory try: @@ -35,10 +37,63 @@ class LocalSandboxProvider(SandboxProvider): # Only add mapping if skills directory exists if skills_path.exists(): - mappings[container_path] = str(skills_path) + mappings.append( + PathMapping( + container_path=container_path, + local_path=str(skills_path), + read_only=True, # Skills directory is always read-only + ) + ) + + # Map custom mounts from sandbox config + _RESERVED_CONTAINER_PREFIXES = [container_path, "/mnt/acp-workspace", "/mnt/user-data"] + sandbox_config = config.sandbox + if sandbox_config and sandbox_config.mounts: + for mount in sandbox_config.mounts: + host_path = Path(mount.host_path) + container_path = mount.container_path.rstrip("/") or "/" + + if not host_path.is_absolute(): + logger.warning( + "Mount host_path must be absolute, skipping: %s -> %s", + mount.host_path, + mount.container_path, + ) + continue + + if not container_path.startswith("/"): + logger.warning( + "Mount container_path must be absolute, skipping: %s -> %s", + mount.host_path, + mount.container_path, + ) + continue + + # Reject mounts that conflict with reserved container paths + if any(container_path == p or container_path.startswith(p + "/") for p in _RESERVED_CONTAINER_PREFIXES): + logger.warning( + "Mount container_path conflicts with reserved prefix, skipping: %s", + mount.container_path, + ) + continue + # Ensure the host path exists before adding mapping + if host_path.exists(): + mappings.append( + PathMapping( + container_path=container_path, + local_path=str(host_path.resolve()), + read_only=mount.read_only, + ) + ) + else: + logger.warning( + "Mount host_path does not exist, skipping: %s -> %s", + mount.host_path, + mount.container_path, + ) except Exception as e: # Log but don't fail if config loading fails - logger.warning("Could not setup skills path mapping: %s", e, exc_info=True) + logger.warning("Could not setup path mappings: %s", e, exc_info=True) return mappings diff --git a/backend/packages/harness/deerflow/sandbox/tools.py b/backend/packages/harness/deerflow/sandbox/tools.py index 55591254e..acd661db0 100644 --- a/backend/packages/harness/deerflow/sandbox/tools.py +++ b/backend/packages/harness/deerflow/sandbox/tools.py @@ -119,6 +119,54 @@ def _is_acp_workspace_path(path: str) -> bool: return path == _ACP_WORKSPACE_VIRTUAL_PATH or path.startswith(f"{_ACP_WORKSPACE_VIRTUAL_PATH}/") +def _get_custom_mounts(): + """Get custom volume mounts from sandbox config. + + Result is cached after the first successful config load. If config loading + fails an empty list is returned *without* caching so that a later call can + pick up the real value once the config is available. + """ + cached = getattr(_get_custom_mounts, "_cached", None) + if cached is not None: + return cached + try: + from pathlib import Path + + from deerflow.config import get_app_config + + config = get_app_config() + mounts = [] + if config.sandbox and config.sandbox.mounts: + # Only include mounts whose host_path exists, consistent with + # LocalSandboxProvider._setup_path_mappings() which also filters + # by host_path.exists(). + mounts = [m for m in config.sandbox.mounts if Path(m.host_path).exists()] + _get_custom_mounts._cached = mounts # type: ignore[attr-defined] + return mounts + except Exception: + # If config loading fails, return an empty list without caching so that + # a later call can retry once the config is available. + return [] + + +def _is_custom_mount_path(path: str) -> bool: + """Check if path is under a custom mount container_path.""" + for mount in _get_custom_mounts(): + if path == mount.container_path or path.startswith(f"{mount.container_path}/"): + return True + return False + + +def _get_custom_mount_for_path(path: str): + """Get the mount config matching this path (longest prefix first).""" + best = None + for mount in _get_custom_mounts(): + if path == mount.container_path or path.startswith(f"{mount.container_path}/"): + if best is None or len(mount.container_path) > len(best.container_path): + best = mount + return best + + def _extract_thread_id_from_thread_data(thread_data: "ThreadDataState | None") -> str | None: """Extract thread_id from thread_data by inspecting workspace_path. @@ -448,6 +496,8 @@ def mask_local_paths_in_output(output: str, thread_data: ThreadDataState | None) result = pattern.sub(replace_acp, result) + # Custom mount host paths are masked by LocalSandbox._reverse_resolve_paths_in_output() + # Mask user-data host paths if thread_data is None: return result @@ -496,6 +546,7 @@ def validate_local_tool_path(path: str, thread_data: ThreadDataState | None, *, - ``/mnt/user-data/*`` — always allowed (read + write) - ``/mnt/skills/*`` — allowed only when *read_only* is True - ``/mnt/acp-workspace/*`` — allowed only when *read_only* is True + - Custom mount paths (from config.yaml) — respects per-mount ``read_only`` flag Args: path: The virtual path to validate. @@ -527,7 +578,14 @@ def validate_local_tool_path(path: str, thread_data: ThreadDataState | None, *, if path.startswith(f"{VIRTUAL_PATH_PREFIX}/"): return - raise PermissionError(f"Only paths under {VIRTUAL_PATH_PREFIX}/, {_get_skills_container_path()}/, or {_ACP_WORKSPACE_VIRTUAL_PATH}/ are allowed") + # Custom mount paths — respect read_only config + if _is_custom_mount_path(path): + mount = _get_custom_mount_for_path(path) + if mount and mount.read_only and not read_only: + raise PermissionError(f"Write access to read-only mount is not allowed: {path}") + return + + raise PermissionError(f"Only paths under {VIRTUAL_PATH_PREFIX}/, {_get_skills_container_path()}/, {_ACP_WORKSPACE_VIRTUAL_PATH}/, or configured mount paths are allowed") def _validate_resolved_user_data_path(resolved: Path, thread_data: ThreadDataState) -> None: @@ -577,9 +635,10 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState boundary and must not be treated as isolation from the host filesystem. In local mode, commands must use virtual paths under /mnt/user-data for - user data access. Skills paths under /mnt/skills and ACP workspace paths - under /mnt/acp-workspace are allowed (path-traversal checks only; write - prevention for bash commands is not enforced here). + user data access. Skills paths under /mnt/skills, ACP workspace paths + under /mnt/acp-workspace, and custom mount container paths (configured in + config.yaml) are allowed (path-traversal checks only; write prevention + for bash commands is not enforced here). A small allowlist of common system path prefixes is kept for executable and device references (e.g. /bin/sh, /dev/null). """ @@ -614,6 +673,11 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState _reject_path_traversal(absolute_path) continue + # Allow custom mount container paths + if _is_custom_mount_path(absolute_path): + _reject_path_traversal(absolute_path) + continue + if any(absolute_path == prefix.rstrip("/") or absolute_path.startswith(prefix) for prefix in _LOCAL_BASH_SYSTEM_PATH_PREFIXES): continue @@ -658,6 +722,8 @@ def replace_virtual_paths_in_command(command: str, thread_data: ThreadDataState result = acp_pattern.sub(replace_acp_match, result) + # Custom mount paths are resolved by LocalSandbox._resolve_paths_in_command() + # Replace user-data paths if VIRTUAL_PATH_PREFIX in result and thread_data is not None: pattern = re.compile(rf"{re.escape(VIRTUAL_PATH_PREFIX)}(/[^\s\"';&|<>()]*)?") @@ -954,8 +1020,9 @@ def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path: path = _resolve_skills_path(path) elif _is_acp_workspace_path(path): path = _resolve_acp_workspace_path(path, _extract_thread_id_from_thread_data(thread_data)) - else: + elif not _is_custom_mount_path(path): path = _resolve_and_validate_user_data_path(path, thread_data) + # Custom mount paths are resolved by LocalSandbox._resolve_path() children = sandbox.list_dir(path) if not children: return "(empty)" @@ -1117,8 +1184,9 @@ def read_file_tool( path = _resolve_skills_path(path) elif _is_acp_workspace_path(path): path = _resolve_acp_workspace_path(path, _extract_thread_id_from_thread_data(thread_data)) - else: + elif not _is_custom_mount_path(path): path = _resolve_and_validate_user_data_path(path, thread_data) + # Custom mount paths are resolved by LocalSandbox._resolve_path() content = sandbox.read_file(path) if not content: return "(empty)" @@ -1166,7 +1234,9 @@ def write_file_tool( if is_local_sandbox(runtime): thread_data = get_thread_data(runtime) validate_local_tool_path(path, thread_data) - path = _resolve_and_validate_user_data_path(path, thread_data) + if not _is_custom_mount_path(path): + path = _resolve_and_validate_user_data_path(path, thread_data) + # Custom mount paths are resolved by LocalSandbox._resolve_path() with get_file_operation_lock(sandbox, path): sandbox.write_file(path, content, append) return "OK" @@ -1208,7 +1278,9 @@ def str_replace_tool( if is_local_sandbox(runtime): thread_data = get_thread_data(runtime) validate_local_tool_path(path, thread_data) - path = _resolve_and_validate_user_data_path(path, thread_data) + if not _is_custom_mount_path(path): + path = _resolve_and_validate_user_data_path(path, thread_data) + # Custom mount paths are resolved by LocalSandbox._resolve_path() with get_file_operation_lock(sandbox, path): content = sandbox.read_file(path) if not content: diff --git a/backend/tests/test_local_sandbox_provider_mounts.py b/backend/tests/test_local_sandbox_provider_mounts.py new file mode 100644 index 000000000..0eb6d4654 --- /dev/null +++ b/backend/tests/test_local_sandbox_provider_mounts.py @@ -0,0 +1,388 @@ +import errno +from types import SimpleNamespace +from unittest.mock import patch + +import pytest + +from deerflow.sandbox.local.local_sandbox import LocalSandbox, PathMapping +from deerflow.sandbox.local.local_sandbox_provider import LocalSandboxProvider + + +class TestPathMapping: + def test_path_mapping_dataclass(self): + mapping = PathMapping(container_path="/mnt/skills", local_path="/home/user/skills", read_only=True) + assert mapping.container_path == "/mnt/skills" + assert mapping.local_path == "/home/user/skills" + assert mapping.read_only is True + + def test_path_mapping_defaults_to_false(self): + mapping = PathMapping(container_path="/mnt/data", local_path="/home/user/data") + assert mapping.read_only is False + + +class TestLocalSandboxPathResolution: + def test_resolve_path_exact_match(self): + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/skills", local_path="/home/user/skills"), + ], + ) + resolved = sandbox._resolve_path("/mnt/skills") + assert resolved == "/home/user/skills" + + def test_resolve_path_nested_path(self): + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/skills", local_path="/home/user/skills"), + ], + ) + resolved = sandbox._resolve_path("/mnt/skills/agent/prompt.py") + assert resolved == "/home/user/skills/agent/prompt.py" + + def test_resolve_path_no_mapping(self): + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/skills", local_path="/home/user/skills"), + ], + ) + resolved = sandbox._resolve_path("/mnt/other/file.txt") + assert resolved == "/mnt/other/file.txt" + + def test_resolve_path_longest_prefix_first(self): + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/skills", local_path="/home/user/skills"), + PathMapping(container_path="/mnt", local_path="/var/mnt"), + ], + ) + resolved = sandbox._resolve_path("/mnt/skills/file.py") + # Should match /mnt/skills first (longer prefix) + assert resolved == "/home/user/skills/file.py" + + def test_reverse_resolve_path_exact_match(self, tmp_path): + skills_dir = tmp_path / "skills" + skills_dir.mkdir() + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/skills", local_path=str(skills_dir)), + ], + ) + resolved = sandbox._reverse_resolve_path(str(skills_dir)) + assert resolved == "/mnt/skills" + + def test_reverse_resolve_path_nested(self, tmp_path): + skills_dir = tmp_path / "skills" + skills_dir.mkdir() + file_path = skills_dir / "agent" / "prompt.py" + file_path.parent.mkdir() + file_path.write_text("test") + + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/skills", local_path=str(skills_dir)), + ], + ) + resolved = sandbox._reverse_resolve_path(str(file_path)) + assert resolved == "/mnt/skills/agent/prompt.py" + + +class TestReadOnlyPath: + def test_is_read_only_true(self): + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/skills", local_path="/home/user/skills", read_only=True), + ], + ) + assert sandbox._is_read_only_path("/home/user/skills/file.py") is True + + def test_is_read_only_false_for_writable(self): + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/data", local_path="/home/user/data", read_only=False), + ], + ) + assert sandbox._is_read_only_path("/home/user/data/file.txt") is False + + def test_is_read_only_false_for_unmapped_path(self): + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/skills", local_path="/home/user/skills", read_only=True), + ], + ) + # Path not under any mapping + assert sandbox._is_read_only_path("/tmp/other/file.txt") is False + + def test_is_read_only_true_for_exact_match(self): + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/skills", local_path="/home/user/skills", read_only=True), + ], + ) + assert sandbox._is_read_only_path("/home/user/skills") is True + + def test_write_file_blocked_on_read_only(self, tmp_path): + skills_dir = tmp_path / "skills" + skills_dir.mkdir() + + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/skills", local_path=str(skills_dir), read_only=True), + ], + ) + # Skills dir is read-only, write should be blocked + with pytest.raises(OSError) as exc_info: + sandbox.write_file("/mnt/skills/new_file.py", "content") + assert exc_info.value.errno == errno.EROFS + + def test_write_file_allowed_on_writable_mount(self, tmp_path): + data_dir = tmp_path / "data" + data_dir.mkdir() + + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/data", local_path=str(data_dir), read_only=False), + ], + ) + sandbox.write_file("/mnt/data/file.txt", "content") + assert (data_dir / "file.txt").read_text() == "content" + + def test_update_file_blocked_on_read_only(self, tmp_path): + skills_dir = tmp_path / "skills" + skills_dir.mkdir() + existing_file = skills_dir / "existing.py" + existing_file.write_bytes(b"original") + + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/skills", local_path=str(skills_dir), read_only=True), + ], + ) + with pytest.raises(OSError) as exc_info: + sandbox.update_file("/mnt/skills/existing.py", b"updated") + assert exc_info.value.errno == errno.EROFS + + +class TestMultipleMounts: + def test_multiple_read_write_mounts(self, tmp_path): + skills_dir = tmp_path / "skills" + skills_dir.mkdir() + data_dir = tmp_path / "data" + data_dir.mkdir() + external_dir = tmp_path / "external" + external_dir.mkdir() + + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/skills", local_path=str(skills_dir), read_only=True), + PathMapping(container_path="/mnt/data", local_path=str(data_dir), read_only=False), + PathMapping(container_path="/mnt/external", local_path=str(external_dir), read_only=True), + ], + ) + + # Skills is read-only + with pytest.raises(OSError): + sandbox.write_file("/mnt/skills/file.py", "content") + + # Data is writable + sandbox.write_file("/mnt/data/file.txt", "data content") + assert (data_dir / "file.txt").read_text() == "data content" + + # External is read-only + with pytest.raises(OSError): + sandbox.write_file("/mnt/external/file.txt", "content") + + def test_nested_mounts_writable_under_readonly(self, tmp_path): + """A writable mount nested under a read-only mount should allow writes.""" + ro_dir = tmp_path / "ro" + ro_dir.mkdir() + rw_dir = ro_dir / "writable" + rw_dir.mkdir() + + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/repo", local_path=str(ro_dir), read_only=True), + PathMapping(container_path="/mnt/repo/writable", local_path=str(rw_dir), read_only=False), + ], + ) + + # Parent mount is read-only + with pytest.raises(OSError): + sandbox.write_file("/mnt/repo/file.txt", "content") + + # Nested writable mount should allow writes + sandbox.write_file("/mnt/repo/writable/file.txt", "content") + assert (rw_dir / "file.txt").read_text() == "content" + + def test_execute_command_path_replacement(self, tmp_path, monkeypatch): + data_dir = tmp_path / "data" + data_dir.mkdir() + test_file = data_dir / "test.txt" + test_file.write_text("hello") + + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/data", local_path=str(data_dir)), + ], + ) + + # Mock subprocess to capture the resolved command + captured = {} + original_run = __import__("subprocess").run + + def mock_run(*args, **kwargs): + if len(args) > 0: + captured["command"] = args[0] + return original_run(*args, **kwargs) + + monkeypatch.setattr("deerflow.sandbox.local.local_sandbox.subprocess.run", mock_run) + monkeypatch.setattr("deerflow.sandbox.local.local_sandbox.LocalSandbox._get_shell", lambda self: "/bin/sh") + + sandbox.execute_command("cat /mnt/data/test.txt") + # Verify the command received the resolved local path + assert str(data_dir) in captured.get("command", "") + + def test_reverse_resolve_path_does_not_match_partial_prefix(self, tmp_path): + foo_dir = tmp_path / "foo" + foo_dir.mkdir() + foobar_dir = tmp_path / "foobar" + foobar_dir.mkdir() + target = foobar_dir / "file.txt" + target.write_text("test") + + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/foo", local_path=str(foo_dir)), + ], + ) + + resolved = sandbox._reverse_resolve_path(str(target)) + assert resolved == str(target.resolve()) + + def test_reverse_resolve_paths_in_output_supports_backslash_separator(self, tmp_path): + mount_dir = tmp_path / "mount" + mount_dir.mkdir() + sandbox = LocalSandbox( + "test", + [ + PathMapping(container_path="/mnt/data", local_path=str(mount_dir)), + ], + ) + + output = f"Copied: {mount_dir}\\file.txt" + masked = sandbox._reverse_resolve_paths_in_output(output) + + assert "/mnt/data/file.txt" in masked + assert str(mount_dir) not in masked + + +class TestLocalSandboxProviderMounts: + def test_setup_path_mappings_uses_configured_skills_container_path_as_reserved_prefix(self, tmp_path): + skills_dir = tmp_path / "skills" + skills_dir.mkdir() + custom_dir = tmp_path / "custom" + custom_dir.mkdir() + + from deerflow.config.sandbox_config import SandboxConfig, VolumeMountConfig + + sandbox_config = SandboxConfig( + use="deerflow.sandbox.local:LocalSandboxProvider", + mounts=[ + VolumeMountConfig(host_path=str(custom_dir), container_path="/custom-skills/nested", read_only=False), + ], + ) + config = SimpleNamespace( + skills=SimpleNamespace(container_path="/custom-skills", get_skills_path=lambda: skills_dir), + sandbox=sandbox_config, + ) + + with patch("deerflow.config.get_app_config", return_value=config): + provider = LocalSandboxProvider() + + assert [m.container_path for m in provider._path_mappings] == ["/custom-skills"] + + def test_setup_path_mappings_skips_relative_host_path(self, tmp_path): + skills_dir = tmp_path / "skills" + skills_dir.mkdir() + + from deerflow.config.sandbox_config import SandboxConfig, VolumeMountConfig + + sandbox_config = SandboxConfig( + use="deerflow.sandbox.local:LocalSandboxProvider", + mounts=[ + VolumeMountConfig(host_path="relative/path", container_path="/mnt/data", read_only=False), + ], + ) + config = SimpleNamespace( + skills=SimpleNamespace(container_path="/mnt/skills", get_skills_path=lambda: skills_dir), + sandbox=sandbox_config, + ) + + with patch("deerflow.config.get_app_config", return_value=config): + provider = LocalSandboxProvider() + + assert [m.container_path for m in provider._path_mappings] == ["/mnt/skills"] + + def test_setup_path_mappings_skips_non_absolute_container_path(self, tmp_path): + skills_dir = tmp_path / "skills" + skills_dir.mkdir() + custom_dir = tmp_path / "custom" + custom_dir.mkdir() + + from deerflow.config.sandbox_config import SandboxConfig, VolumeMountConfig + + sandbox_config = SandboxConfig( + use="deerflow.sandbox.local:LocalSandboxProvider", + mounts=[ + VolumeMountConfig(host_path=str(custom_dir), container_path="mnt/data", read_only=False), + ], + ) + config = SimpleNamespace( + skills=SimpleNamespace(container_path="/mnt/skills", get_skills_path=lambda: skills_dir), + sandbox=sandbox_config, + ) + + with patch("deerflow.config.get_app_config", return_value=config): + provider = LocalSandboxProvider() + + assert [m.container_path for m in provider._path_mappings] == ["/mnt/skills"] + + def test_setup_path_mappings_normalizes_container_path_trailing_slash(self, tmp_path): + skills_dir = tmp_path / "skills" + skills_dir.mkdir() + custom_dir = tmp_path / "custom" + custom_dir.mkdir() + + from deerflow.config.sandbox_config import SandboxConfig, VolumeMountConfig + + sandbox_config = SandboxConfig( + use="deerflow.sandbox.local:LocalSandboxProvider", + mounts=[ + VolumeMountConfig(host_path=str(custom_dir), container_path="/mnt/data/", read_only=False), + ], + ) + config = SimpleNamespace( + skills=SimpleNamespace(container_path="/mnt/skills", get_skills_path=lambda: skills_dir), + sandbox=sandbox_config, + ) + + with patch("deerflow.config.get_app_config", return_value=config): + provider = LocalSandboxProvider() + + assert [m.container_path for m in provider._path_mappings] == ["/mnt/skills", "/mnt/data"] diff --git a/backend/tests/test_sandbox_tools_security.py b/backend/tests/test_sandbox_tools_security.py index 02d1b27ce..01aedf6be 100644 --- a/backend/tests/test_sandbox_tools_security.py +++ b/backend/tests/test_sandbox_tools_security.py @@ -8,7 +8,10 @@ import pytest from deerflow.sandbox.tools import ( VIRTUAL_PATH_PREFIX, _apply_cwd_prefix, + _get_custom_mount_for_path, + _get_custom_mounts, _is_acp_workspace_path, + _is_custom_mount_path, _is_skills_path, _reject_path_traversal, _resolve_acp_workspace_path, @@ -96,6 +99,25 @@ def test_validate_local_tool_path_rejects_non_virtual_path() -> None: validate_local_tool_path("/Users/someone/config.yaml", _THREAD_DATA) +def test_validate_local_tool_path_rejects_non_virtual_path_mentions_configured_mounts() -> None: + with pytest.raises(PermissionError, match="configured mount paths"): + validate_local_tool_path("/Users/someone/config.yaml", _THREAD_DATA) + + +def test_validate_local_tool_path_prioritizes_user_data_before_custom_mounts() -> None: + from deerflow.config.sandbox_config import VolumeMountConfig + + mounts = [ + VolumeMountConfig(host_path="/tmp/host-user-data", container_path=VIRTUAL_PATH_PREFIX, read_only=False), + ] + with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=mounts): + validate_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/workspace/file.txt", _THREAD_DATA, read_only=True) + + with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=mounts): + with pytest.raises(PermissionError, match="path traversal"): + validate_local_tool_path(f"{VIRTUAL_PATH_PREFIX}/workspace/../../etc/passwd", _THREAD_DATA, read_only=True) + + def test_validate_local_tool_path_rejects_bare_virtual_root() -> None: """The bare /mnt/user-data root without trailing slash is not a valid sub-path.""" with pytest.raises(PermissionError, match="Only paths under"): @@ -567,6 +589,156 @@ def test_validate_local_bash_command_paths_allows_mcp_filesystem_paths() -> None validate_local_bash_command_paths("ls /mnt/d/workspace", _THREAD_DATA) +# ---------- Custom mount path tests ---------- + + +def _mock_custom_mounts(): + """Create mock VolumeMountConfig objects for testing.""" + from deerflow.config.sandbox_config import VolumeMountConfig + + return [ + VolumeMountConfig(host_path="/home/user/code-read", container_path="/mnt/code-read", read_only=True), + VolumeMountConfig(host_path="/home/user/data", container_path="/mnt/data", read_only=False), + ] + + +def test_is_custom_mount_path_recognises_configured_mounts() -> None: + with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=_mock_custom_mounts()): + assert _is_custom_mount_path("/mnt/code-read") is True + assert _is_custom_mount_path("/mnt/code-read/src/main.py") is True + assert _is_custom_mount_path("/mnt/data") is True + assert _is_custom_mount_path("/mnt/data/file.txt") is True + assert _is_custom_mount_path("/mnt/code-read-extra/foo") is False + assert _is_custom_mount_path("/mnt/other") is False + + +def test_get_custom_mount_for_path_returns_longest_prefix() -> None: + from deerflow.config.sandbox_config import VolumeMountConfig + + mounts = [ + VolumeMountConfig(host_path="/var/mnt", container_path="/mnt", read_only=False), + VolumeMountConfig(host_path="/home/user/code", container_path="/mnt/code", read_only=True), + ] + with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=mounts): + mount = _get_custom_mount_for_path("/mnt/code/file.py") + assert mount is not None + assert mount.container_path == "/mnt/code" + + +def test_validate_local_tool_path_allows_custom_mount_read() -> None: + """read_file / ls should be able to access custom mount paths.""" + with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=_mock_custom_mounts()): + validate_local_tool_path("/mnt/code-read/src/main.py", _THREAD_DATA, read_only=True) + validate_local_tool_path("/mnt/data/file.txt", _THREAD_DATA, read_only=True) + + +def test_validate_local_tool_path_blocks_read_only_mount_write() -> None: + """write_file / str_replace must NOT write to read-only custom mounts.""" + with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=_mock_custom_mounts()): + with pytest.raises(PermissionError, match="Write access to read-only mount is not allowed"): + validate_local_tool_path("/mnt/code-read/src/main.py", _THREAD_DATA, read_only=False) + + +def test_validate_local_tool_path_allows_writable_mount_write() -> None: + """write_file / str_replace should succeed on writable custom mounts.""" + with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=_mock_custom_mounts()): + validate_local_tool_path("/mnt/data/file.txt", _THREAD_DATA, read_only=False) + + +def test_validate_local_tool_path_blocks_traversal_in_custom_mount() -> None: + """Path traversal via .. in custom mount paths must be rejected.""" + with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=_mock_custom_mounts()): + with pytest.raises(PermissionError, match="path traversal"): + validate_local_tool_path("/mnt/code-read/../../etc/passwd", _THREAD_DATA, read_only=True) + + +def test_validate_local_bash_command_paths_allows_custom_mount() -> None: + """bash commands referencing custom mount paths should be allowed.""" + with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=_mock_custom_mounts()): + validate_local_bash_command_paths("cat /mnt/code-read/src/main.py", _THREAD_DATA) + validate_local_bash_command_paths("ls /mnt/data", _THREAD_DATA) + + +def test_validate_local_bash_command_paths_blocks_traversal_in_custom_mount() -> None: + """Bash commands with traversal in custom mount paths should be blocked.""" + with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=_mock_custom_mounts()): + with pytest.raises(PermissionError, match="path traversal"): + validate_local_bash_command_paths("cat /mnt/code-read/../../etc/passwd", _THREAD_DATA) + + +def test_validate_local_bash_command_paths_still_blocks_non_mount_paths() -> None: + """Paths not matching any custom mount should still be blocked.""" + with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=_mock_custom_mounts()): + with pytest.raises(PermissionError, match="Unsafe absolute paths"): + validate_local_bash_command_paths("cat /etc/shadow", _THREAD_DATA) + + +def test_get_custom_mounts_caching(monkeypatch, tmp_path) -> None: + """_get_custom_mounts should cache after first successful load.""" + # Clear any existing cache + if hasattr(_get_custom_mounts, "_cached"): + monkeypatch.delattr(_get_custom_mounts, "_cached") + + # Use real directories so host_path.exists() filtering passes + dir_a = tmp_path / "code-read" + dir_a.mkdir() + dir_b = tmp_path / "data" + dir_b.mkdir() + + from deerflow.config.sandbox_config import SandboxConfig, VolumeMountConfig + + mounts = [ + VolumeMountConfig(host_path=str(dir_a), container_path="/mnt/code-read", read_only=True), + VolumeMountConfig(host_path=str(dir_b), container_path="/mnt/data", read_only=False), + ] + mock_sandbox = SandboxConfig(use="deerflow.sandbox.local:LocalSandboxProvider", mounts=mounts) + mock_config = SimpleNamespace(sandbox=mock_sandbox) + + with patch("deerflow.config.get_app_config", return_value=mock_config): + result = _get_custom_mounts() + assert len(result) == 2 + + # After caching, should return cached value even without mock + assert hasattr(_get_custom_mounts, "_cached") + assert len(_get_custom_mounts()) == 2 + + # Cleanup + monkeypatch.delattr(_get_custom_mounts, "_cached") + + +def test_get_custom_mounts_filters_nonexistent_host_path(monkeypatch, tmp_path) -> None: + """_get_custom_mounts should only return mounts whose host_path exists.""" + if hasattr(_get_custom_mounts, "_cached"): + monkeypatch.delattr(_get_custom_mounts, "_cached") + + from deerflow.config.sandbox_config import SandboxConfig, VolumeMountConfig + + existing_dir = tmp_path / "existing" + existing_dir.mkdir() + + mounts = [ + VolumeMountConfig(host_path=str(existing_dir), container_path="/mnt/existing", read_only=True), + VolumeMountConfig(host_path="/nonexistent/path/12345", container_path="/mnt/ghost", read_only=False), + ] + mock_sandbox = SandboxConfig(use="deerflow.sandbox.local:LocalSandboxProvider", mounts=mounts) + mock_config = SimpleNamespace(sandbox=mock_sandbox) + + with patch("deerflow.config.get_app_config", return_value=mock_config): + result = _get_custom_mounts() + assert len(result) == 1 + assert result[0].container_path == "/mnt/existing" + + # Cleanup + monkeypatch.delattr(_get_custom_mounts, "_cached") + + +def test_get_custom_mount_for_path_boundary_no_false_prefix_match() -> None: + """_get_custom_mount_for_path must not match /mnt/code-read-extra for /mnt/code-read.""" + with patch("deerflow.sandbox.tools._get_custom_mounts", return_value=_mock_custom_mounts()): + mount = _get_custom_mount_for_path("/mnt/code-read-extra/foo") + assert mount is None + + def test_str_replace_parallel_updates_should_preserve_both_edits(monkeypatch) -> None: class SharedSandbox: def __init__(self) -> None: diff --git a/config.example.yaml b/config.example.yaml index dc3db1a3c..3eb0b9e9e 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -375,6 +375,12 @@ sandbox: # not a secure isolation boundary for shell access. Enable only for fully # trusted, single-user local workflows. allow_host_bash: false + # Optional: Mount additional host directories into the sandbox. + # Each mount maps a host path to a virtual container path accessible by the agent. + # mounts: + # - host_path: /home/user/my-project # Absolute path on the host machine + # container_path: /mnt/my-project # Virtual path inside the sandbox + # read_only: true # Whether the mount is read-only (default: false) # Tool output truncation limits (characters). # bash uses middle-truncation (head + tail) since errors can appear anywhere in the output.