diff --git a/backend/packages/harness/deerflow/sandbox/local/local_sandbox.py b/backend/packages/harness/deerflow/sandbox/local/local_sandbox.py
index 83129724b..2da0a678f 100644
--- a/backend/packages/harness/deerflow/sandbox/local/local_sandbox.py
+++ b/backend/packages/harness/deerflow/sandbox/local/local_sandbox.py
@@ -62,6 +62,9 @@ class LocalSandbox(Sandbox):
         """
         super().__init__(id)
         self.path_mappings = path_mappings or []
+        # Track files written through write_file so read_file only
+        # reverse-resolves paths in agent-authored content.
+        self._agent_written_paths: set[str] = set()
 
     def _is_read_only_path(self, resolved_path: str) -> bool:
         """Check if a resolved path is under a read-only mount.
@@ -205,6 +208,48 @@ class LocalSandbox(Sandbox):
 
         return pattern.sub(replace_match, command)
 
+    def _resolve_paths_in_content(self, content: str) -> str:
+        """Resolve container paths to local paths in arbitrary file content.
+
+        Counterpart of ``_resolve_paths_in_command`` for file bodies. A
+        container path prefix is rewritten only where it ends at a path
+        boundary (``/mnt/data`` is not matched inside ``/mnt/database``), and
+        the path tail that follows it is resolved together with it. Resolved
+        paths are normalized to forward slashes to avoid backslash-escape
+        issues on Windows hosts (e.g. ``C:\\Users\\..`` breaking Python string
+        literals).
+
+        Args:
+            content: File content that may contain container paths.
+
+        Returns:
+            Content with container paths resolved to local paths (forward slashes).
+        """
+        import re
+
+        if not self.path_mappings:
+            return content
+
+        # Longest container path first, so nested mounts are tried before
+        # their parents in the alternation below.
+        sorted_mappings = sorted(self.path_mappings, key=lambda m: len(m.container_path), reverse=True)
+
+        # Per mapping: the escaped prefix, a lookahead requiring a boundary
+        # ("/", end of text, or a character that cannot continue a path
+        # segment), then an optional tail that stops at whitespace, quotes,
+        # or shell metacharacters.
+        patterns = [re.escape(m.container_path) + r"(?=/|$|[^\w./-])(?:/[^\s\"';&|<>()]*)?" for m in sorted_mappings]
+        pattern = re.compile("|".join(f"(?:{p})" for p in patterns))
+
+        def replace_match(match: re.Match) -> str:
+            matched_path = match.group(0)
+            resolved = self._resolve_path(matched_path)
+            # Normalize to forward slashes so that Windows backslash paths
+            # don't create invalid escape sequences in source files.
+            return resolved.replace("\\", "/")
+
+        return pattern.sub(replace_match, content)
+
     @staticmethod
     def _get_shell() -> str:
         """Detect available shell executable with fallback."""
@@ -280,7 +325,14 @@ class LocalSandbox(Sandbox):
         resolved_path = self._resolve_path(path)
         try:
             with open(resolved_path, encoding="utf-8") as f:
-                return f.read()
+                content = f.read()
+            # Only reverse-resolve paths in files that were previously written
+            # by write_file (agent-authored content). User-uploaded files,
+            # external tool output, and other non-agent content should not be
+            # silently rewritten — see discussion on PR #1935.
+            if resolved_path in self._agent_written_paths:
+                content = self._reverse_resolve_paths_in_output(content)
+            return content
         except OSError as e:
             # Re-raise with the original path for clearer error messages, hiding internal resolved paths
             raise type(e)(e.errno, e.strerror, path) from None
@@ -293,9 +345,19 @@ class LocalSandbox(Sandbox):
             dir_path = os.path.dirname(resolved_path)
             if dir_path:
                 os.makedirs(dir_path, exist_ok=True)
+            # Resolve container paths in content to local paths
+            # using the content-specific resolver (forward-slash safe)
+            resolved_content = self._resolve_paths_in_content(content)
             mode = "a" if append else "w"
             with open(resolved_path, mode, encoding="utf-8") as f:
-                f.write(content)
+                f.write(resolved_content)
+            # Track this path so read_file knows to reverse-resolve on read.
+            # Only agent-written files get reverse-resolved; user uploads and
+            # external tool output are left untouched.
+            # NOTE(review): with append=True this also marks a file that already
+            # existed and was not written through write_file, so read_file will
+            # reverse-resolve its earlier content too -- confirm this is intended.
+            self._agent_written_paths.add(resolved_path)
         except OSError as e:
             # Re-raise with the original path for clearer error messages, hiding internal resolved paths
             raise type(e)(e.errno, e.strerror, path) from None
diff --git a/backend/tests/test_local_sandbox_provider_mounts.py b/backend/tests/test_local_sandbox_provider_mounts.py
index 0eb6d4654..18e180e3b 100644
--- a/backend/tests/test_local_sandbox_provider_mounts.py
+++ b/backend/tests/test_local_sandbox_provider_mounts.py
@@ -363,6 +363,98 @@ class TestLocalSandboxProviderMounts:
 
         assert [m.container_path for m in provider._path_mappings] == ["/mnt/skills"]
 
+    def test_write_file_resolves_container_paths_in_content(self, tmp_path):
+        """write_file should replace container paths in file content with local paths."""
+        data_dir = tmp_path / "data"
+        data_dir.mkdir()
+
+        sandbox = LocalSandbox(
+            "test",
+            [
+                PathMapping(container_path="/mnt/data", local_path=str(data_dir)),
+            ],
+        )
+        sandbox.write_file(
+            "/mnt/data/script.py",
+            'import pathlib\npath = "/mnt/data/output"\nprint(path)',
+        )
+        written = (data_dir / "script.py").read_text(encoding="utf-8")
+        # Container path should be resolved to local path (forward slashes)
+        assert str(data_dir).replace("\\", "/") in written
+        assert "/mnt/data/output" not in written
+
+    def test_write_file_uses_forward_slashes_on_windows_paths(self, tmp_path):
+        """Resolved paths in content should always use forward slashes."""
+        data_dir = tmp_path / "data"
+        data_dir.mkdir()
+
+        sandbox = LocalSandbox(
+            "test",
+            [
+                PathMapping(container_path="/mnt/data", local_path=str(data_dir)),
+            ],
+        )
+        sandbox.write_file(
+            "/mnt/data/config.py",
+            'DATA_DIR = "/mnt/data/files"',
+        )
+        written = (data_dir / "config.py").read_text(encoding="utf-8")
+        # Must not contain backslashes that could break escape sequences
+        assert "\\" not in written.split("DATA_DIR = ")[1].split("\n")[0]
+
+    def test_read_file_reverse_resolves_local_paths_in_agent_written_files(self, tmp_path):
+        """read_file should convert local paths back to container paths in agent-written files."""
+        data_dir = tmp_path / "data"
+        data_dir.mkdir()
+
+        sandbox = LocalSandbox(
+            "test",
+            [
+                PathMapping(container_path="/mnt/data", local_path=str(data_dir)),
+            ],
+        )
+        # Use write_file so the path is tracked as agent-written
+        sandbox.write_file("/mnt/data/info.txt", "File located at: /mnt/data/info.txt")
+
+        content = sandbox.read_file("/mnt/data/info.txt")
+        assert "/mnt/data/info.txt" in content
+
+    def test_read_file_does_not_reverse_resolve_non_agent_files(self, tmp_path):
+        """read_file should NOT rewrite paths in user-uploaded or external files."""
+        data_dir = tmp_path / "data"
+        data_dir.mkdir()
+
+        sandbox = LocalSandbox(
+            "test",
+            [
+                PathMapping(container_path="/mnt/data", local_path=str(data_dir)),
+            ],
+        )
+        # Write directly to filesystem (simulates user upload or external tool output)
+        local_path = str(data_dir).replace("\\", "/")
+        (data_dir / "config.yml").write_text(f"output_dir: {local_path}/outputs", encoding="utf-8")
+
+        content = sandbox.read_file("/mnt/data/config.yml")
+        # Content should be returned as-is, NOT reverse-resolved
+        assert content == f"output_dir: {local_path}/outputs"
+
+    def test_write_then_read_roundtrip(self, tmp_path):
+        """Container paths survive a write → read roundtrip."""
+        data_dir = tmp_path / "data"
+        data_dir.mkdir()
+
+        sandbox = LocalSandbox(
+            "test",
+            [
+                PathMapping(container_path="/mnt/data", local_path=str(data_dir)),
+            ],
+        )
+        original = 'cfg = {"path": "/mnt/data/config.json", "flag": true}'
+        sandbox.write_file("/mnt/data/settings.py", original)
+        result = sandbox.read_file("/mnt/data/settings.py")
+        # The container path should be preserved through roundtrip
+        assert "/mnt/data/config.json" in result
+
     def test_setup_path_mappings_normalizes_container_path_trailing_slash(self, tmp_path):
         skills_dir = tmp_path / "skills"
         skills_dir.mkdir()