mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-04-25 11:18:22 +00:00
fix(sandbox): resolve paths in read_file/write_file content for LocalSandbox (#1935)
* fix(sandbox): resolve paths in read_file/write_file content for LocalSandbox In LocalSandbox mode, read_file and write_file now transform container paths in file content, matching the path handling behavior of bash tool. - write_file: resolves virtual paths in content to system paths before writing, so scripts with /mnt/user-data paths work when executed - read_file: reverse-resolves system paths back to virtual paths in returned content for consistency This fixes scenarios where agents write Python scripts with virtual paths, then execute them via bash tool expecting the paths to work. Fixes #1778 * fix(sandbox): address Copilot review — dedicated content resolver + forward-slash safety + tests - Extract _resolve_paths_in_content() separate from _resolve_paths_in_command() to decouple file-content path resolution from shell-command parsing - Normalize resolved paths to forward slashes to avoid Windows backslash escape issues in source files (e.g. \U in Python string literals) - Add 4 focused tests: write resolves content, forward-slash guarantee, read reverse-resolves content, and write→read roundtrip * style: fix ruff lint — remove extraneous f-string prefix * fix(sandbox): only reverse-resolve paths in agent-written files read_file previously applied _reverse_resolve_paths_in_output to ALL file content, which could silently rewrite paths in user uploads and external tool output (Willem Jiang review on #1935). Now tracks files written through write_file in _agent_written_paths. Only those files get reverse-resolved on read. Non-agent files are returned as-is. --------- Co-authored-by: JasonOA888 <JasonOA888@users.noreply.github.com>
This commit is contained in:
parent
5b633449f8
commit
dc50a7fdfb
@ -62,6 +62,9 @@ class LocalSandbox(Sandbox):
|
|||||||
"""
|
"""
|
||||||
super().__init__(id)
|
super().__init__(id)
|
||||||
self.path_mappings = path_mappings or []
|
self.path_mappings = path_mappings or []
|
||||||
|
# Track files written through write_file so read_file only
|
||||||
|
# reverse-resolves paths in agent-authored content.
|
||||||
|
self._agent_written_paths: set[str] = set()
|
||||||
|
|
||||||
def _is_read_only_path(self, resolved_path: str) -> bool:
|
def _is_read_only_path(self, resolved_path: str) -> bool:
|
||||||
"""Check if a resolved path is under a read-only mount.
|
"""Check if a resolved path is under a read-only mount.
|
||||||
@ -205,6 +208,39 @@ class LocalSandbox(Sandbox):
|
|||||||
|
|
||||||
return pattern.sub(replace_match, command)
|
return pattern.sub(replace_match, command)
|
||||||
|
|
||||||
|
def _resolve_paths_in_content(self, content: str) -> str:
    """Resolve container paths to local paths in arbitrary file content.

    Unlike ``_resolve_paths_in_command`` which uses shell-aware boundary
    characters, this method treats the content as plain text and resolves
    every occurrence of a container path prefix. Resolved paths are
    normalized to forward slashes to avoid backslash-escape issues on
    Windows hosts (e.g. ``C:\\Users\\..`` breaking Python string literals).

    A trailing ``/`` on the matched container path is preserved in the
    replacement, so content such as ``"/mnt/data/out/" + name`` still
    concatenates correctly after rewriting.

    Args:
        content: File content that may contain container paths.

    Returns:
        Content with container paths resolved to local paths (forward slashes).
        Returned unchanged when no path mappings are configured.
    """
    import re

    if not self.path_mappings:
        return content

    # Longest container prefix first: regex alternation takes the first
    # alternative that matches, so a nested mount must be tried before
    # its parent mount.
    sorted_mappings = sorted(self.path_mappings, key=lambda m: len(m.container_path), reverse=True)

    # The lookahead requires the prefix to end at a path boundary, so
    # "/mnt/data" does not match inside "/mnt/database". The optional tail
    # consumes the rest of the path up to whitespace, a quote, or a shell
    # metacharacter.
    patterns = [re.escape(m.container_path) + r"(?=/|$|[^\w./-])(?:/[^\s\"';&|<>()]*)?" for m in sorted_mappings]
    # Only the whole match is used below, so the groups are non-capturing.
    pattern = re.compile("|".join(f"(?:{p})" for p in patterns))

    def replace_match(match: re.Match) -> str:
        matched_path = match.group(0)
        # Normalize to forward slashes so that Windows backslash paths
        # don't create invalid escape sequences in source files.
        resolved = self._resolve_path(matched_path).replace("\\", "/")
        # NOTE(review): _resolve_path is defined outside this view; if it
        # normalizes via pathlib-style joins, a trailing separator is lost.
        # Restore it so the rewritten text keeps the same shape as the input.
        if matched_path.endswith("/") and not resolved.endswith("/"):
            resolved += "/"
        return resolved

    return pattern.sub(replace_match, content)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _get_shell() -> str:
|
def _get_shell() -> str:
|
||||||
"""Detect available shell executable with fallback."""
|
"""Detect available shell executable with fallback."""
|
||||||
@ -280,7 +316,14 @@ class LocalSandbox(Sandbox):
|
|||||||
resolved_path = self._resolve_path(path)
|
resolved_path = self._resolve_path(path)
|
||||||
try:
|
try:
|
||||||
with open(resolved_path, encoding="utf-8") as f:
|
with open(resolved_path, encoding="utf-8") as f:
|
||||||
return f.read()
|
content = f.read()
|
||||||
|
# Only reverse-resolve paths in files that were previously written
|
||||||
|
# by write_file (agent-authored content). User-uploaded files,
|
||||||
|
# external tool output, and other non-agent content should not be
|
||||||
|
# silently rewritten — see discussion on PR #1935.
|
||||||
|
if resolved_path in self._agent_written_paths:
|
||||||
|
content = self._reverse_resolve_paths_in_output(content)
|
||||||
|
return content
|
||||||
except OSError as e:
|
except OSError as e:
|
||||||
# Re-raise with the original path for clearer error messages, hiding internal resolved paths
|
# Re-raise with the original path for clearer error messages, hiding internal resolved paths
|
||||||
raise type(e)(e.errno, e.strerror, path) from None
|
raise type(e)(e.errno, e.strerror, path) from None
|
||||||
@ -293,9 +336,16 @@ class LocalSandbox(Sandbox):
|
|||||||
dir_path = os.path.dirname(resolved_path)
|
dir_path = os.path.dirname(resolved_path)
|
||||||
if dir_path:
|
if dir_path:
|
||||||
os.makedirs(dir_path, exist_ok=True)
|
os.makedirs(dir_path, exist_ok=True)
|
||||||
|
# Resolve container paths in content to local paths
|
||||||
|
# using the content-specific resolver (forward-slash safe)
|
||||||
|
resolved_content = self._resolve_paths_in_content(content)
|
||||||
mode = "a" if append else "w"
|
mode = "a" if append else "w"
|
||||||
with open(resolved_path, mode, encoding="utf-8") as f:
|
with open(resolved_path, mode, encoding="utf-8") as f:
|
||||||
f.write(content)
|
f.write(resolved_content)
|
||||||
|
# Track this path so read_file knows to reverse-resolve on read.
|
||||||
|
# Only agent-written files get reverse-resolved; user uploads and
|
||||||
|
# external tool output are left untouched.
|
||||||
|
self._agent_written_paths.add(resolved_path)
|
||||||
except OSError as e:
|
except OSError as e:
|
||||||
# Re-raise with the original path for clearer error messages, hiding internal resolved paths
|
# Re-raise with the original path for clearer error messages, hiding internal resolved paths
|
||||||
raise type(e)(e.errno, e.strerror, path) from None
|
raise type(e)(e.errno, e.strerror, path) from None
|
||||||
|
|||||||
@ -363,6 +363,98 @@ class TestLocalSandboxProviderMounts:
|
|||||||
|
|
||||||
assert [m.container_path for m in provider._path_mappings] == ["/mnt/skills"]
|
assert [m.container_path for m in provider._path_mappings] == ["/mnt/skills"]
|
||||||
|
|
||||||
|
def test_write_file_resolves_container_paths_in_content(self, tmp_path):
    """Container paths embedded in written content are replaced by the host path."""
    host_dir = tmp_path / "data"
    host_dir.mkdir()
    mapping = PathMapping(container_path="/mnt/data", local_path=str(host_dir))
    box = LocalSandbox("test", [mapping])

    script_source = 'import pathlib\npath = "/mnt/data/output"\nprint(path)'
    box.write_file("/mnt/data/script.py", script_source)

    # Read straight from disk to inspect what was actually persisted.
    on_disk = (host_dir / "script.py").read_text()
    expected_prefix = str(host_dir).replace("\\", "/")

    # The host directory must appear (with forward slashes) and the
    # original container path must be gone.
    assert expected_prefix in on_disk
    assert "/mnt/data/output" not in on_disk
|
||||||
|
|
||||||
|
def test_write_file_uses_forward_slashes_on_windows_paths(self, tmp_path):
    """Paths substituted into written content never contain backslashes."""
    host_dir = tmp_path / "data"
    host_dir.mkdir()
    mapping = PathMapping(container_path="/mnt/data", local_path=str(host_dir))
    box = LocalSandbox("test", [mapping])

    box.write_file("/mnt/data/config.py", 'DATA_DIR = "/mnt/data/files"')

    on_disk = (host_dir / "config.py").read_text()
    # Isolate the right-hand side of the assignment on its own line.
    after_assignment = on_disk.split("DATA_DIR = ")[1]
    assigned_value = after_assignment.split("\n")[0]

    # A backslash here could form an invalid escape sequence in the
    # generated Python source.
    assert "\\" not in assigned_value
|
||||||
|
|
||||||
|
def test_read_file_reverse_resolves_local_paths_in_agent_written_files(self, tmp_path):
    """Reading back a file created via write_file shows container paths again."""
    host_dir = tmp_path / "data"
    host_dir.mkdir()
    mapping = PathMapping(container_path="/mnt/data", local_path=str(host_dir))
    box = LocalSandbox("test", [mapping])

    # Go through write_file (not the filesystem) so the sandbox records
    # this path as agent-written.
    target = "/mnt/data/info.txt"
    box.write_file(target, "File located at: /mnt/data/info.txt")

    read_back = box.read_file(target)
    assert "/mnt/data/info.txt" in read_back
|
||||||
|
|
||||||
|
def test_read_file_does_not_reverse_resolve_non_agent_files(self, tmp_path):
    """Files that did not come from write_file are returned without path rewriting."""
    host_dir = tmp_path / "data"
    host_dir.mkdir()
    mapping = PathMapping(container_path="/mnt/data", local_path=str(host_dir))
    box = LocalSandbox("test", [mapping])

    # Bypass the sandbox and write directly to disk, standing in for a
    # user upload or output produced by an external tool.
    host_prefix = str(host_dir).replace("\\", "/")
    uploaded = host_dir / "config.yml"
    uploaded.write_text(f"output_dir: {host_prefix}/outputs")

    read_back = box.read_file("/mnt/data/config.yml")

    # The host path must still be present, i.e. no reverse resolution happened.
    assert host_prefix in read_back
|
||||||
|
|
||||||
|
def test_write_then_read_roundtrip(self, tmp_path):
    """A container path written through the sandbox is visible again on read."""
    host_dir = tmp_path / "data"
    host_dir.mkdir()
    mapping = PathMapping(container_path="/mnt/data", local_path=str(host_dir))
    box = LocalSandbox("test", [mapping])

    target = "/mnt/data/settings.py"
    payload = 'cfg = {"path": "/mnt/data/config.json", "flag": true}'
    box.write_file(target, payload)
    read_back = box.read_file(target)

    # write_file resolves the path and read_file reverse-resolves it, so
    # the caller should see the container path it originally wrote.
    assert "/mnt/data/config.json" in read_back
|
||||||
|
|
||||||
def test_setup_path_mappings_normalizes_container_path_trailing_slash(self, tmp_path):
|
def test_setup_path_mappings_normalizes_container_path_trailing_slash(self, tmp_path):
|
||||||
skills_dir = tmp_path / "skills"
|
skills_dir = tmp_path / "skills"
|
||||||
skills_dir.mkdir()
|
skills_dir.mkdir()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user