From 9ca68ffaaa00e53784c84cc9c6cd18144e33efc4 Mon Sep 17 00:00:00 2001 From: Evan Wu <850123119@qq.com> Date: Sun, 5 Apr 2026 15:52:22 +0800 Subject: [PATCH] fix: preserve virtual path separator style (#1828) * fix: preserve virtual path separator style * Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --------- Co-authored-by: Willem Jiang Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../harness/deerflow/sandbox/tools.py | 16 +++-- backend/tests/test_sandbox_tools_security.py | 63 +++++++++++++++++++ 2 files changed, 75 insertions(+), 4 deletions(-) diff --git a/backend/packages/harness/deerflow/sandbox/tools.py b/backend/packages/harness/deerflow/sandbox/tools.py index cad88dc93..b52131ff4 100644 --- a/backend/packages/harness/deerflow/sandbox/tools.py +++ b/backend/packages/harness/deerflow/sandbox/tools.py @@ -366,12 +366,17 @@ def _path_variants(path: str) -> set[str]: return {path, path.replace("\\", "/"), path.replace("/", "\\")} +def _path_separator_for_style(path: str) -> str: + return "\\" if "\\" in path and "/" not in path else "/" + + def _join_path_preserving_style(base: str, relative: str) -> str: if not relative: return base - if "/" in base and "\\" not in base: - return f"{base.rstrip('/')}/{relative}" - return str(Path(base) / relative) + separator = _path_separator_for_style(base) + normalized_relative = relative.replace("\\" if separator == "/" else "/", separator).lstrip("/\\") + stripped_base = base.rstrip("/\\") + return f"{stripped_base}{separator}{normalized_relative}" def _sanitize_error(error: Exception, runtime: "ToolRuntime[ContextT, ThreadState] | None" = None) -> str: @@ -416,7 +421,10 @@ def replace_virtual_path(path: str, thread_data: ThreadDataState | None) -> str: return actual_base if path.startswith(f"{virtual_base}/"): rest = path[len(virtual_base) :].lstrip("/") - return _join_path_preserving_style(actual_base, rest) + result = _join_path_preserving_style(actual_base, rest) + if path.endswith("/") and not result.endswith(("/", "\\")): + result += _path_separator_for_style(actual_base) + return result return path diff --git a/backend/tests/test_sandbox_tools_security.py b/backend/tests/test_sandbox_tools_security.py index 01aedf6be..268c5aada 100644 --- a/backend/tests/test_sandbox_tools_security.py +++ b/backend/tests/test_sandbox_tools_security.py @@ -42,6 +42,53 @@ def test_replace_virtual_path_maps_virtual_root_and_subpaths() -> None: assert Path(replace_virtual_path("/mnt/user-data", _THREAD_DATA)).as_posix() == "/tmp/deer-flow/threads/t1/user-data" +def test_replace_virtual_path_preserves_trailing_slash() -> None: + """Trailing slash must survive virtual-to-actual path replacement. + + Regression: '/mnt/user-data/workspace/' was previously returned without + the trailing slash, causing string concatenations like + output_dir + 'file.txt' to produce a missing-separator path. + """ + result = replace_virtual_path("/mnt/user-data/workspace/", _THREAD_DATA) + assert result.endswith("/"), f"Expected trailing slash, got: {result!r}" + assert result == "/tmp/deer-flow/threads/t1/user-data/workspace/" + + +def test_replace_virtual_path_preserves_trailing_slash_windows_style() -> None: + """Trailing slash must be preserved as backslash when actual_base is Windows-style. + + If actual_base uses backslash separators, appending '/' would produce a + mixed-separator path. The separator must match the style of actual_base. + """ + win_thread_data = { + "workspace_path": r"C:\deer-flow\threads\t1\user-data\workspace", + "uploads_path": r"C:\deer-flow\threads\t1\user-data\uploads", + "outputs_path": r"C:\deer-flow\threads\t1\user-data\outputs", + } + result = replace_virtual_path("/mnt/user-data/workspace/", win_thread_data) + assert result.endswith("\\"), f"Expected trailing backslash for Windows path, got: {result!r}" + assert "/" not in result, f"Mixed separators in Windows path: {result!r}" + + +def test_replace_virtual_path_preserves_windows_style_for_nested_subdir_trailing_slash() -> None: + """Nested Windows-style subdirectories must keep backslashes throughout.""" + win_thread_data = { + "workspace_path": r"C:\deer-flow\threads\t1\user-data\workspace", + "uploads_path": r"C:\deer-flow\threads\t1\user-data\uploads", + "outputs_path": r"C:\deer-flow\threads\t1\user-data\outputs", + } + result = replace_virtual_path("/mnt/user-data/workspace/subdir/", win_thread_data) + assert result == "C:\\deer-flow\\threads\\t1\\user-data\\workspace\\subdir\\" + assert "/" not in result, f"Mixed separators in Windows path: {result!r}" + + +def test_replace_virtual_paths_in_command_preserves_trailing_slash() -> None: + """Trailing slash on a virtual path inside a command must be preserved.""" + cmd = """python -c "output_dir = '/mnt/user-data/workspace/'; print(output_dir + 'some_file.txt')\"""" + result = replace_virtual_paths_in_command(cmd, _THREAD_DATA) + assert "/tmp/deer-flow/threads/t1/user-data/workspace/" in result, f"Trailing slash lost in: {result!r}" + + # ---------- mask_local_paths_in_output ---------- @@ -257,6 +304,22 @@ def test_validate_local_bash_command_paths_blocks_host_paths() -> None: validate_local_bash_command_paths("cat /etc/passwd", _THREAD_DATA) +def test_validate_local_bash_command_paths_allows_https_urls() -> None: + """URLs like https://github.com/... must not be flagged as unsafe absolute paths.""" + validate_local_bash_command_paths( + "cd /mnt/user-data/workspace && git clone https://github.com/CherryHQ/cherry-studio.git", + _THREAD_DATA, + ) + + +def test_validate_local_bash_command_paths_allows_http_urls() -> None: + """HTTP URLs must not be flagged as unsafe absolute paths.""" + validate_local_bash_command_paths( + "curl http://example.com/file.tar.gz -o /mnt/user-data/workspace/file.tar.gz", + _THREAD_DATA, + ) + + def test_validate_local_bash_command_paths_allows_virtual_and_system_paths() -> None: validate_local_bash_command_paths( "/bin/echo ok > /mnt/user-data/workspace/out.txt && cat /dev/null",