From ca1b7d5f48bf46db80898af21d20f1da23ccdf69 Mon Sep 17 00:00:00 2001 From: Shawn Jasper Date: Sat, 18 Apr 2026 08:46:59 +0800 Subject: [PATCH] fix(sandbox): add missing path masking in ls_tool output (#2317) ls_tool was the only file-system tool that did not call mask_local_paths_in_output() before returning its result, causing host absolute paths (e.g. /Users/.../backend/.deer-flow/knowledge-base/...) to leak to the LLM instead of the expected virtual paths (/mnt/knowledge-base/...). This patch: - Adds the mask_local_paths_in_output() call to ls_tool, consistent with bash_tool, glob_tool and grep_tool. - Initialises thread_data = None before the is_local_sandbox branch (same pattern as glob_tool) so the variable is always in scope. - Adds three new tests covering user-data path masking, skills path masking and the empty-directory edge case. --- .../harness/deerflow/sandbox/tools.py | 3 + backend/tests/test_sandbox_search_tools.py | 70 ++++++++++++++++++- 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/backend/packages/harness/deerflow/sandbox/tools.py b/backend/packages/harness/deerflow/sandbox/tools.py index 089fa725d..7b09358e7 100644 --- a/backend/packages/harness/deerflow/sandbox/tools.py +++ b/backend/packages/harness/deerflow/sandbox/tools.py @@ -1047,6 +1047,7 @@ def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path: sandbox = ensure_sandbox_initialized(runtime) ensure_thread_directories_exist(runtime) requested_path = path + thread_data = None if is_local_sandbox(runtime): thread_data = get_thread_data(runtime) validate_local_tool_path(path, thread_data, read_only=True) @@ -1061,6 +1062,8 @@ def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path: if not children: return "(empty)" output = "\n".join(children) + if thread_data is not None: + output = mask_local_paths_in_output(output, thread_data) try: from deerflow.config.app_config import get_app_config diff --git a/backend/tests/test_sandbox_search_tools.py b/backend/tests/test_sandbox_search_tools.py index 6b6c686c4..88e87a783 100644 --- a/backend/tests/test_sandbox_search_tools.py +++ b/backend/tests/test_sandbox_search_tools.py @@ -4,7 +4,7 @@ from unittest.mock import patch from deerflow.community.aio_sandbox.aio_sandbox import AioSandbox from deerflow.sandbox.local.local_sandbox import LocalSandbox from deerflow.sandbox.search import GrepMatch, find_glob_matches, find_grep_matches -from deerflow.sandbox.tools import glob_tool, grep_tool +from deerflow.sandbox.tools import glob_tool, grep_tool, ls_tool def _make_runtime(tmp_path): @@ -391,3 +391,71 @@ def test_aio_sandbox_grep_skips_mismatched_line_number_payloads(monkeypatch) -> assert matches == [GrepMatch(path="/mnt/user-data/workspace/app.py", line_number=7, line="TODO = True")] assert truncated is False + + +# --------------------------------------------------------------------------- +# ls_tool — path masking +# --------------------------------------------------------------------------- + + +def test_ls_tool_masks_user_data_host_paths(tmp_path, monkeypatch) -> None: + """ls_tool output must not leak host user-data paths; they should be virtual.""" + runtime = _make_runtime(tmp_path) + workspace = tmp_path / "workspace" + (workspace / "report.txt").write_text("hello\n", encoding="utf-8") + (workspace / "subdir").mkdir() + + monkeypatch.setattr("deerflow.sandbox.tools.ensure_sandbox_initialized", lambda runtime: LocalSandbox(id="local")) + + result = ls_tool.func( + runtime=runtime, + description="list workspace", + path="/mnt/user-data/workspace", + ) + + # Virtual paths must be present + assert "/mnt/user-data/workspace" in result + # Host paths must NOT leak + assert str(workspace) not in result + assert str(tmp_path) not in result + + +def test_ls_tool_masks_skills_host_paths(tmp_path, monkeypatch) -> None: + """ls_tool output must not leak host skills paths; they should be virtual.""" + runtime = _make_runtime(tmp_path) + skills_dir = tmp_path / "skills" + (skills_dir / "public").mkdir(parents=True) + (skills_dir / "public" / "SKILL.md").write_text("# Skill\n", encoding="utf-8") + + monkeypatch.setattr("deerflow.sandbox.tools.ensure_sandbox_initialized", lambda runtime: LocalSandbox(id="local")) + + with ( + patch("deerflow.sandbox.tools._get_skills_container_path", return_value="/mnt/skills"), + patch("deerflow.sandbox.tools._get_skills_host_path", return_value=str(skills_dir)), + ): + result = ls_tool.func( + runtime=runtime, + description="list skills", + path="/mnt/skills", + ) + + # Virtual paths must be present + assert "/mnt/skills" in result + # Host paths must NOT leak + assert str(skills_dir) not in result + assert str(tmp_path) not in result + + +def test_ls_tool_returns_empty_for_empty_directory(tmp_path, monkeypatch) -> None: + """ls_tool should return '(empty)' for an empty directory.""" + runtime = _make_runtime(tmp_path) + + monkeypatch.setattr("deerflow.sandbox.tools.ensure_sandbox_initialized", lambda runtime: LocalSandbox(id="local")) + + result = ls_tool.func( + runtime=runtime, + description="list empty dir", + path="/mnt/user-data/workspace", + ) + + assert result == "(empty)"