From ca1b7d5f48bf46db80898af21d20f1da23ccdf69 Mon Sep 17 00:00:00 2001
From: Shawn Jasper <jzs6124@163.com>
Date: Sat, 18 Apr 2026 08:46:59 +0800
Subject: [PATCH] fix(sandbox): add missing path masking in ls_tool output
 (#2317)

ls_tool was the only file-system tool that did not call
mask_local_paths_in_output() before returning its result, causing host
absolute paths (e.g. /Users/.../backend/.deer-flow/knowledge-base/...)
to leak to the LLM instead of the expected virtual paths
(/mnt/knowledge-base/...).

This patch:
- Adds the mask_local_paths_in_output() call to ls_tool, consistent
  with bash_tool, glob_tool and grep_tool.
- Initialises thread_data = None before the is_local_sandbox branch
  (same pattern as glob_tool) so the name is always bound; masking is
  then applied only when thread_data was set, i.e. for local sandboxes.
- Adds three new tests covering user-data path masking, skills path
  masking and the empty-directory edge case.
---
 .../harness/deerflow/sandbox/tools.py         |  3 +
 backend/tests/test_sandbox_search_tools.py    | 70 ++++++++++++++++++-
 2 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/backend/packages/harness/deerflow/sandbox/tools.py b/backend/packages/harness/deerflow/sandbox/tools.py
index 089fa725d..7b09358e7 100644
--- a/backend/packages/harness/deerflow/sandbox/tools.py
+++ b/backend/packages/harness/deerflow/sandbox/tools.py
@@ -1047,6 +1047,7 @@ def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path:
         sandbox = ensure_sandbox_initialized(runtime)
         ensure_thread_directories_exist(runtime)
         requested_path = path
+        thread_data = None
         if is_local_sandbox(runtime):
             thread_data = get_thread_data(runtime)
             validate_local_tool_path(path, thread_data, read_only=True)
@@ -1061,6 +1062,8 @@ def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path:
         if not children:
             return "(empty)"
         output = "\n".join(children)
+        if thread_data is not None:
+            output = mask_local_paths_in_output(output, thread_data)
         try:
             from deerflow.config.app_config import get_app_config
 
diff --git a/backend/tests/test_sandbox_search_tools.py b/backend/tests/test_sandbox_search_tools.py
index 6b6c686c4..88e87a783 100644
--- a/backend/tests/test_sandbox_search_tools.py
+++ b/backend/tests/test_sandbox_search_tools.py
@@ -4,7 +4,7 @@ from unittest.mock import patch
 from deerflow.community.aio_sandbox.aio_sandbox import AioSandbox
 from deerflow.sandbox.local.local_sandbox import LocalSandbox
 from deerflow.sandbox.search import GrepMatch, find_glob_matches, find_grep_matches
-from deerflow.sandbox.tools import glob_tool, grep_tool
+from deerflow.sandbox.tools import glob_tool, grep_tool, ls_tool
 
 
 def _make_runtime(tmp_path):
@@ -391,3 +391,71 @@ def test_aio_sandbox_grep_skips_mismatched_line_number_payloads(monkeypatch) ->
 
     assert matches == [GrepMatch(path="/mnt/user-data/workspace/app.py", line_number=7, line="TODO = True")]
     assert truncated is False
+
+
+# ---------------------------------------------------------------------------
+# ls_tool — path masking
+# ---------------------------------------------------------------------------
+
+
+def test_ls_tool_masks_user_data_host_paths(tmp_path, monkeypatch) -> None:
+    """Listing a populated workspace must show the virtual /mnt/user-data path and never the host tmp_path."""
+    runtime = _make_runtime(tmp_path)
+    workspace = tmp_path / "workspace"  # no mkdir here: presumably _make_runtime already created it — verify
+    (workspace / "report.txt").write_text("hello\n", encoding="utf-8")
+    (workspace / "subdir").mkdir()
+
+    monkeypatch.setattr("deerflow.sandbox.tools.ensure_sandbox_initialized", lambda runtime: LocalSandbox(id="local"))
+
+    result = ls_tool.func(
+        runtime=runtime,
+        description="list workspace",
+        path="/mnt/user-data/workspace",
+    )
+
+    # The virtual path that was requested must appear somewhere in the listing
+    assert "/mnt/user-data/workspace" in result
+    # Neither the host workspace directory nor its tmp_path parent may appear in the output
+    assert str(workspace) not in result
+    assert str(tmp_path) not in result
+
+
+def test_ls_tool_masks_skills_host_paths(tmp_path, monkeypatch) -> None:
+    """Listing the skills mount must show the virtual /mnt/skills path and never the host skills directory."""
+    runtime = _make_runtime(tmp_path)
+    skills_dir = tmp_path / "skills"
+    (skills_dir / "public").mkdir(parents=True)
+    (skills_dir / "public" / "SKILL.md").write_text("# Skill\n", encoding="utf-8")
+
+    monkeypatch.setattr("deerflow.sandbox.tools.ensure_sandbox_initialized", lambda runtime: LocalSandbox(id="local"))
+
+    with (
+        patch("deerflow.sandbox.tools._get_skills_container_path", return_value="/mnt/skills"),  # virtual side of the mapping
+        patch("deerflow.sandbox.tools._get_skills_host_path", return_value=str(skills_dir)),  # host side, under tmp_path
+    ):
+        result = ls_tool.func(
+            runtime=runtime,
+            description="list skills",
+            path="/mnt/skills",
+        )
+
+    # The virtual skills path that was requested must appear somewhere in the listing
+    assert "/mnt/skills" in result
+    # Neither the host skills directory nor its tmp_path parent may appear in the output
+    assert str(skills_dir) not in result
+    assert str(tmp_path) not in result
+
+
+def test_ls_tool_returns_empty_for_empty_directory(tmp_path, monkeypatch) -> None:
+    """ls_tool must return the literal string '(empty)' when the listed directory has no entries."""
+    runtime = _make_runtime(tmp_path)  # nothing is written afterwards: assumes the helper leaves workspace/ empty — verify
+
+    monkeypatch.setattr("deerflow.sandbox.tools.ensure_sandbox_initialized", lambda runtime: LocalSandbox(id="local"))
+
+    result = ls_tool.func(
+        runtime=runtime,
+        description="list empty dir",
+        path="/mnt/user-data/workspace",
+    )
+
+    assert result == "(empty)"