From 5fd2c581f6b775d693f01f88ddb68694c56921d9 Mon Sep 17 00:00:00 2001 From: yangzheli <43645580+yangzheli@users.noreply.github.com> Date: Mon, 6 Apr 2026 15:09:57 +0800 Subject: [PATCH] fix: add output truncation to ls_tool to prevent context window overflow (#1896) ls_tool was the only sandbox tool without output size limits, allowing multi-MB results from large directories to blow up the model context window. Add head-truncation (configurable via ls_output_max_chars, default 20000) consistent with existing bash and read_file truncation. Closes #1887 Co-authored-by: Claude Opus 4.6 --- .../harness/deerflow/config/sandbox_config.py | 5 ++ .../harness/deerflow/sandbox/tools.py | 33 ++++++++- backend/tests/test_tool_output_truncation.py | 71 ++++++++++++++++++- config.example.yaml | 3 +- 4 files changed, 109 insertions(+), 3 deletions(-) diff --git a/backend/packages/harness/deerflow/config/sandbox_config.py b/backend/packages/harness/deerflow/config/sandbox_config.py index 0634ce7b9..d9aac4ab4 100644 --- a/backend/packages/harness/deerflow/config/sandbox_config.py +++ b/backend/packages/harness/deerflow/config/sandbox_config.py @@ -74,5 +74,10 @@ class SandboxConfig(BaseModel): ge=0, description="Maximum characters to keep from read_file tool output. Output exceeding this limit is head-truncated. Set to 0 to disable truncation.", ) + ls_output_max_chars: int = Field( + default=20000, + ge=0, + description="Maximum characters to keep from ls tool output. Output exceeding this limit is head-truncated. Set to 0 to disable truncation.", + ) model_config = ConfigDict(extra="allow") diff --git a/backend/packages/harness/deerflow/sandbox/tools.py b/backend/packages/harness/deerflow/sandbox/tools.py index b52131ff4..089fa725d 100644 --- a/backend/packages/harness/deerflow/sandbox/tools.py +++ b/backend/packages/harness/deerflow/sandbox/tools.py @@ -963,6 +963,29 @@ def _truncate_read_file_output(output: str, max_chars: int) -> str: return f"{output[:kept]}{marker}" +def _truncate_ls_output(output: str, max_chars: int) -> str: + """Head-truncate ls output, preserving the beginning of the listing. + + Directory listings are read top-to-bottom; the head shows the most + relevant structure. + + The returned string (including the truncation marker) is guaranteed to be + no longer than max_chars characters. Pass max_chars=0 to disable truncation + and return the full output unchanged. + """ + if max_chars == 0: + return output + if len(output) <= max_chars: + return output + total = len(output) + marker_max_len = len(f"\n... [truncated: showing first {total} of {total} chars. Use a more specific path to see fewer results] ...") + kept = max(0, max_chars - marker_max_len) + if kept == 0: + return output[:max_chars] + marker = f"\n... [truncated: showing first {kept} of {total} chars. Use a more specific path to see fewer results] ..." + return f"{output[:kept]}{marker}" + + @tool("bash", parse_docstring=True) def bash_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, command: str) -> str: """Execute a bash command in a Linux environment. @@ -1037,7 +1060,15 @@ def ls_tool(runtime: ToolRuntime[ContextT, ThreadState], description: str, path: children = sandbox.list_dir(path) if not children: return "(empty)" - return "\n".join(children) + output = "\n".join(children) + try: + from deerflow.config.app_config import get_app_config + + sandbox_cfg = get_app_config().sandbox + max_chars = sandbox_cfg.ls_output_max_chars if sandbox_cfg else 20000 + except Exception: + max_chars = 20000 + return _truncate_ls_output(output, max_chars) except SandboxError as e: return f"Error: {e}" except FileNotFoundError: diff --git a/backend/tests/test_tool_output_truncation.py b/backend/tests/test_tool_output_truncation.py index e76bb20e2..519af66a0 100644 --- a/backend/tests/test_tool_output_truncation.py +++ b/backend/tests/test_tool_output_truncation.py @@ -3,9 +3,10 @@ These functions truncate long tool outputs to prevent context window overflow. - _truncate_bash_output: middle-truncation (head + tail), for bash tool - _truncate_read_file_output: head-truncation, for read_file tool +- _truncate_ls_output: head-truncation, for ls tool """ -from deerflow.sandbox.tools import _truncate_bash_output, _truncate_read_file_output +from deerflow.sandbox.tools import _truncate_bash_output, _truncate_ls_output, _truncate_read_file_output # --------------------------------------------------------------------------- # _truncate_bash_output @@ -159,3 +160,71 @@ class TestTruncateReadFileOutput: for max_chars in [100, 1000, 5000, 20000, 49999]: result = _truncate_read_file_output(output, max_chars) assert len(result) <= max_chars, f"failed for max_chars={max_chars}" + + +# --------------------------------------------------------------------------- +# _truncate_ls_output +# --------------------------------------------------------------------------- + + +class TestTruncateLsOutput: + def test_short_output_returned_unchanged(self): + output = "dir1\ndir2\nfile1.txt" + assert _truncate_ls_output(output, 20000) == output + + def test_output_equal_to_limit_returned_unchanged(self): + output = "X" * 20000 + assert _truncate_ls_output(output, 20000) == output + + def test_long_output_is_truncated(self): + output = "\n".join(f"file_{i}.txt" for i in range(5000)) + result = _truncate_ls_output(output, 20000) + assert len(result) < len(output) + + def test_result_never_exceeds_max_chars(self): + output = "\n".join(f"subdir/file_{i}.txt" for i in range(5000)) + max_chars = 20000 + result = _truncate_ls_output(output, max_chars) + assert len(result) <= max_chars + + def test_head_is_preserved(self): + head = "first_dir\nsecond_dir\n" + output = head + "\n".join(f"file_{i}" for i in range(5000)) + result = _truncate_ls_output(output, 20000) + assert result.startswith(head) + + def test_truncation_marker_present(self): + output = "\n".join(f"file_{i}.txt" for i in range(5000)) + result = _truncate_ls_output(output, 20000) + assert "[truncated:" in result + assert "showing first" in result + + def test_total_chars_reported_correctly(self): + output = "X" * 30000 + result = _truncate_ls_output(output, 20000) + assert "of 30000 chars" in result + + def test_hint_suggests_specific_path(self): + output = "X" * 30000 + result = _truncate_ls_output(output, 20000) + assert "Use a more specific path" in result + + def test_max_chars_zero_disables_truncation(self): + output = "\n".join(f"file_{i}.txt" for i in range(10000)) + assert _truncate_ls_output(output, 0) == output + + def test_tail_is_not_preserved(self): + output = "H" * 20000 + "TAIL_SHOULD_NOT_APPEAR" + result = _truncate_ls_output(output, 20000) + assert "TAIL_SHOULD_NOT_APPEAR" not in result + + def test_small_max_chars_does_not_crash(self): + output = "\n".join(f"file_{i}.txt" for i in range(100)) + result = _truncate_ls_output(output, 10) + assert len(result) <= 10 + + def test_result_never_exceeds_max_chars_various_sizes(self): + output = "\n".join(f"file_{i}.txt" for i in range(5000)) + for max_chars in [100, 1000, 5000, 20000, len(output) - 1]: + result = _truncate_ls_output(output, max_chars) + assert len(result) <= max_chars, f"failed for max_chars={max_chars}" diff --git a/config.example.yaml b/config.example.yaml index d6f382591..380527f42 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -392,10 +392,11 @@ sandbox: # Tool output truncation limits (characters). # bash uses middle-truncation (head + tail) since errors can appear anywhere in the output. - # read_file uses head-truncation since source code context is front-loaded. + # read_file and ls use head-truncation since their content is front-loaded. # Set to 0 to disable truncation. bash_output_max_chars: 20000 read_file_output_max_chars: 50000 + ls_output_max_chars: 20000 # Option 2: Container-based AIO Sandbox # Executes commands in isolated containers (Docker or Apple Container)