fix(backend): make loop detection hash tool calls by stable keys (#1911)

* fix(backend): make loop detection hash tool calls by stable keys

  The loop detection middleware previously hashed full tool call arguments, which made repeated calls look different when only non-essential argument details changed. In particular, `read_file` calls with nearby line ranges could bypass repetition detection even when the agent was effectively reading the same file region again and again.

  - Hash tool calls using stable keys instead of the full raw args payload
  - Bucket `read_file` line ranges so nearby reads map to the same region key
  - Prefer stable identifiers such as `path`, `url`, `query`, or `command` before falling back to JSON serialization of args
  - Keep hashing order-independent so the same tool call set produces the same hash regardless of call order

  Fixes #1905

* fix(backend): harden loop detection hash normalization

  - Normalize and parse stringified tool args defensively
  - Expand stable key derivation to include pattern, glob, and cmd
  - Normalize reversed read_file ranges before bucketing

  Fixes #1905

* fix(backend): harden loop detection tool format

* exclude write_file and str_replace from the stable-key path — writing different content to the same file shouldn't be flagged.

---------

Co-authored-by: JeffJiang <for-eleven@hotmail.com>
2026-07-22 14:08:28 +00:00 · 2026-04-07 17:46:33 +08:00 · 2026-04-07 17:46:33 +08:00 · c3170f22da
commit c3170f22da
parent 1193ac64dc
2 changed files with 143 additions and 17 deletions
--- a/backend/packages/harness/deerflow/agents/middlewares/loop_detection_middleware.py
+++ b/backend/packages/harness/deerflow/agents/middlewares/loop_detection_middleware.py
@ -33,30 +33,92 @@ _DEFAULT_WINDOW_SIZE = 20  # track last N tool calls
 _DEFAULT_MAX_TRACKED_THREADS = 100  # LRU eviction limit


+def _normalize_tool_call_args(raw_args: object) -> tuple[dict, str | None]:
+    """Normalize tool call args to a dict plus an optional fallback key.
+
+    Some providers serialize ``args`` as a JSON string instead of a dict.
+    We defensively parse those cases so loop detection does not crash while
+    still preserving a stable fallback key for non-dict payloads.
+    """
+    # Returns ``(args, fallback_key)``. ``fallback_key`` is None when the
+    # payload was a dict, parsed to a dict, or was None; otherwise ``args`` is
+    # ``{}`` and ``fallback_key`` is a string form of the payload.
+
+    # Already a dict: handed back as the same object (no copy is made).
+    if isinstance(raw_args, dict):
+        return raw_args, None
+
+    if isinstance(raw_args, str):
+        try:
+            parsed = json.loads(raw_args)
+        # json.JSONDecodeError is a subclass of ValueError; it is listed
+        # explicitly alongside it here.
+        except (TypeError, ValueError, json.JSONDecodeError):
+            # Not valid JSON: the raw string itself becomes the fallback key.
+            return {}, raw_args
+
+        if isinstance(parsed, dict):
+            return parsed, None
+        # Valid JSON that is not an object (list, string, number, ...):
+        # re-serialize it so differently formatted spellings of the same
+        # value produce the same fallback key.
+        return {}, json.dumps(parsed, sort_keys=True, default=str)
+
+    # Missing args: no dict content and nothing to fall back on.
+    if raw_args is None:
+        return {}, None
+
+    # Any other type: serialize it, using str() for values json cannot encode.
+    return {}, json.dumps(raw_args, sort_keys=True, default=str)
+
+
+def _stable_tool_key(name: str, args: dict, fallback_key: str | None) -> str:
+    """Derive a stable key from salient args without overfitting to noise."""
+    # read_file gets region bucketing only when no fallback key was produced;
+    # with a fallback key this branch is skipped and the generic path below
+    # applies.
+    if name == "read_file" and fallback_key is None:
+        # A missing, None, or empty path is treated as "".
+        path = args.get("path") or ""
+        start_line = args.get("start_line")
+        end_line = args.get("end_line")
+
+        # Line numbers are grouped into buckets of this many lines.
+        bucket_size = 200
+        # Missing or unparseable start_line falls back to line 1.
+        # NOTE(review): int() raises OverflowError for an infinite float, which
+        # the except clauses here do not list; confirm whether such values
+        # can reach this point.
+        try:
+            start_line = int(start_line) if start_line is not None else 1
+        except (TypeError, ValueError):
+            start_line = 1
+        # Missing or unparseable end_line falls back to start_line.
+        try:
+            end_line = int(end_line) if end_line is not None else start_line
+        except (TypeError, ValueError):
+            end_line = start_line
+
+        # Order the pair so a reversed range yields the same key as a forward one.
+        start_line, end_line = sorted((start_line, end_line))
+        # Clamp to 1 so zero or negative line numbers land in bucket 0.
+        bucket_start = max(start_line, 1)
+        bucket_end = max(end_line, 1)
+        # Zero-based bucket index: lines 1-200 -> 0, 201-400 -> 1, and so on.
+        bucket_start = (bucket_start - 1) // bucket_size
+        bucket_end = (bucket_end - 1) // bucket_size
+        return f"{path}:{bucket_start}-{bucket_end}"
+
+    # write_file / str_replace are content-sensitive: same path may be updated
+    # with different payloads during iteration. Using only salient fields (path)
+    # can collapse distinct calls, so we hash full args to reduce false positives.
+    if name in {"write_file", "str_replace"}:
+        if fallback_key is not None:
+            return fallback_key
+        return json.dumps(args, sort_keys=True, default=str)
+
+    # Generic tools: key on whichever of these fields are present and not None;
+    # all other arguments are ignored when at least one of them is found.
+    salient_fields = ("path", "url", "query", "command", "pattern", "glob", "cmd")
+    stable_args = {field: args[field] for field in salient_fields if args.get(field) is not None}
+    if stable_args:
+        return json.dumps(stable_args, sort_keys=True, default=str)
+
+    # No salient field found: use the fallback key if one was supplied.
+    if fallback_key is not None:
+        return fallback_key
+
+    # Last resort: serialize the full args dict with sorted keys.
+    return json.dumps(args, sort_keys=True, default=str)
+
+
 def _hash_tool_calls(tool_calls: list[dict]) -> str:
-    """Deterministic hash of a set of tool calls (name + args).
+    """Deterministic hash of a set of tool calls (name + stable key).

    This is intended to be order-independent: the same multiset of tool calls
    should always produce the same hash, regardless of their input order.
    """
-    # First normalize each tool call to a minimal (name, args) structure.
-    normalized: list[dict] = []
+    # Normalize each tool call to a stable (name, key) structure.
+    normalized: list[str] = []
    for tc in tool_calls:
-        normalized.append(
-            {
-                "name": tc.get("name", ""),
-                "args": tc.get("args", {}),
-            }
-        )
+        name = tc.get("name", "")
+        args, fallback_key = _normalize_tool_call_args(tc.get("args", {}))
+        key = _stable_tool_key(name, args, fallback_key)

-    # Sort by both name and a deterministic serialization of args so that
-    # permutations of the same multiset of calls yield the same ordering.
-    normalized.sort(
-        key=lambda tc: (
-            tc["name"],
-            json.dumps(tc["args"], sort_keys=True, default=str),
-        )
-    )
+        # Prefix the key with the tool name so equal keys from different
+        # tools stay distinct.
+        normalized.append(f"{name}:{key}")
+
+    # Sort so permutations of the same multiset of calls yield the same ordering.
+    normalized.sort()
+    # The sorted list is serialized to JSON and hashed with md5; only the first
+    # 12 hex characters of the digest are returned.
    blob = json.dumps(normalized, sort_keys=True, default=str)
    return hashlib.md5(blob.encode()).hexdigest()[:12]

--- a/backend/tests/test_loop_detection_middleware.py
+++ b/backend/tests/test_loop_detection_middleware.py
@ -55,6 +55,70 @@ class TestHashToolCalls:
        assert isinstance(h, str)
        assert len(h) > 0

+    def test_stringified_dict_args_match_dict_args(self):
+        # The same read_file payload, given once as a dict and once as a JSON
+        # string, must produce the same hash.
+        dict_call = {
+            "name": "read_file",
+            "args": {"path": "/tmp/demo.py", "start_line": "1", "end_line": "150"},
+        }
+        string_call = {
+            "name": "read_file",
+            "args": '{"path":"/tmp/demo.py","start_line":"1","end_line":"150"}',
+        }
+
+        assert _hash_tool_calls([dict_call]) == _hash_tool_calls([string_call])
+
+    def test_reversed_read_file_range_matches_forward_range(self):
+        # Swapping start_line and end_line on the same path must not change
+        # the hash.
+        forward_call = {
+            "name": "read_file",
+            "args": {"path": "/tmp/demo.py", "start_line": 10, "end_line": 300},
+        }
+        reversed_call = {
+            "name": "read_file",
+            "args": {"path": "/tmp/demo.py", "start_line": 300, "end_line": 10},
+        }
+
+        assert _hash_tool_calls([forward_call]) == _hash_tool_calls([reversed_call])
+
+    def test_stringified_non_dict_args_do_not_crash(self):
+        # Covers args given as a JSON-encoded string literal and as a plain
+        # non-JSON string. Only checks that each call yields a non-empty string
+        # hash; it does not compare the two hashes with each other.
+        non_dict_json_call = {"name": "bash", "args": '"echo hello"'}
+        plain_string_call = {"name": "bash", "args": "echo hello"}
+
+        json_hash = _hash_tool_calls([non_dict_json_call])
+        plain_hash = _hash_tool_calls([plain_string_call])
+
+        assert isinstance(json_hash, str)
+        assert isinstance(plain_hash, str)
+        assert json_hash
+        assert plain_hash
+
+    def test_grep_pattern_affects_hash(self):
+        # Same path, different pattern: the hashes must differ.
+        grep_foo = {"name": "grep", "args": {"path": "/tmp", "pattern": "foo"}}
+        grep_bar = {"name": "grep", "args": {"path": "/tmp", "pattern": "bar"}}
+
+        assert _hash_tool_calls([grep_foo]) != _hash_tool_calls([grep_bar])
+
+    def test_glob_pattern_affects_hash(self):
+        # Same path, different pattern: the hashes must differ.
+        glob_py = {"name": "glob", "args": {"path": "/tmp", "pattern": "*.py"}}
+        glob_ts = {"name": "glob", "args": {"path": "/tmp", "pattern": "*.ts"}}
+
+        assert _hash_tool_calls([glob_py]) != _hash_tool_calls([glob_ts])
+
+    def test_write_file_content_affects_hash(self):
+        # Same path, different content: the hashes must differ.
+        v1 = {"name": "write_file", "args": {"path": "/tmp/a.py", "content": "v1"}}
+        v2 = {"name": "write_file", "args": {"path": "/tmp/a.py", "content": "v2"}}
+        assert _hash_tool_calls([v1]) != _hash_tool_calls([v2])
+
+    def test_str_replace_content_affects_hash(self):
+        # Same path and old_str, different new_str: the hashes must differ.
+        a = {
+            "name": "str_replace",
+            "args": {"path": "/tmp/a.py", "old_str": "foo", "new_str": "bar"},
+        }
+        b = {
+            "name": "str_replace",
+            "args": {"path": "/tmp/a.py", "old_str": "foo", "new_str": "baz"},
+        }
+        assert _hash_tool_calls([a]) != _hash_tool_calls([b])
+

 class TestLoopDetection:
    def test_no_tool_calls_returns_none(self):