From 718dddde75c00541e079dab9c1892c44b7a65e6b Mon Sep 17 00:00:00 2001 From: Jin Date: Fri, 10 Apr 2026 22:55:53 +0800 Subject: [PATCH] fix(sandbox): prevent memory leak in file operation locks using WeakValueDictionary (#2096) * fix(sandbox): prevent memory leak in file operation locks using WeakValueDictionary * lint: fix lint issue in sandbox tools security --- .../deerflow/sandbox/file_operation_lock.py | 6 +++- backend/tests/test_sandbox_tools_security.py | 36 +++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/backend/packages/harness/deerflow/sandbox/file_operation_lock.py b/backend/packages/harness/deerflow/sandbox/file_operation_lock.py index 2464015c0..b834e9d1c 100644 --- a/backend/packages/harness/deerflow/sandbox/file_operation_lock.py +++ b/backend/packages/harness/deerflow/sandbox/file_operation_lock.py @@ -1,8 +1,12 @@ import threading +import weakref from deerflow.sandbox.sandbox import Sandbox -_FILE_OPERATION_LOCKS: dict[tuple[str, str], threading.Lock] = {} +# Use WeakValueDictionary to prevent memory leak in long-running processes. +# Locks are automatically removed when no longer referenced by any thread. +_LockKey = tuple[str, str] +_FILE_OPERATION_LOCKS: weakref.WeakValueDictionary[_LockKey, threading.Lock] = weakref.WeakValueDictionary() _FILE_OPERATION_LOCKS_GUARD = threading.Lock() diff --git a/backend/tests/test_sandbox_tools_security.py b/backend/tests/test_sandbox_tools_security.py index 268c5aada..8c67cd50a 100644 --- a/backend/tests/test_sandbox_tools_security.py +++ b/backend/tests/test_sandbox_tools_security.py @@ -1018,3 +1018,39 @@ def test_str_replace_and_append_on_same_path_should_preserve_both_updates(monkey assert failures == [] assert sandbox.content == "ALPHA\ntail\n" + + +def test_file_operation_lock_memory_cleanup() -> None: + """Verify that released locks are eventually cleaned up by WeakValueDictionary. 
+ + This ensures that the sandbox component doesn't leak memory over time when + operating on many unique file paths. + """ + import gc + + from deerflow.sandbox.file_operation_lock import _FILE_OPERATION_LOCKS, get_file_operation_lock + + class MockSandbox: + id = "test_cleanup_sandbox" + + test_path = "/tmp/deer-flow/memory_leak_test_file.txt" + lock_key = (MockSandbox.id, test_path) + + # Ensure the key does not exist before the test starts + assert lock_key not in _FILE_OPERATION_LOCKS + + def _use_lock_and_release() -> None: + # Create and acquire the lock within this scope + lock = get_file_operation_lock(MockSandbox(), test_path) + with lock: + pass + # As soon as this function returns, the local 'lock' variable is destroyed. + # Its reference count goes to zero, triggering WeakValueDictionary cleanup. + + _use_lock_and_release() + + # Force a garbage collection to be absolutely sure + gc.collect() + + # Check that this specific key was cleaned up (rather than checking the total length) + assert lock_key not in _FILE_OPERATION_LOCKS