diff --git a/backend/packages/harness/deerflow/sandbox/tools.py b/backend/packages/harness/deerflow/sandbox/tools.py index 72b8da834..73534eb2d 100644 --- a/backend/packages/harness/deerflow/sandbox/tools.py +++ b/backend/packages/harness/deerflow/sandbox/tools.py @@ -22,6 +22,9 @@ from deerflow.sandbox.security import LOCAL_HOST_BASH_DISABLED_MESSAGE, is_host_ _ABSOLUTE_PATH_PATTERN = re.compile(r"(?()]+)") _FILE_URL_PATTERN = re.compile(r"\bfile://\S+", re.IGNORECASE) +_URL_WITH_SCHEME_PATTERN = re.compile(r"^[a-z][a-z0-9+.-]*://", re.IGNORECASE) +_URL_IN_COMMAND_PATTERN = re.compile(r"\b[a-z][a-z0-9+.-]*://[^\s\"'`;&|<>()]+", re.IGNORECASE) +_DOTDOT_PATH_SEGMENT_PATTERN = re.compile(r"(?:^|[/\\=])\.\.(?:$|[/\\])") _LOCAL_BASH_SYSTEM_PATH_PREFIXES = ( "/bin/", "/usr/bin/", @@ -37,6 +40,42 @@ _DEFAULT_GLOB_MAX_RESULTS = 200 _MAX_GLOB_MAX_RESULTS = 1000 _DEFAULT_GREP_MAX_RESULTS = 100 _MAX_GREP_MAX_RESULTS = 500 +_LOCAL_BASH_CWD_COMMANDS = {"cd", "pushd"} +_LOCAL_BASH_COMMAND_WRAPPERS = {"command", "builtin"} +_LOCAL_BASH_COMMAND_PREFIX_KEYWORDS = {"!", "{", "case", "do", "elif", "else", "for", "if", "select", "then", "time", "until", "while"} +_LOCAL_BASH_COMMAND_END_KEYWORDS = {"}", "done", "esac", "fi"} +_LOCAL_BASH_ROOT_PATH_COMMANDS = { + "awk", + "cat", + "cp", + "du", + "find", + "grep", + "head", + "less", + "ln", + "ls", + "more", + "mv", + "rm", + "sed", + "tail", + "tar", +} +_SHELL_COMMAND_SEPARATORS = {";", "&&", "||", "|", "|&", "&", "(", ")"} +_SHELL_REDIRECTION_OPERATORS = { + "<", + ">", + "<<", + ">>", + "<<<", + "<>", + ">&", + "<&", + "&>", + "&>>", + ">|", +} def _get_skills_container_path() -> str: @@ -635,6 +674,214 @@ def _resolve_and_validate_user_data_path(path: str, thread_data: ThreadDataState return str(resolved) +def _is_non_file_url_token(token: str) -> bool: + """Return True for URL tokens that should not be interpreted as paths.""" + values = [token] + if "=" in token: + values.append(token.split("=", 1)[1]) + + for value in values: + match = _URL_WITH_SCHEME_PATTERN.match(value) + if match and not value.lower().startswith("file://"): + return True + return False + + +def _non_file_url_spans(command: str) -> list[tuple[int, int]]: + spans = [] + for match in _URL_IN_COMMAND_PATTERN.finditer(command): + if not match.group().lower().startswith("file://"): + spans.append(match.span()) + return spans + + +def _is_in_spans(position: int, spans: list[tuple[int, int]]) -> bool: + return any(start <= position < end for start, end in spans) + + +def _has_dotdot_path_segment(token: str) -> bool: + if _is_non_file_url_token(token): + return False + return bool(_DOTDOT_PATH_SEGMENT_PATTERN.search(token)) + + +def _split_shell_tokens(command: str) -> list[str]: + try: + normalized = command.replace("\r\n", "\n").replace("\r", "\n").replace("\n", " ; ") + lexer = shlex.shlex(normalized, posix=True, punctuation_chars=True) + lexer.whitespace_split = True + lexer.commenters = "" + return list(lexer) + except ValueError: + # The shell will reject malformed quoting later; keep validation + # best-effort instead of turning syntax errors into security messages. + return command.split() + + +def _is_shell_command_separator(token: str) -> bool: + return token in _SHELL_COMMAND_SEPARATORS + + +def _is_shell_redirection_operator(token: str) -> bool: + return token in _SHELL_REDIRECTION_OPERATORS + + +def _is_shell_assignment(token: str) -> bool: + name, separator, _ = token.partition("=") + if not separator or not name: + return False + return bool(re.fullmatch(r"[A-Za-z_][A-Za-z0-9_]*", name)) + + +def _is_allowed_local_bash_absolute_path(path: str, allowed_paths: list[str], *, allow_system_paths: bool) -> bool: + # Check for MCP filesystem server allowed paths + if any(path.startswith(allowed_path) or path == allowed_path.rstrip("/") for allowed_path in allowed_paths): + _reject_path_traversal(path) + return True + + if path == VIRTUAL_PATH_PREFIX or path.startswith(f"{VIRTUAL_PATH_PREFIX}/"): + _reject_path_traversal(path) + return True + + # Allow skills container path (resolved by tools.py before passing to sandbox) + if _is_skills_path(path): + _reject_path_traversal(path) + return True + + # Allow ACP workspace path (path-traversal check only) + if _is_acp_workspace_path(path): + _reject_path_traversal(path) + return True + + # Allow custom mount container paths + if _is_custom_mount_path(path): + _reject_path_traversal(path) + return True + + if allow_system_paths and any(path == prefix.rstrip("/") or path.startswith(prefix) for prefix in _LOCAL_BASH_SYSTEM_PATH_PREFIXES): + return True + + return False + + +def _next_cd_target(tokens: list[str], start_index: int) -> tuple[str | None, int]: + index = start_index + while index < len(tokens): + token = tokens[index] + if _is_shell_command_separator(token): + return None, index + if _is_shell_redirection_operator(token): + index += 2 + continue + if token == "--": + index += 1 + continue + if token in {"-L", "-P", "-e", "-@"}: + index += 1 + continue + if token.startswith("-") and token != "-": + index += 1 + continue + return token, index + 1 + return None, index + + +def _validate_local_bash_cwd_target(command_name: str, target: str | None, allowed_paths: list[str]) -> None: + if target is None or target == "-": + raise PermissionError(f"Unsafe working directory change in command: {command_name}. Use paths under {VIRTUAL_PATH_PREFIX}") + if target.startswith(("$", "`")): + raise PermissionError(f"Unsafe working directory change in command: {command_name} {target}. Use paths under {VIRTUAL_PATH_PREFIX}") + if target.startswith("~"): + raise PermissionError(f"Unsafe working directory change in command: {command_name} {target}. Use paths under {VIRTUAL_PATH_PREFIX}") + if target.startswith("/"): + _reject_path_traversal(target) + if not _is_allowed_local_bash_absolute_path(target, allowed_paths, allow_system_paths=False): + raise PermissionError(f"Unsafe working directory change in command: {command_name} {target}. Use paths under {VIRTUAL_PATH_PREFIX}") + + +def _looks_like_unsafe_cwd_target(target: str | None) -> bool: + if target is None: + return False + return target == "-" or target.startswith(("$", "`", "~", "/", "..")) or _has_dotdot_path_segment(target) + + +def _validate_local_bash_root_path_args(command_name: str, tokens: list[str], start_index: int) -> None: + if command_name not in _LOCAL_BASH_ROOT_PATH_COMMANDS: + return + + index = start_index + while index < len(tokens): + token = tokens[index] + if _is_shell_command_separator(token): + return + if _is_shell_redirection_operator(token): + index += 2 + continue + if token == "/" and not _is_non_file_url_token(token): + raise PermissionError(f"Unsafe absolute paths in command: /. Use paths under {VIRTUAL_PATH_PREFIX}") + index += 1 + + +def _validate_local_bash_shell_tokens(command: str, allowed_paths: list[str]) -> None: + """Conservatively reject relative path escapes missed by absolute-path scanning.""" + if re.search(r"\$\([^)]*\b(?:cd|pushd)\b", command): + raise PermissionError(f"Unsafe working directory change in command substitution. Use paths under {VIRTUAL_PATH_PREFIX}") + + tokens = _split_shell_tokens(command) + + for token in tokens: + if _is_shell_command_separator(token) or _is_shell_redirection_operator(token): + continue + if _has_dotdot_path_segment(token): + raise PermissionError("Access denied: path traversal detected") + + at_command_start = True + index = 0 + while index < len(tokens): + token = tokens[index] + + if _is_shell_command_separator(token): + at_command_start = True + index += 1 + continue + + if _is_shell_redirection_operator(token): + index += 1 + continue + + if at_command_start and _is_shell_assignment(token): + index += 1 + continue + + command_name = token.rsplit("/", 1)[-1] + if at_command_start and command_name in _LOCAL_BASH_COMMAND_PREFIX_KEYWORDS | _LOCAL_BASH_COMMAND_END_KEYWORDS: + index += 1 + continue + + if not at_command_start: + index += 1 + continue + + at_command_start = False + if command_name in _LOCAL_BASH_COMMAND_WRAPPERS and index + 1 < len(tokens): + wrapped_name = tokens[index + 1].rsplit("/", 1)[-1] + if wrapped_name in _LOCAL_BASH_CWD_COMMANDS: + target, next_index = _next_cd_target(tokens, index + 2) + _validate_local_bash_cwd_target(wrapped_name, target, allowed_paths) + index = next_index + continue + _validate_local_bash_root_path_args(wrapped_name, tokens, index + 2) + + if command_name not in _LOCAL_BASH_CWD_COMMANDS: + _validate_local_bash_root_path_args(command_name, tokens, index + 1) + index += 1 + continue + + target, next_index = _next_cd_target(tokens, index + 1) + _validate_local_bash_cwd_target(command_name, target, allowed_paths) + index = next_index + + def resolve_and_validate_user_data_path(path: str, thread_data: ThreadDataState) -> str: """Resolve a /mnt/user-data virtual path and validate it stays in bounds.""" return _resolve_and_validate_user_data_path(path, thread_data) @@ -665,33 +912,14 @@ def validate_local_bash_command_paths(command: str, thread_data: ThreadDataState unsafe_paths: list[str] = [] allowed_paths = _get_mcp_allowed_paths() + _validate_local_bash_shell_tokens(command, allowed_paths) + url_spans = _non_file_url_spans(command) - for absolute_path in _ABSOLUTE_PATH_PATTERN.findall(command): - # Check for MCP filesystem server allowed paths - if any(absolute_path.startswith(path) or absolute_path == path.rstrip("/") for path in allowed_paths): - _reject_path_traversal(absolute_path) + for match in _ABSOLUTE_PATH_PATTERN.finditer(command): + if _is_in_spans(match.start(), url_spans): continue - - if absolute_path == VIRTUAL_PATH_PREFIX or absolute_path.startswith(f"{VIRTUAL_PATH_PREFIX}/"): - _reject_path_traversal(absolute_path) - continue - - # Allow skills container path (resolved by tools.py before passing to sandbox) - if _is_skills_path(absolute_path): - _reject_path_traversal(absolute_path) - continue - - # Allow ACP workspace path (path-traversal check only) - if _is_acp_workspace_path(absolute_path): - _reject_path_traversal(absolute_path) - continue - - # Allow custom mount container paths - if _is_custom_mount_path(absolute_path): - _reject_path_traversal(absolute_path) - continue - - if any(absolute_path == prefix.rstrip("/") or absolute_path.startswith(prefix) for prefix in _LOCAL_BASH_SYSTEM_PATH_PREFIXES): + absolute_path = match.group() + if _is_allowed_local_bash_absolute_path(absolute_path, allowed_paths, allow_system_paths=True): continue unsafe_paths.append(absolute_path) diff --git a/backend/tests/test_sandbox_tools_security.py b/backend/tests/test_sandbox_tools_security.py index 8c67cd50a..57466a0fe 100644 --- a/backend/tests/test_sandbox_tools_security.py +++ b/backend/tests/test_sandbox_tools_security.py @@ -346,6 +346,104 @@ def test_validate_local_bash_command_paths_blocks_traversal_in_skills() -> None: ) +@pytest.mark.parametrize( + "command", + [ + "cat ../uploads/secret.txt", + "cat subdir/../../secret.txt", + "python script.py --input=../secret.txt", + "echo ok > ../outputs/result.txt", + ], +) +def test_validate_local_bash_command_paths_blocks_relative_dotdot_segments(command: str) -> None: + with pytest.raises(PermissionError, match="path traversal"): + validate_local_bash_command_paths(command, _THREAD_DATA) + + +def test_validate_local_bash_command_paths_blocks_cd_root_escape() -> None: + with pytest.raises(PermissionError, match="Unsafe working directory"): + validate_local_bash_command_paths("cd / && cat etc/passwd", _THREAD_DATA) + + +def test_validate_local_bash_command_paths_blocks_cd_parent_escape() -> None: + with pytest.raises(PermissionError, match="path traversal"): + validate_local_bash_command_paths("cd .. && cat etc/passwd", _THREAD_DATA) + + +def test_validate_local_bash_command_paths_blocks_cd_env_var_escape() -> None: + with pytest.raises(PermissionError, match="Unsafe working directory"): + validate_local_bash_command_paths("cd $HOME && cat .ssh/id_rsa", _THREAD_DATA) + + +def test_validate_local_bash_command_paths_blocks_multiline_cd_escape() -> None: + with pytest.raises(PermissionError, match="Unsafe working directory"): + validate_local_bash_command_paths("echo ok\ncd $HOME && cat .ssh/id_rsa", _THREAD_DATA) + + +@pytest.mark.parametrize( + "command", + [ + "command cd / && cat etc/passwd", + "builtin cd $HOME && cat .ssh/id_rsa", + "if cd $HOME; then cat .ssh/id_rsa; fi", + "{ cd /; cat etc/passwd; }", + 'echo "$(cd $HOME && cat .ssh/id_rsa)"', + ], +) +def test_validate_local_bash_command_paths_blocks_complex_cd_escapes(command: str) -> None: + with pytest.raises(PermissionError, match="Unsafe working directory"): + validate_local_bash_command_paths(command, _THREAD_DATA) + + +@pytest.mark.parametrize( + "command", + [ + "ls /", + "ln -s / root && cat root/etc/passwd", + "command ls /", + ], +) +def test_validate_local_bash_command_paths_blocks_bare_root_path(command: str) -> None: + with pytest.raises(PermissionError, match="Unsafe absolute paths"): + validate_local_bash_command_paths(command, _THREAD_DATA) + + +@pytest.mark.parametrize( + "command", + [ + "echo cd /", + "printf '%s\\n' pushd /", + ], +) +def test_validate_local_bash_command_paths_allows_cd_words_as_arguments(command: str) -> None: + validate_local_bash_command_paths(command, _THREAD_DATA) + + +def test_validate_local_bash_command_paths_allows_workspace_relative_paths() -> None: + validate_local_bash_command_paths( + "mkdir -p reports && python script.py data/input.csv > reports/out.txt", + _THREAD_DATA, + ) + + +def test_validate_local_bash_command_paths_allows_cd_virtual_workspace_with_relative_paths() -> None: + validate_local_bash_command_paths( + "cd /mnt/user-data/workspace && cat data/input.csv > reports/out.txt", + _THREAD_DATA, + ) + + +def test_validate_local_bash_command_paths_allows_http_url_dotdot_segments() -> None: + validate_local_bash_command_paths( + "curl https://example.com/packages/../archive.tar.gz -o /mnt/user-data/workspace/archive.tar.gz", + _THREAD_DATA, + ) + validate_local_bash_command_paths( + "curl http://example.com/packages/../archive.tar.gz -o /mnt/user-data/workspace/archive.tar.gz", + _THREAD_DATA, + ) + + def test_bash_tool_rejects_host_bash_when_local_sandbox_default(monkeypatch) -> None: runtime = SimpleNamespace( state={"sandbox": {"sandbox_id": "local"}, "thread_data": _THREAD_DATA.copy()}, @@ -367,6 +465,28 @@ def test_bash_tool_rejects_host_bash_when_local_sandbox_default(monkeypatch) -> assert "Host bash execution is disabled" in result +def test_bash_tool_blocks_relative_traversal_before_host_execution(monkeypatch) -> None: + runtime = SimpleNamespace( + state={"sandbox": {"sandbox_id": "local"}, "thread_data": _THREAD_DATA.copy()}, + context={"thread_id": "thread-1"}, + ) + + monkeypatch.setattr( + "deerflow.sandbox.tools.ensure_sandbox_initialized", + lambda runtime: SimpleNamespace(execute_command=lambda command: pytest.fail("unsafe command should not execute")), + ) + monkeypatch.setattr("deerflow.sandbox.tools.ensure_thread_directories_exist", lambda runtime: None) + monkeypatch.setattr("deerflow.sandbox.tools.is_host_bash_allowed", lambda: True) + + result = bash_tool.func( + runtime=runtime, + description="run command", + command="cat ../uploads/secret.txt", + ) + + assert "path traversal" in result + + # ---------- Skills path tests ----------