mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-06-09 09:02:02 +00:00
* fix(dev): create backend/sandbox before uvicorn reload-exclude (#3459) #3426 switched the dev gateway's --reload-exclude patterns to absolute paths. uvicorn only excludes an absolute path directly when it already exists as a directory; otherwise it globs the pattern, and Python 3.12's pathlib raises NotImplementedError("Non-relative patterns are unsupported") for an absolute glob pattern. serve.sh mkdir'd the .deer-flow excludes but not backend/sandbox, so `make dev` crashed on startup on a fresh checkout under Python 3.12 (#3454). docker/dev-entrypoint.sh had the same latent gap. Create backend/sandbox in both launchers so every absolute exclude stays on uvicorn's is_dir() short-circuit. Add a regression test that pins the uvicorn mechanism (crash on missing dir, safe once created) and enforces that every absolute --reload-exclude is mkdir'd before launch. Closes #3459 * test(dev): harden reload-exclude invariant parser against false pass/negatives The launcher invariant test parsed shell with a "mkdir -p" line filter and a substring membership check. Two latent gaps (sub-threshold for this fix, but this code guards a user-facing startup path, so close them): - A `\`-continued multi-line `mkdir` would drop arguments on continuation lines, silently weakening coverage. - Substring membership could false-pass when an exclude is a path-prefix of a different created dir (e.g. `/app/backend/sandbox` "found" inside `/app/backend/sandbox-other`). Fold line-continuations, drop comments, and shlex-tokenize each `mkdir` argument list into an exact set (quotes stripped, `$VAR` literal); assert exact set membership. Same shlex handling for `--reload-exclude` values. Verified the parser still flags the pre-fix missing `backend/sandbox` (RED preserved) and no longer false-passes on a path-prefix. 
* fix(dev): gitignore backend/sandbox runtime dir + pin mkdir-before-launch Address two review findings on the #3459 fix: - backend/sandbox was described as "gitignored runtime state" but no ignore rule actually matched it. Add an anchored `/sandbox/` to backend/.gitignore (anchored so it does NOT shadow the source package backend/packages/harness/deerflow/sandbox/) so sandbox artifacts created at runtime can't pollute the working tree or be committed by accident. New test asserts content under backend/sandbox is ignored, making the claim verifiable. - The launcher invariant test only proved the sandbox mkdir exists somewhere, not that it runs before uvicorn starts. Add an order test (sandbox mkdir line must precede the `uv run uvicorn` launch) so a future edit can't move the mkdir below the launch and silently reintroduce the crash. * Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> * test(dev): fix reload-exclude parser to handle serve.sh's quoted flag bundle The previous autofix tokenized each whole line with shlex, but serve.sh packs every flag into a single double-quoted `GATEWAY_EXTRA_FLAGS="..."` assignment. shlex collapses that into one token, so no `--reload-exclude` flag is found and `test_launcher_precreates_every_absolute_reload_exclude[scripts/serve.sh]` failed CI with "expected at least one absolute reload-exclude". Parse `--reload-exclude` with a regex that matches a balanced single/double quoted group or a bare token, so the assignment's surrounding `"` is never swallowed into the value. This recovers all three serve.sh excludes (the prior regex also silently dropped the last `$BACKEND_RUNTIME_HOME` because the adjacent closing quote broke shlex) while still covering dev-entrypoint.sh and the space-separated `--reload-exclude <value>` form. 
--------- Co-authored-by: Willem Jiang <willem.jiang@gmail.com> Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
117 lines
3.8 KiB
Python
117 lines
3.8 KiB
Python
"""Unit tests for docker/dev-entrypoint.sh (UV_EXTRAS validation + parsing).

Exercises the script via its `--print-extras` dry-run hook so we don't actually
launch uvicorn or hit /app/logs. Together with test_detect_uv_extras.py these
cover both the local make-dev path and the docker-compose-dev path with the
same shape — see PR #2767 / Issue #2754.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import os
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
import pytest
|
|
|
|
# Two directories above the directory that contains this test module.
# NOTE(review): presumably the repository root — confirm against the repo layout.
REPO_ROOT = Path(__file__).resolve().parents[2]
# Shell script under test.
ENTRYPOINT = REPO_ROOT.joinpath("docker", "dev-entrypoint.sh")
|
|
|
|
|
|
def _run(uv_extras: str | None) -> subprocess.CompletedProcess[str]:
    """Invoke `dev-entrypoint.sh --print-extras` with UV_EXTRAS set.

    Args:
        uv_extras: Value to export as ``UV_EXTRAS`` for the child process, or
            ``None`` to run the script with the variable unset.

    Returns:
        The completed process with stdout/stderr captured as text. The exit
        status is not checked here; callers assert on ``returncode``.

    Raises:
        subprocess.TimeoutExpired: If the script does not exit within the
            timeout passed to ``subprocess.run`` below.
    """
    # Start from the current environment but drop any inherited UV_EXTRAS so
    # the surrounding shell/CI configuration cannot leak into the test.
    env = os.environ.copy()
    env.pop("UV_EXTRAS", None)
    if uv_extras is not None:
        env["UV_EXTRAS"] = uv_extras
    return subprocess.run(
        ["sh", str(ENTRYPOINT), "--print-extras"],
        env=env,
        capture_output=True,
        text=True,
        check=False,
        # `--print-extras` is meant to be a dry run; bound the wait so a
        # regression that falls through to a long-running process fails the
        # test instead of hanging the whole suite.
        timeout=60,
    )
|
|
|
|
|
|
def test_entrypoint_script_exists_and_is_posix_sh():
    """The entrypoint file exists and passes a `sh -n` parse check."""
    assert ENTRYPOINT.is_file()
    # `sh -n` only parses the script (nothing is executed), so syntax errors
    # surface here rather than at container start.
    syntax_check = subprocess.run(
        ["sh", "-n", str(ENTRYPOINT)],
        check=False,
        text=True,
        capture_output=True,
    )
    assert syntax_check.returncode == 0, syntax_check.stderr
|
|
|
|
|
|
def test_entrypoint_excludes_runtime_state_from_uvicorn_reload():
    """Pin the reload include/exclude flags and the mkdir line in the script text."""
    script = ENTRYPOINT.read_text(encoding="utf-8")

    required_snippets = (
        ': "${DEER_FLOW_HOME:=/app/backend/.deer-flow}"',
        # sandbox must be created too, not just .deer-flow (#3459 / #3454).
        'mkdir -p "$DEER_FLOW_HOME" /app/backend/.deer-flow /app/backend/sandbox',
        "--reload-include='*.yaml'",
        "--reload-include='.env'",
        "--reload-exclude=/app/backend/sandbox",
        '--reload-exclude="$DEER_FLOW_HOME"',
        "--reload-exclude=/app/backend/.deer-flow",
    )
    for snippet in required_snippets:
        assert snippet in script

    # The two include patterns must be separate flags, not one combined value.
    assert "--reload-include='*.yaml .env'" not in script
|
|
|
|
|
|
def test_no_uv_extras_yields_empty_flags():
    """With UV_EXTRAS unset the script exits 0 and prints no flags."""
    result = _run(None)
    printed = result.stdout.strip()
    assert result.returncode == 0
    assert printed == ""
|
|
|
|
|
|
def test_single_extra():
    """A single extra name becomes one `--extra <name>` flag."""
    result = _run("postgres")
    assert result.returncode == 0
    flags = result.stdout.strip()
    assert flags == "--extra postgres"
|
|
|
|
|
|
def test_multi_extra_comma_separated():
    """Comma-separated names each become their own `--extra` flag, in order."""
    completed = _run("postgres,ollama")
    assert completed.returncode == 0
    emitted = completed.stdout.strip()
    assert emitted == "--extra postgres --extra ollama"
|
|
|
|
|
|
def test_multi_extra_whitespace_separated():
    """Whitespace-separated names each become their own `--extra` flag, in order."""
    completed = _run("postgres ollama")
    assert completed.returncode == 0
    emitted = completed.stdout.strip()
    assert emitted == "--extra postgres --extra ollama"
|
|
|
|
|
|
def test_multi_extra_mixed_separators():
    """Mixed commas, padding spaces and a trailing comma still yield two flags."""
    completed = _run(" postgres , ollama ,")
    assert completed.returncode == 0
    emitted = completed.stdout.strip()
    assert emitted == "--extra postgres --extra ollama"
|
|
|
|
|
|
def test_empty_string_yields_empty_flags():
    """UV_EXTRAS set to the empty string exits 0 and prints no flags."""
    result = _run("")
    printed = result.stdout.strip()
    assert result.returncode == 0
    assert printed == ""
|
|
|
|
|
|
@pytest.mark.parametrize(
    "bad_value",
    [
        "; rm -rf /",  # classic command-injection attempt
        "$(whoami)",  # command substitution
        "`echo bad`",  # backtick substitution
        "postgres;evil",  # legal and illegal characters mixed in one token
        "1postgres",  # starts with a digit
        "-postgres",  # starts with a hyphen
        "post gres extra/path",  # contains a slash (in the last whitespace-separated token)
    ],
)
def test_metacharacters_abort_with_nonzero_exit(bad_value):
    """Invalid UV_EXTRAS values abort: non-zero exit, error on stderr, no flags."""
    rejected = _run(bad_value)
    assert rejected.returncode != 0, f"expected abort for {bad_value!r}, got 0"
    assert "is invalid" in rejected.stderr
    assert rejected.stdout.strip() == ""
|
|
|
|
|
|
def test_underscores_and_hyphens_in_name_are_allowed():
    """Names containing `_` or `-` are accepted, mirroring uv's key shape.

    Matches what uv accepts for `[project.optional-dependencies]` keys.
    """
    accepted = _run("post_gres,post-gres")
    assert accepted.returncode == 0
    flags = accepted.stdout.strip()
    assert flags == "--extra post_gres --extra post-gres"
|