deer-flow/backend/tests/test_persistence_scaffold.py
rayhpeng b94383c93a fix(persistence): address 22 review comments from CodeQL, Copilot, and Code Quality
Bug fixes:
- Sanitize log params to prevent log injection (CodeQL)
- Reset threads_meta.status to idle/error when run completes
- Attach messages only to latest checkpoint in /history response
- Write threads_meta on POST /threads so new threads appear in search

Lint fixes:
- Remove unused imports (journal.py, migrations/env.py, test_converters.py)
- Convert lambda to named function (engine.py, Ruff E731)
- Remove unused logger definitions in repos (Ruff F841)
- Add logging to JSONL decode errors and empty except blocks
- Separate assert side-effects in tests (CodeQL)
- Remove unused local variables in tests (Ruff F841)
- Fix max_trace_content truncation to use byte length, not char length

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 22:49:26 +08:00

233 lines
8.4 KiB
Python

"""Tests for the persistence layer scaffolding.
Tests:
1. DatabaseConfig property derivation (paths, URLs)
2. MemoryRunStore CRUD + owner_id filtering
3. Base.to_dict() via inspect mixin
4. Engine init/close lifecycle (memory + SQLite)
5. Postgres missing-dep error message
"""
from datetime import UTC, datetime
import pytest
from deerflow.config.database_config import DatabaseConfig
from deerflow.runtime.runs.store.memory import MemoryRunStore
# -- DatabaseConfig --
class TestDatabaseConfig:
    """Checks the values DatabaseConfig exposes for each backend setting."""

    def test_defaults(self):
        """A config built with no arguments uses the memory backend and pool size 5."""
        cfg = DatabaseConfig()
        assert cfg.backend == "memory"
        assert cfg.pool_size == 5

    def test_sqlite_paths_are_different(self):
        """Checkpointer and app SQLite paths are distinct files under sqlite_dir."""
        cfg = DatabaseConfig(backend="sqlite", sqlite_dir="./mydata")

        checkpoint_path = cfg.checkpointer_sqlite_path
        assert checkpoint_path.endswith("checkpoints.db")
        assert "mydata" in checkpoint_path

        app_path = cfg.app_sqlite_path
        assert app_path.endswith("app.db")
        assert checkpoint_path != app_path

    def test_app_sqlalchemy_url_sqlite(self):
        """The SQLite app URL uses the aiosqlite driver and points at app.db."""
        sqlite_url = DatabaseConfig(backend="sqlite", sqlite_dir="./data").app_sqlalchemy_url
        assert sqlite_url.startswith("sqlite+aiosqlite:///")
        assert "app.db" in sqlite_url

    def test_app_sqlalchemy_url_postgres(self):
        """A plain postgresql:// URL is exposed with the asyncpg driver prefix."""
        cfg = DatabaseConfig(backend="postgres", postgres_url="postgresql://u:p@h:5432/db")
        pg_url = cfg.app_sqlalchemy_url
        assert pg_url.startswith("postgresql+asyncpg://")
        assert "u:p@h:5432/db" in pg_url

    def test_app_sqlalchemy_url_postgres_already_asyncpg(self):
        """A URL that already names asyncpg does not get the driver added twice."""
        cfg = DatabaseConfig(
            backend="postgres",
            postgres_url="postgresql+asyncpg://u:p@h:5432/db",
        )
        assert cfg.app_sqlalchemy_url.count("asyncpg") == 1

    def test_memory_has_no_url(self):
        """Reading the SQLAlchemy URL on the memory backend raises ValueError."""
        cfg = DatabaseConfig(backend="memory")
        with pytest.raises(ValueError, match="No SQLAlchemy URL"):
            _ = cfg.app_sqlalchemy_url
# -- MemoryRunStore --
class TestMemoryRunStore:
    """CRUD, owner filtering and pending-run listing checks for MemoryRunStore."""

    @pytest.fixture
    def store(self):
        """Provide a fresh MemoryRunStore for each test."""
        return MemoryRunStore()

    @pytest.mark.anyio
    async def test_put_and_get(self, store):
        """A stored run can be read back with its run_id and status."""
        await store.put("r1", thread_id="t1", status="pending")

        fetched = await store.get("r1")

        assert fetched is not None
        assert fetched["run_id"] == "r1"
        assert fetched["status"] == "pending"

    @pytest.mark.anyio
    async def test_get_missing_returns_none(self, store):
        """Looking up an unknown run id yields None."""
        missing = await store.get("nope")
        assert missing is None

    @pytest.mark.anyio
    async def test_update_status(self, store):
        """update_status changes the status returned by get."""
        await store.put("r1", thread_id="t1")
        await store.update_status("r1", "running")

        fetched = await store.get("r1")
        assert fetched["status"] == "running"

    @pytest.mark.anyio
    async def test_update_status_with_error(self, store):
        """update_status stores the error text alongside the new status."""
        await store.put("r1", thread_id="t1")
        await store.update_status("r1", "error", error="boom")

        fetched = await store.get("r1")
        assert fetched["status"] == "error"
        assert fetched["error"] == "boom"

    @pytest.mark.anyio
    async def test_list_by_thread(self, store):
        """list_by_thread returns only the runs of the requested thread."""
        for run_id, thread_id in (("r1", "t1"), ("r2", "t1"), ("r3", "t2")):
            await store.put(run_id, thread_id=thread_id)

        thread_runs = await store.list_by_thread("t1")

        assert len(thread_runs) == 2
        assert all(run["thread_id"] == "t1" for run in thread_runs)

    @pytest.mark.anyio
    async def test_list_by_thread_owner_filter(self, store):
        """Passing owner_id restricts the listing to that owner's runs."""
        for run_id, owner in (("r1", "alice"), ("r2", "bob")):
            await store.put(run_id, thread_id="t1", owner_id=owner)

        alice_runs = await store.list_by_thread("t1", owner_id="alice")

        assert len(alice_runs) == 1
        assert alice_runs[0]["owner_id"] == "alice"

    @pytest.mark.anyio
    async def test_owner_none_returns_all(self, store):
        """owner_id=None applies no owner filter."""
        for run_id, owner in (("r1", "alice"), ("r2", "bob")):
            await store.put(run_id, thread_id="t1", owner_id=owner)

        all_runs = await store.list_by_thread("t1", owner_id=None)

        assert len(all_runs) == 2

    @pytest.mark.anyio
    async def test_delete(self, store):
        """A deleted run is no longer returned by get."""
        await store.put("r1", thread_id="t1")
        await store.delete("r1")

        remaining = await store.get("r1")
        assert remaining is None

    @pytest.mark.anyio
    async def test_delete_nonexistent_is_noop(self, store):
        """Deleting an unknown run id must not raise."""
        await store.delete("nope")

    @pytest.mark.anyio
    async def test_list_pending(self, store):
        """list_pending returns only the runs whose status is pending."""
        seeded = (("r1", "t1", "pending"), ("r2", "t1", "running"), ("r3", "t2", "pending"))
        for run_id, thread_id, status in seeded:
            await store.put(run_id, thread_id=thread_id, status=status)

        pending_runs = await store.list_pending()

        assert len(pending_runs) == 2
        assert all(run["status"] == "pending" for run in pending_runs)

    @pytest.mark.anyio
    async def test_list_pending_respects_before(self, store):
        """Runs created after the `before` cutoff are left out of list_pending."""
        past = "2020-01-01T00:00:00+00:00"
        future = "2099-01-01T00:00:00+00:00"
        await store.put("r1", thread_id="t1", status="pending", created_at=past)
        await store.put("r2", thread_id="t1", status="pending", created_at=future)

        # The cutoff is "now" as a timezone-aware ISO string, which falls between the two runs.
        cutoff = datetime.now(UTC).isoformat()
        pending_runs = await store.list_pending(before=cutoff)

        assert len(pending_runs) == 1
        assert pending_runs[0]["run_id"] == "r1"

    @pytest.mark.anyio
    async def test_list_pending_fifo_order(self, store):
        """The run with the earliest created_at comes first, regardless of insert order."""
        # Insert the newer run first so that insertion order and creation order disagree.
        await store.put("r2", thread_id="t1", status="pending", created_at="2024-01-02T00:00:00+00:00")
        await store.put("r1", thread_id="t1", status="pending", created_at="2024-01-01T00:00:00+00:00")

        pending_runs = await store.list_pending()

        assert pending_runs[0]["run_id"] == "r1"
# -- Base.to_dict mixin --
class TestBaseToDictMixin:
    """Exercises Base.to_dict() and repr() on a throwaway mapped model."""

    @pytest.mark.anyio
    async def test_to_dict_and_exclude(self, tmp_path):
        """Create a temp SQLite DB with a minimal model, verify to_dict.

        Covers the full dict, the ``exclude`` argument, and that the class name
        appears in ``repr``. The engine is disposed even when an assertion
        fails, so a failing run does not leave an open engine behind.
        """
        from sqlalchemy import String
        from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine
        from sqlalchemy.orm import Mapped, mapped_column

        from deerflow.persistence.base import Base

        class _Tmp(Base):
            __tablename__ = "_tmp_test"

            id: Mapped[str] = mapped_column(String(64), primary_key=True)
            name: Mapped[str] = mapped_column(String(128))

        engine = create_async_engine(f"sqlite+aiosqlite:///{tmp_path / 'test.db'}")
        try:
            async with engine.begin() as conn:
                await conn.run_sync(Base.metadata.create_all)

            sf = async_sessionmaker(engine, expire_on_commit=False)
            async with sf() as session:
                session.add(_Tmp(id="1", name="hello"))
                await session.commit()
                obj = await session.get(_Tmp, "1")

                # Fail with a clear message instead of an AttributeError on None.
                assert obj is not None
                assert obj.to_dict() == {"id": "1", "name": "hello"}
                assert obj.to_dict(exclude={"name"}) == {"id": "1"}
                assert "_Tmp" in repr(obj)
        finally:
            # Always release the engine, including on assertion failure.
            await engine.dispose()
# -- Engine lifecycle --
class TestEngineLifecycle:
    """Init/close lifecycle checks for the persistence engine helpers."""

    @pytest.mark.anyio
    async def test_memory_is_noop(self):
        """The memory backend creates no session factory."""
        from deerflow.persistence.engine import close_engine, get_session_factory, init_engine

        await init_engine("memory")
        try:
            assert get_session_factory() is None
        finally:
            # Close even if the assertion fails so engine state is not left for later tests.
            await close_engine()

    @pytest.mark.anyio
    async def test_sqlite_creates_engine(self, tmp_path):
        """The SQLite backend yields a usable session factory until close_engine runs."""
        from deerflow.persistence.engine import close_engine, get_session_factory, init_engine

        url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
        await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
        try:
            sf = get_session_factory()
            assert sf is not None
            async with sf() as session:
                assert session is not None
        finally:
            # Close even on failure so the engine is not left initialised for later tests.
            await close_engine()

        assert get_session_factory() is None

    @pytest.mark.anyio
    async def test_postgres_without_asyncpg_gives_actionable_error(self):
        """If asyncpg is not installed, error message tells user what to do."""
        from deerflow.persistence.engine import init_engine

        # Probe for asyncpg with a minimal try body; the skip decision is made
        # outside the try so only the import itself is guarded.
        try:
            import asyncpg  # noqa: F401
        except ImportError:
            asyncpg_installed = False
        else:
            asyncpg_installed = True

        if asyncpg_installed:
            pytest.skip("asyncpg is installed -- cannot test missing-dep path")

        with pytest.raises(ImportError, match="uv sync --extra postgres"):
            await init_engine("postgres", url="postgresql+asyncpg://x:x@localhost/x")