deer-flow/backend/tests/test_phase2b_integration.py
rayhpeng 2e4cb5c6a9 test+config: comprehensive Phase 2 test coverage + deprecate checkpointer config
- config.example.yaml: deprecate standalone checkpointer section, activate
  unified database:sqlite as default (drives both checkpointer + app data)
- New: test_thread_meta_repo.py (14 tests) — full ThreadMetaRepository coverage
  including check_access owner logic, list_by_owner pagination
- Extended test_run_repository.py (+4 tests) — completion preserves fields,
  list ordering desc, limit, owner_none returns all
- Extended test_run_journal.py (+8 tests) — on_chain_error, track_tokens=false,
  middleware no ai_message, unknown caller tokens, convenience fields,
  tool_error, non-summarization custom event
- Extended test_run_event_store.py (+7 tests) — DB batch seq continuity,
  make_run_event_store factory (memory/db/jsonl/fallback/unknown)
- Extended test_phase2b_integration.py (+4 tests) — create_or_reject persists,
  follow-up metadata, summarization in history, full DB-backed lifecycle
- Fixed DB integration test to use proper fake objects (not MagicMock)
  for JSON-serializable metadata
- 157 total Phase 2 tests pass, zero regressions

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-02 19:36:15 +08:00

280 lines
11 KiB
Python

"""Phase 2-B integration tests.
End-to-end test: simulate a run's complete lifecycle, verify data
is correctly written to both RunStore and RunEventStore.
"""
import asyncio
from uuid import uuid4
import pytest
from deerflow.runtime.events.store.memory import MemoryRunEventStore
from deerflow.runtime.journal import RunJournal
from deerflow.runtime.runs.store.memory import MemoryRunStore
class _FakeMessage:
def __init__(self, content, usage):
self.content = content
self.tool_calls = []
self.response_metadata = {"model_name": "test-model"}
self.usage_metadata = usage
self.id = "test-msg-id"
def model_dump(self):
return {"type": "ai", "content": self.content, "id": self.id, "tool_calls": [], "usage_metadata": self.usage_metadata, "response_metadata": self.response_metadata}
class _FakeGeneration:
def __init__(self, message):
self.message = message
class _FakeLLMResult:
def __init__(self, content, usage):
self.generations = [[_FakeGeneration(_FakeMessage(content, usage))]]
def _make_llm_response(content="Hello", usage=None):
return _FakeLLMResult(content, usage)
class TestRunLifecycle:
@pytest.mark.anyio
async def test_full_run_lifecycle(self):
"""Simulate a complete run lifecycle with RunStore + RunEventStore."""
run_store = MemoryRunStore()
event_store = MemoryRunEventStore()
# 1. Create run
await run_store.put("r1", thread_id="t1", status="pending")
# 2. Write human_message
await event_store.put(
thread_id="t1",
run_id="r1",
event_type="human_message",
category="message",
content="What is AI?",
)
# 3. Simulate RunJournal callback sequence
on_complete_data = {}
def on_complete(**data):
on_complete_data.update(data)
journal = RunJournal("r1", "t1", event_store, on_complete=on_complete, flush_threshold=100)
journal.set_first_human_message("What is AI?")
# chain_start (top-level)
journal.on_chain_start({}, {"messages": ["What is AI?"]}, run_id=uuid4(), parent_run_id=None)
# llm_start + llm_end
llm_run_id = uuid4()
journal.on_llm_start({"name": "gpt-4"}, ["prompt"], run_id=llm_run_id, tags=["lead_agent"])
usage = {"input_tokens": 50, "output_tokens": 100, "total_tokens": 150}
journal.on_llm_end(_make_llm_response("AI is artificial intelligence.", usage=usage), run_id=llm_run_id, tags=["lead_agent"])
# chain_end (triggers on_complete + flush_sync which creates a task)
journal.on_chain_end({}, run_id=uuid4(), parent_run_id=None)
await journal.flush()
# Let event loop process any pending flush tasks from _flush_sync
await asyncio.sleep(0.05)
# 4. Verify messages
messages = await event_store.list_messages("t1")
assert len(messages) == 2 # human + ai
assert messages[0]["event_type"] == "human_message"
assert messages[1]["event_type"] == "ai_message"
assert messages[1]["content"] == "AI is artificial intelligence."
# 5. Verify events
events = await event_store.list_events("t1", "r1")
event_types = {e["event_type"] for e in events}
assert "run_start" in event_types
assert "llm_start" in event_types
assert "llm_end" in event_types
assert "run_end" in event_types
# 6. Verify on_complete data
assert on_complete_data["total_tokens"] == 150
assert on_complete_data["llm_call_count"] == 1
assert on_complete_data["lead_agent_tokens"] == 150
assert on_complete_data["message_count"] == 1
assert on_complete_data["last_ai_message"] == "AI is artificial intelligence."
assert on_complete_data["first_human_message"] == "What is AI?"
@pytest.mark.anyio
async def test_run_with_tool_calls(self):
"""Simulate a run that uses tools."""
event_store = MemoryRunEventStore()
journal = RunJournal("r1", "t1", event_store, flush_threshold=100)
# tool_start + tool_end
journal.on_tool_start({"name": "web_search"}, '{"query": "AI"}', run_id=uuid4())
journal.on_tool_end("Search results...", run_id=uuid4(), name="web_search")
await journal.flush()
events = await event_store.list_events("t1", "r1")
assert len(events) == 2
assert events[0]["event_type"] == "tool_start"
assert events[1]["event_type"] == "tool_end"
@pytest.mark.anyio
async def test_multi_run_thread(self):
"""Multiple runs on the same thread maintain unified seq ordering."""
event_store = MemoryRunEventStore()
# Run 1
await event_store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content="Q1")
await event_store.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message", content="A1")
# Run 2
await event_store.put(thread_id="t1", run_id="r2", event_type="human_message", category="message", content="Q2")
await event_store.put(thread_id="t1", run_id="r2", event_type="ai_message", category="message", content="A2")
messages = await event_store.list_messages("t1")
assert len(messages) == 4
assert [m["seq"] for m in messages] == [1, 2, 3, 4]
assert messages[0]["run_id"] == "r1"
assert messages[2]["run_id"] == "r2"
@pytest.mark.anyio
async def test_runmanager_with_store_backing(self):
"""RunManager persists to RunStore when one is provided."""
from deerflow.runtime.runs.manager import RunManager
run_store = MemoryRunStore()
mgr = RunManager(store=run_store)
record = await mgr.create("t1", assistant_id="lead_agent")
# Verify persisted to store
row = await run_store.get(record.run_id)
assert row is not None
assert row["thread_id"] == "t1"
assert row["status"] == "pending"
# Status update
from deerflow.runtime.runs.schemas import RunStatus
await mgr.set_status(record.run_id, RunStatus.running)
row = await run_store.get(record.run_id)
assert row["status"] == "running"
@pytest.mark.anyio
async def test_runmanager_create_or_reject_persists(self):
"""create_or_reject also persists to store."""
from deerflow.runtime.runs.manager import RunManager
run_store = MemoryRunStore()
mgr = RunManager(store=run_store)
record = await mgr.create_or_reject("t1", "lead_agent", metadata={"key": "val"})
row = await run_store.get(record.run_id)
assert row is not None
assert row["status"] == "pending"
assert row["metadata"] == {"key": "val"}
@pytest.mark.anyio
async def test_follow_up_metadata_in_messages(self):
"""human_message metadata carries follow_up_to_run_id."""
event_store = MemoryRunEventStore()
# Run 1
await event_store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content="Q1")
await event_store.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message", content="A1")
# Run 2 (follow-up)
await event_store.put(
thread_id="t1",
run_id="r2",
event_type="human_message",
category="message",
content="Tell me more",
metadata={"follow_up_to_run_id": "r1"},
)
messages = await event_store.list_messages("t1")
assert len(messages) == 3
assert messages[2]["metadata"]["follow_up_to_run_id"] == "r1"
@pytest.mark.anyio
async def test_summarization_in_history(self):
"""summary message appears correctly in message history."""
event_store = MemoryRunEventStore()
await event_store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content="Q1")
await event_store.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message", content="A1")
await event_store.put(thread_id="t1", run_id="r2", event_type="summary", category="message", content="Previous conversation summarized.", metadata={"replaced_count": 2})
await event_store.put(thread_id="t1", run_id="r2", event_type="human_message", category="message", content="Q2")
await event_store.put(thread_id="t1", run_id="r2", event_type="ai_message", category="message", content="A2")
messages = await event_store.list_messages("t1")
assert len(messages) == 5
assert messages[2]["event_type"] == "summary"
assert messages[2]["metadata"]["replaced_count"] == 2
@pytest.mark.anyio
async def test_db_backed_run_lifecycle(self, tmp_path):
"""Full lifecycle with SQLite-backed RunRepository + DbRunEventStore."""
from deerflow.persistence.engine import close_engine, get_session_factory, init_engine
from deerflow.persistence.repositories.run_repo import RunRepository
from deerflow.runtime.events.store.db import DbRunEventStore
from deerflow.runtime.runs.manager import RunManager
url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
sf = get_session_factory()
run_store = RunRepository(sf)
event_store = DbRunEventStore(sf)
mgr = RunManager(store=run_store)
# Create run
record = await mgr.create("t1", "lead_agent")
run_id = record.run_id
# Write human_message
await event_store.put(thread_id="t1", run_id=run_id, event_type="human_message", category="message", content="Hello DB")
# Simulate journal
on_complete_data = {}
journal = RunJournal(run_id, "t1", event_store, on_complete=lambda **d: on_complete_data.update(d), flush_threshold=100)
journal.set_first_human_message("Hello DB")
journal.on_chain_start({}, {}, run_id=uuid4(), parent_run_id=None)
llm_rid = uuid4()
journal.on_llm_start({"name": "test"}, [], run_id=llm_rid, tags=["lead_agent"])
journal.on_llm_end(_make_llm_response("DB response", usage={"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}), run_id=llm_rid, tags=["lead_agent"])
journal.on_chain_end({}, run_id=uuid4(), parent_run_id=None)
await journal.flush()
await asyncio.sleep(0.05)
# Verify run persisted
row = await run_store.get(run_id)
assert row is not None
assert row["status"] == "pending" # RunManager set it, journal doesn't update status
# Update completion
await run_store.update_run_completion(run_id, status="success", **on_complete_data)
row = await run_store.get(run_id)
assert row["status"] == "success"
assert row["total_tokens"] == 15
# Verify messages from DB
messages = await event_store.list_messages("t1")
assert len(messages) == 2
assert messages[0]["event_type"] == "human_message"
assert messages[1]["event_type"] == "ai_message"
# Verify events from DB
events = await event_store.list_events("t1", run_id)
event_types = {e["event_type"] for e in events}
assert "run_start" in event_types
assert "llm_end" in event_types
assert "run_end" in event_types
await close_engine()