mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-05-03 07:18:25 +00:00
- config.example.yaml: deprecate standalone checkpointer section, activate unified database:sqlite as default (drives both checkpointer + app data)
- New: test_thread_meta_repo.py (14 tests) — full ThreadMetaRepository coverage including check_access owner logic, list_by_owner pagination
- Extended test_run_repository.py (+4 tests) — completion preserves fields, list ordering desc, limit, owner_none returns all
- Extended test_run_journal.py (+8 tests) — on_chain_error, track_tokens=false, middleware no ai_message, unknown caller tokens, convenience fields, tool_error, non-summarization custom event
- Extended test_run_event_store.py (+7 tests) — DB batch seq continuity, make_run_event_store factory (memory/db/jsonl/fallback/unknown)
- Extended test_phase2b_integration.py (+4 tests) — create_or_reject persists, follow-up metadata, summarization in history, full DB-backed lifecycle
- Fixed DB integration test to use proper fake objects (not MagicMock) for JSON-serializable metadata
- 157 total Phase 2 tests pass, zero regressions

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
280 lines
11 KiB
Python
280 lines
11 KiB
Python
"""Phase 2-B integration tests.
|
|
|
|
End-to-end test: simulate a run's complete lifecycle, verify data
|
|
is correctly written to both RunStore and RunEventStore.
|
|
"""
|
|
|
|
import asyncio
|
|
from uuid import uuid4
|
|
|
|
import pytest
|
|
|
|
from deerflow.runtime.events.store.memory import MemoryRunEventStore
|
|
from deerflow.runtime.journal import RunJournal
|
|
from deerflow.runtime.runs.store.memory import MemoryRunStore
|
|
|
|
|
|
class _FakeMessage:
|
|
def __init__(self, content, usage):
|
|
self.content = content
|
|
self.tool_calls = []
|
|
self.response_metadata = {"model_name": "test-model"}
|
|
self.usage_metadata = usage
|
|
self.id = "test-msg-id"
|
|
|
|
def model_dump(self):
|
|
return {"type": "ai", "content": self.content, "id": self.id, "tool_calls": [], "usage_metadata": self.usage_metadata, "response_metadata": self.response_metadata}
|
|
|
|
|
|
class _FakeGeneration:
|
|
def __init__(self, message):
|
|
self.message = message
|
|
|
|
|
|
class _FakeLLMResult:
    """Fake LLM result whose ``generations`` holds exactly one fake message.

    Args:
        content: Text passed to the wrapped ``_FakeMessage``.
        usage: Usage mapping (or ``None``) passed to the wrapped ``_FakeMessage``.
    """

    def __init__(self, content, usage):
        message = _FakeMessage(content, usage)
        generation = _FakeGeneration(message)
        # List of lists with one entry: generations[0][0].message is the message.
        self.generations = [[generation]]
|
|
|
|
|
|
def _make_llm_response(content="Hello", usage=None):
    """Build a ``_FakeLLMResult`` carrying one fake AI message.

    Args:
        content: Message text; defaults to ``"Hello"``.
        usage: Optional token-usage mapping forwarded to the fake message.

    Returns:
        A ``_FakeLLMResult`` constructed from ``content`` and ``usage``.
    """
    response = _FakeLLMResult(content, usage)
    return response
|
|
|
|
|
|
class TestRunLifecycle:
    """Integration scenarios wiring run stores, event stores, RunJournal and RunManager together."""

    @pytest.mark.anyio
    async def test_full_run_lifecycle(self):
        """Simulate a complete run lifecycle with RunStore + RunEventStore."""
        run_store = MemoryRunStore()
        event_store = MemoryRunEventStore()

        # 1. Create run
        await run_store.put("r1", thread_id="t1", status="pending")

        # 2. Write human_message
        await event_store.put(
            thread_id="t1",
            run_id="r1",
            event_type="human_message",
            category="message",
            content="What is AI?",
        )

        # 3. Simulate RunJournal callback sequence
        on_complete_data = {}

        def on_complete(**data):
            # Capture whatever the journal reports on completion for step 6.
            on_complete_data.update(data)

        journal = RunJournal("r1", "t1", event_store, on_complete=on_complete, flush_threshold=100)
        journal.set_first_human_message("What is AI?")

        # chain_start (top-level)
        journal.on_chain_start({}, {"messages": ["What is AI?"]}, run_id=uuid4(), parent_run_id=None)

        # llm_start + llm_end (same run_id for the pair)
        llm_run_id = uuid4()
        journal.on_llm_start({"name": "gpt-4"}, ["prompt"], run_id=llm_run_id, tags=["lead_agent"])
        usage = {"input_tokens": 50, "output_tokens": 100, "total_tokens": 150}
        journal.on_llm_end(_make_llm_response("AI is artificial intelligence.", usage=usage), run_id=llm_run_id, tags=["lead_agent"])

        # chain_end (triggers on_complete + flush_sync which creates a task)
        journal.on_chain_end({}, run_id=uuid4(), parent_run_id=None)
        await journal.flush()
        # Let event loop process any pending flush tasks from _flush_sync
        await asyncio.sleep(0.05)

        # 4. Verify messages
        messages = await event_store.list_messages("t1")
        assert len(messages) == 2  # human + ai
        assert messages[0]["event_type"] == "human_message"
        assert messages[1]["event_type"] == "ai_message"
        assert messages[1]["content"] == "AI is artificial intelligence."

        # 5. Verify events
        events = await event_store.list_events("t1", "r1")
        event_types = {e["event_type"] for e in events}
        assert "run_start" in event_types
        assert "llm_start" in event_types
        assert "llm_end" in event_types
        assert "run_end" in event_types

        # 6. Verify on_complete data
        assert on_complete_data["total_tokens"] == 150
        assert on_complete_data["llm_call_count"] == 1
        assert on_complete_data["lead_agent_tokens"] == 150
        assert on_complete_data["message_count"] == 1
        assert on_complete_data["last_ai_message"] == "AI is artificial intelligence."
        assert on_complete_data["first_human_message"] == "What is AI?"

    @pytest.mark.anyio
    async def test_run_with_tool_calls(self):
        """Simulate a run that uses tools."""
        event_store = MemoryRunEventStore()
        journal = RunJournal("r1", "t1", event_store, flush_threshold=100)

        # tool_start + tool_end
        journal.on_tool_start({"name": "web_search"}, '{"query": "AI"}', run_id=uuid4())
        journal.on_tool_end("Search results...", run_id=uuid4(), name="web_search")
        await journal.flush()

        events = await event_store.list_events("t1", "r1")
        assert len(events) == 2
        assert events[0]["event_type"] == "tool_start"
        assert events[1]["event_type"] == "tool_end"

    @pytest.mark.anyio
    async def test_multi_run_thread(self):
        """Multiple runs on the same thread maintain unified seq ordering."""
        event_store = MemoryRunEventStore()

        # Run 1
        await event_store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content="Q1")
        await event_store.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message", content="A1")

        # Run 2
        await event_store.put(thread_id="t1", run_id="r2", event_type="human_message", category="message", content="Q2")
        await event_store.put(thread_id="t1", run_id="r2", event_type="ai_message", category="message", content="A2")

        messages = await event_store.list_messages("t1")
        assert len(messages) == 4
        # seq keeps counting across runs r1 and r2 on the same thread.
        assert [m["seq"] for m in messages] == [1, 2, 3, 4]
        assert messages[0]["run_id"] == "r1"
        assert messages[2]["run_id"] == "r2"

    @pytest.mark.anyio
    async def test_runmanager_with_store_backing(self):
        """RunManager persists to RunStore when one is provided."""
        from deerflow.runtime.runs.manager import RunManager

        run_store = MemoryRunStore()
        mgr = RunManager(store=run_store)

        record = await mgr.create("t1", assistant_id="lead_agent")
        # Verify persisted to store
        row = await run_store.get(record.run_id)
        assert row is not None
        assert row["thread_id"] == "t1"
        assert row["status"] == "pending"

        # Status update
        from deerflow.runtime.runs.schemas import RunStatus

        await mgr.set_status(record.run_id, RunStatus.running)
        row = await run_store.get(record.run_id)
        assert row["status"] == "running"

    @pytest.mark.anyio
    async def test_runmanager_create_or_reject_persists(self):
        """create_or_reject also persists to store."""
        from deerflow.runtime.runs.manager import RunManager

        run_store = MemoryRunStore()
        mgr = RunManager(store=run_store)

        record = await mgr.create_or_reject("t1", "lead_agent", metadata={"key": "val"})
        row = await run_store.get(record.run_id)
        assert row is not None
        assert row["status"] == "pending"
        assert row["metadata"] == {"key": "val"}

    @pytest.mark.anyio
    async def test_follow_up_metadata_in_messages(self):
        """human_message metadata carries follow_up_to_run_id."""
        event_store = MemoryRunEventStore()

        # Run 1
        await event_store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content="Q1")
        await event_store.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message", content="A1")

        # Run 2 (follow-up)
        await event_store.put(
            thread_id="t1",
            run_id="r2",
            event_type="human_message",
            category="message",
            content="Tell me more",
            metadata={"follow_up_to_run_id": "r1"},
        )

        messages = await event_store.list_messages("t1")
        assert len(messages) == 3
        assert messages[2]["metadata"]["follow_up_to_run_id"] == "r1"

    @pytest.mark.anyio
    async def test_summarization_in_history(self):
        """summary message appears correctly in message history."""
        event_store = MemoryRunEventStore()

        await event_store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content="Q1")
        await event_store.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message", content="A1")
        await event_store.put(thread_id="t1", run_id="r2", event_type="summary", category="message", content="Previous conversation summarized.", metadata={"replaced_count": 2})
        await event_store.put(thread_id="t1", run_id="r2", event_type="human_message", category="message", content="Q2")
        await event_store.put(thread_id="t1", run_id="r2", event_type="ai_message", category="message", content="A2")

        messages = await event_store.list_messages("t1")
        assert len(messages) == 5
        assert messages[2]["event_type"] == "summary"
        assert messages[2]["metadata"]["replaced_count"] == 2

    @pytest.mark.anyio
    async def test_db_backed_run_lifecycle(self, tmp_path):
        """Full lifecycle with SQLite-backed RunRepository + DbRunEventStore."""
        from deerflow.persistence.engine import close_engine, get_session_factory, init_engine
        from deerflow.persistence.repositories.run_repo import RunRepository
        from deerflow.runtime.events.store.db import DbRunEventStore
        from deerflow.runtime.runs.manager import RunManager

        url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}"
        await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path))
        # init_engine/get_session_factory hand back no engine handle, so the
        # engine appears to be process-wide state (TODO confirm). Close it in
        # ``finally`` so a failing assertion cannot leave it open for later tests.
        try:
            sf = get_session_factory()

            run_store = RunRepository(sf)
            event_store = DbRunEventStore(sf)
            mgr = RunManager(store=run_store)

            # Create run
            record = await mgr.create("t1", "lead_agent")
            run_id = record.run_id

            # Write human_message
            await event_store.put(thread_id="t1", run_id=run_id, event_type="human_message", category="message", content="Hello DB")

            # Simulate journal
            on_complete_data = {}
            journal = RunJournal(run_id, "t1", event_store, on_complete=lambda **d: on_complete_data.update(d), flush_threshold=100)
            journal.set_first_human_message("Hello DB")

            journal.on_chain_start({}, {}, run_id=uuid4(), parent_run_id=None)
            llm_rid = uuid4()
            journal.on_llm_start({"name": "test"}, [], run_id=llm_rid, tags=["lead_agent"])
            journal.on_llm_end(_make_llm_response("DB response", usage={"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}), run_id=llm_rid, tags=["lead_agent"])
            journal.on_chain_end({}, run_id=uuid4(), parent_run_id=None)
            await journal.flush()
            # Give any flush task scheduled during on_chain_end a chance to run.
            await asyncio.sleep(0.05)

            # Verify run persisted
            row = await run_store.get(run_id)
            assert row is not None
            assert row["status"] == "pending"  # RunManager set it, journal doesn't update status

            # Update completion
            await run_store.update_run_completion(run_id, status="success", **on_complete_data)
            row = await run_store.get(run_id)
            assert row["status"] == "success"
            assert row["total_tokens"] == 15

            # Verify messages from DB
            messages = await event_store.list_messages("t1")
            assert len(messages) == 2
            assert messages[0]["event_type"] == "human_message"
            assert messages[1]["event_type"] == "ai_message"

            # Verify events from DB
            events = await event_store.list_events("t1", run_id)
            event_types = {e["event_type"] for e in events}
            assert "run_start" in event_types
            assert "llm_end" in event_types
            assert "run_end" in event_types
        finally:
            await close_engine()
|