diff --git a/.env.example b/.env.example index 5b11c82db..4a5251659 100644 --- a/.env.example +++ b/.env.example @@ -33,5 +33,9 @@ INFOQUEST_API_KEY=your-infoquest-api-key # GitHub API Token # GITHUB_TOKEN=your-github-token + +# Database (only needed when config.yaml has database.backend: postgres) +# DATABASE_URL=postgresql://deerflow:password@localhost:5432/deerflow +# # WECOM_BOT_ID=your-wecom-bot-id # WECOM_BOT_SECRET=your-wecom-bot-secret diff --git a/backend/Dockerfile b/backend/Dockerfile index c0f59d2f1..c046268d3 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -13,6 +13,9 @@ FROM python:3.12-slim-bookworm AS builder ARG NODE_MAJOR=22 ARG APT_MIRROR ARG UV_INDEX_URL +# Optional extras to install (e.g. "postgres" for PostgreSQL support) +# Usage: docker build --build-arg UV_EXTRAS=postgres ... +ARG UV_EXTRAS # Optionally override apt mirror for restricted networks (e.g. APT_MIRROR=mirrors.aliyun.com) RUN if [ -n "${APT_MIRROR}" ]; then \ @@ -43,8 +46,9 @@ WORKDIR /app COPY backend ./backend # Install dependencies with cache mount +# When UV_EXTRAS is set (e.g. "postgres"), installs optional dependencies. 
RUN --mount=type=cache,target=/root/.cache/uv \ - sh -c "cd backend && UV_INDEX_URL=${UV_INDEX_URL:-https://pypi.org/simple} uv sync" + sh -c "cd backend && UV_INDEX_URL=${UV_INDEX_URL:-https://pypi.org/simple} uv sync ${UV_EXTRAS:+--extra $UV_EXTRAS}" # ── Stage 2: Dev ────────────────────────────────────────────────────────────── # Retains compiler toolchain from builder so startup-time `uv sync` can build diff --git a/backend/app/gateway/app.py b/backend/app/gateway/app.py index 39d17498f..01606a8cb 100644 --- a/backend/app/gateway/app.py +++ b/backend/app/gateway/app.py @@ -11,6 +11,7 @@ from app.gateway.routers import ( artifacts, assistants_compat, channels, + feedback, mcp, memory, models, @@ -199,6 +200,9 @@ This gateway provides custom endpoints for models, MCP configuration, skills, an # Assistants compatibility API (LangGraph Platform stub) app.include_router(assistants_compat.router) + # Feedback API is mounted at /api/threads/{thread_id}/runs/{run_id}/feedback + app.include_router(feedback.router) + # Thread Runs API (LangGraph Platform-compatible runs lifecycle) app.include_router(thread_runs.router) diff --git a/backend/app/gateway/deps.py b/backend/app/gateway/deps.py index 115868331..bdcea365c 100644 --- a/backend/app/gateway/deps.py +++ b/backend/app/gateway/deps.py @@ -1,7 +1,8 @@ """Centralized accessors for singleton objects stored on ``app.state``. **Getters** (used by routers): raise 503 when a required dependency is -missing, except ``get_store`` which returns ``None``. +missing, except ``get_store`` which returns ``None``; ``get_thread_meta_repo`` +raises 503 like the other getters. Initialization is handled directly in ``app.py`` via :class:`AsyncExitStack`. 
""" @@ -13,7 +14,7 @@ from contextlib import AsyncExitStack, asynccontextmanager from fastapi import FastAPI, HTTPException, Request -from deerflow.runtime import RunManager, StreamBridge +from deerflow.runtime import RunContext, RunManager @asynccontextmanager @@ -26,45 +27,110 @@ async def langgraph_runtime(app: FastAPI) -> AsyncGenerator[None, None]: yield """ from deerflow.agents.checkpointer.async_provider import make_checkpointer + from deerflow.config import get_app_config + from deerflow.persistence.engine import close_engine, get_session_factory, init_engine_from_config from deerflow.runtime import make_store, make_stream_bridge + from deerflow.runtime.events.store import make_run_event_store async with AsyncExitStack() as stack: app.state.stream_bridge = await stack.enter_async_context(make_stream_bridge()) + + # Initialize persistence engine BEFORE checkpointer so that + # auto-create-database logic runs first (postgres backend). + config = get_app_config() + await init_engine_from_config(config.database) + app.state.checkpointer = await stack.enter_async_context(make_checkpointer()) app.state.store = await stack.enter_async_context(make_store()) - app.state.run_manager = RunManager() - yield + + # Initialize repositories — one get_session_factory() call for all. 
+ sf = get_session_factory() + if sf is not None: + from deerflow.persistence.feedback import FeedbackRepository + from deerflow.persistence.run import RunRepository + from deerflow.persistence.thread_meta import ThreadMetaRepository + + app.state.run_store = RunRepository(sf) + app.state.feedback_repo = FeedbackRepository(sf) + app.state.thread_meta_repo = ThreadMetaRepository(sf) + else: + from deerflow.persistence.thread_meta import MemoryThreadMetaStore + from deerflow.runtime.runs.store.memory import MemoryRunStore + + app.state.run_store = MemoryRunStore() + app.state.feedback_repo = None + app.state.thread_meta_repo = MemoryThreadMetaStore(app.state.store) + + # Run event store (has its own factory with config-driven backend selection) + run_events_config = getattr(config, "run_events", None) + app.state.run_event_store = make_run_event_store(run_events_config) + + # RunManager with store backing for persistence + app.state.run_manager = RunManager(store=app.state.run_store) + + try: + yield + finally: + await close_engine() # --------------------------------------------------------------------------- -# Getters – called by routers per-request +# Getters -- called by routers per-request # --------------------------------------------------------------------------- -def get_stream_bridge(request: Request) -> StreamBridge: - """Return the global :class:`StreamBridge`, or 503.""" - bridge = getattr(request.app.state, "stream_bridge", None) - if bridge is None: - raise HTTPException(status_code=503, detail="Stream bridge not available") - return bridge +def _require(attr: str, label: str): + """Create a FastAPI dependency that returns ``app.state.<attr>`` or 503.""" + + def dep(request: Request): + val = getattr(request.app.state, attr, None) + if val is None: + raise HTTPException(status_code=503, detail=f"{label} not available") + return val + + dep.__name__ = dep.__qualname__ = f"get_{attr}" + return dep -def get_run_manager(request: Request) -> RunManager: - 
"""Return the global :class:`RunManager`, or 503.""" - mgr = getattr(request.app.state, "run_manager", None) - if mgr is None: - raise HTTPException(status_code=503, detail="Run manager not available") - return mgr - - -def get_checkpointer(request: Request): - """Return the global checkpointer, or 503.""" - cp = getattr(request.app.state, "checkpointer", None) - if cp is None: - raise HTTPException(status_code=503, detail="Checkpointer not available") - return cp +get_stream_bridge = _require("stream_bridge", "Stream bridge") +get_run_manager = _require("run_manager", "Run manager") +get_checkpointer = _require("checkpointer", "Checkpointer") +get_run_event_store = _require("run_event_store", "Run event store") +get_feedback_repo = _require("feedback_repo", "Feedback") +get_run_store = _require("run_store", "Run store") def get_store(request: Request): """Return the global store (may be ``None`` if not configured).""" return getattr(request.app.state, "store", None) + + +get_thread_meta_repo = _require("thread_meta_repo", "Thread metadata store") + + +def get_run_context(request: Request) -> RunContext: + """Build a :class:`RunContext` from ``app.state`` singletons. + + Returns a *base* context with infrastructure dependencies. Callers that + need per-run fields (e.g. ``follow_up_to_run_id``) should use + ``dataclasses.replace(ctx, follow_up_to_run_id=...)`` before passing it + to :func:`run_agent`. + """ + from deerflow.config import get_app_config + + return RunContext( + checkpointer=get_checkpointer(request), + store=get_store(request), + event_store=get_run_event_store(request), + run_events_config=getattr(get_app_config(), "run_events", None), + thread_meta_repo=get_thread_meta_repo(request), + ) + + +async def get_current_user(request: Request) -> str | None: + """Extract user identity from request. + + Phase 2: always returns None (no authentication). + Phase 3: extract user_id from JWT / session / API key header. 
+ """ + return None diff --git a/backend/app/gateway/routers/feedback.py b/backend/app/gateway/routers/feedback.py new file mode 100644 index 000000000..579b29a9e --- /dev/null +++ b/backend/app/gateway/routers/feedback.py @@ -0,0 +1,127 @@ +"""Feedback endpoints — create, list, stats, delete. + +Allows users to submit thumbs-up/down feedback on runs, +optionally scoped to a specific message. +""" + +from __future__ import annotations + +import logging +from typing import Any + +from fastapi import APIRouter, HTTPException, Request +from pydantic import BaseModel, Field + +from app.gateway.deps import get_current_user, get_feedback_repo, get_run_store + +logger = logging.getLogger(__name__) +router = APIRouter(prefix="/api/threads", tags=["feedback"]) + + +# --------------------------------------------------------------------------- +# Request / response models +# --------------------------------------------------------------------------- + + +class FeedbackCreateRequest(BaseModel): + rating: int = Field(..., description="Feedback rating: +1 (positive) or -1 (negative)") + comment: str | None = Field(default=None, description="Optional text feedback") + message_id: str | None = Field(default=None, description="Optional: scope feedback to a specific message") + + +class FeedbackResponse(BaseModel): + feedback_id: str + run_id: str + thread_id: str + owner_id: str | None = None + message_id: str | None = None + rating: int + comment: str | None = None + created_at: str = "" + + +class FeedbackStatsResponse(BaseModel): + run_id: str + total: int = 0 + positive: int = 0 + negative: int = 0 + + +# --------------------------------------------------------------------------- +# Endpoints +# --------------------------------------------------------------------------- + + +@router.post("/{thread_id}/runs/{run_id}/feedback", response_model=FeedbackResponse) +async def create_feedback( + thread_id: str, + run_id: str, + body: FeedbackCreateRequest, + request: Request, +) -> 
dict[str, Any]: + """Submit feedback (thumbs-up/down) for a run.""" + if body.rating not in (1, -1): + raise HTTPException(status_code=400, detail="rating must be +1 or -1") + + user_id = await get_current_user(request) + + # Validate run exists and belongs to thread + run_store = get_run_store(request) + run = await run_store.get(run_id) + if run is None: + raise HTTPException(status_code=404, detail=f"Run {run_id} not found") + if run.get("thread_id") != thread_id: + raise HTTPException(status_code=404, detail=f"Run {run_id} not found in thread {thread_id}") + + feedback_repo = get_feedback_repo(request) + return await feedback_repo.create( + run_id=run_id, + thread_id=thread_id, + rating=body.rating, + owner_id=user_id, + message_id=body.message_id, + comment=body.comment, + ) + + +@router.get("/{thread_id}/runs/{run_id}/feedback", response_model=list[FeedbackResponse]) +async def list_feedback( + thread_id: str, + run_id: str, + request: Request, +) -> list[dict[str, Any]]: + """List all feedback for a run.""" + feedback_repo = get_feedback_repo(request) + return await feedback_repo.list_by_run(thread_id, run_id) + + +@router.get("/{thread_id}/runs/{run_id}/feedback/stats", response_model=FeedbackStatsResponse) +async def feedback_stats( + thread_id: str, + run_id: str, + request: Request, +) -> dict[str, Any]: + """Get aggregated feedback stats (positive/negative counts) for a run.""" + feedback_repo = get_feedback_repo(request) + return await feedback_repo.aggregate_by_run(thread_id, run_id) + + +@router.delete("/{thread_id}/runs/{run_id}/feedback/{feedback_id}") +async def delete_feedback( + thread_id: str, + run_id: str, + feedback_id: str, + request: Request, +) -> dict[str, bool]: + """Delete a feedback record.""" + feedback_repo = get_feedback_repo(request) + # Verify feedback belongs to the specified thread/run before deleting + existing = await feedback_repo.get(feedback_id) + if existing is None: + raise HTTPException(status_code=404, 
detail=f"Feedback {feedback_id} not found") + if existing.get("thread_id") != thread_id or existing.get("run_id") != run_id: + raise HTTPException(status_code=404, detail=f"Feedback {feedback_id} not found in run {run_id}") + deleted = await feedback_repo.delete(feedback_id) + if not deleted: + raise HTTPException(status_code=404, detail=f"Feedback {feedback_id} not found") + return {"success": True} diff --git a/backend/app/gateway/routers/thread_runs.py b/backend/app/gateway/routers/thread_runs.py index 105fc9ca6..a26bdfbf3 100644 --- a/backend/app/gateway/routers/thread_runs.py +++ b/backend/app/gateway/routers/thread_runs.py @@ -19,7 +19,7 @@ from fastapi import APIRouter, HTTPException, Query, Request from fastapi.responses import Response, StreamingResponse from pydantic import BaseModel, Field -from app.gateway.deps import get_checkpointer, get_run_manager, get_stream_bridge +from app.gateway.deps import get_checkpointer, get_run_event_store, get_run_manager, get_run_store, get_stream_bridge from app.gateway.services import sse_consumer, start_run from deerflow.runtime import RunRecord, serialize_channel_values @@ -53,6 +53,7 @@ class RunCreateRequest(BaseModel): after_seconds: float | None = Field(default=None, description="Delayed execution") if_not_exists: Literal["reject", "create"] = Field(default="create", description="Thread creation policy") feedback_keys: list[str] | None = Field(default=None, description="LangSmith feedback keys") + follow_up_to_run_id: str | None = Field(default=None, description="Run ID this message follows up on. 
Auto-detected from latest successful run if not provided.") class RunResponse(BaseModel): @@ -265,3 +266,50 @@ async def stream_existing_run( "X-Accel-Buffering": "no", }, ) + + +# --------------------------------------------------------------------------- +# Messages / Events / Token usage endpoints +# --------------------------------------------------------------------------- + + +@router.get("/{thread_id}/messages") +async def list_thread_messages( + thread_id: str, + request: Request, + limit: int = Query(default=50, le=200), + before_seq: int | None = Query(default=None), + after_seq: int | None = Query(default=None), +) -> list[dict]: + """Return displayable messages for a thread (across all runs).""" + event_store = get_run_event_store(request) + return await event_store.list_messages(thread_id, limit=limit, before_seq=before_seq, after_seq=after_seq) + + +@router.get("/{thread_id}/runs/{run_id}/messages") +async def list_run_messages(thread_id: str, run_id: str, request: Request) -> list[dict]: + """Return displayable messages for a specific run.""" + event_store = get_run_event_store(request) + return await event_store.list_messages_by_run(thread_id, run_id) + + +@router.get("/{thread_id}/runs/{run_id}/events") +async def list_run_events( + thread_id: str, + run_id: str, + request: Request, + event_types: str | None = Query(default=None), + limit: int = Query(default=500, le=2000), +) -> list[dict]: + """Return the full event stream for a run (debug/audit).""" + event_store = get_run_event_store(request) + types = event_types.split(",") if event_types else None + return await event_store.list_events(thread_id, run_id, event_types=types, limit=limit) + + +@router.get("/{thread_id}/token-usage") +async def thread_token_usage(thread_id: str, request: Request) -> dict: + """Thread-level token usage aggregation.""" + run_store = get_run_store(request) + agg = await run_store.aggregate_tokens_by_thread(thread_id) + return {"thread_id": thread_id, **agg} diff 
--git a/backend/app/gateway/routers/threads.py b/backend/app/gateway/routers/threads.py index 808604980..487bf5413 100644 --- a/backend/app/gateway/routers/threads.py +++ b/backend/app/gateway/routers/threads.py @@ -20,17 +20,11 @@ from typing import Any from fastapi import APIRouter, HTTPException, Request from pydantic import BaseModel, Field -from app.gateway.deps import get_checkpointer, get_store +from app.gateway.deps import get_checkpointer +from app.gateway.utils import sanitize_log_param from deerflow.config.paths import Paths, get_paths from deerflow.runtime import serialize_channel_values -# --------------------------------------------------------------------------- -# Store namespace -# --------------------------------------------------------------------------- - -THREADS_NS: tuple[str, ...] = ("threads",) -"""Namespace used by the Store for thread metadata records.""" - logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/threads", tags=["threads"]) @@ -63,6 +57,7 @@ class ThreadCreateRequest(BaseModel): """Request body for creating a thread.""" thread_id: str | None = Field(default=None, description="Optional thread ID (auto-generated if omitted)") + assistant_id: str | None = Field(default=None, description="Associate thread with an assistant") metadata: dict[str, Any] = Field(default_factory=dict, description="Initial metadata") @@ -135,61 +130,16 @@ def _delete_thread_data(thread_id: str, paths: Paths | None = None) -> ThreadDel raise HTTPException(status_code=422, detail=str(exc)) from exc except FileNotFoundError: # Not critical — thread data may not exist on disk - logger.debug("No local thread data to delete for %s", thread_id) + logger.debug("No local thread data to delete for %s", sanitize_log_param(thread_id)) return ThreadDeleteResponse(success=True, message=f"No local data for {thread_id}") except Exception as exc: - logger.exception("Failed to delete thread data for %s", thread_id) + logger.exception("Failed to delete 
thread data for %s", sanitize_log_param(thread_id)) raise HTTPException(status_code=500, detail="Failed to delete local thread data.") from exc - logger.info("Deleted local thread data for %s", thread_id) + logger.info("Deleted local thread data for %s", sanitize_log_param(thread_id)) return ThreadDeleteResponse(success=True, message=f"Deleted local thread data for {thread_id}") -async def _store_get(store, thread_id: str) -> dict | None: - """Fetch a thread record from the Store; returns ``None`` if absent.""" - item = await store.aget(THREADS_NS, thread_id) - return item.value if item is not None else None - - -async def _store_put(store, record: dict) -> None: - """Write a thread record to the Store.""" - await store.aput(THREADS_NS, record["thread_id"], record) - - -async def _store_upsert(store, thread_id: str, *, metadata: dict | None = None, values: dict | None = None) -> None: - """Create or refresh a thread record in the Store. - - On creation the record is written with ``status="idle"``. On update only - ``updated_at`` (and optionally ``metadata`` / ``values``) are changed so - that existing fields are preserved. - - ``values`` carries the agent-state snapshot exposed to the frontend - (currently just ``{"title": "..."}``). 
- """ - now = time.time() - existing = await _store_get(store, thread_id) - if existing is None: - await _store_put( - store, - { - "thread_id": thread_id, - "status": "idle", - "created_at": now, - "updated_at": now, - "metadata": metadata or {}, - "values": values or {}, - }, - ) - else: - val = dict(existing) - val["updated_at"] = now - if metadata: - val.setdefault("metadata", {}).update(metadata) - if values: - val.setdefault("values", {}).update(values) - await _store_put(store, val) - - def _derive_thread_status(checkpoint_tuple) -> str: """Derive thread status from checkpoint metadata.""" if checkpoint_tuple is None: @@ -219,19 +169,14 @@ async def delete_thread_data(thread_id: str, request: Request) -> ThreadDeleteRe """Delete local persisted filesystem data for a thread. Cleans DeerFlow-managed thread directories, removes checkpoint data, - and removes the thread record from the Store. + and removes the thread_meta row from the configured ThreadMetaStore + (sqlite or memory). """ + from app.gateway.deps import get_thread_meta_repo + # Clean local filesystem response = _delete_thread_data(thread_id) - # Remove from Store (best-effort) - store = get_store(request) - if store is not None: - try: - await store.adelete(THREADS_NS, thread_id) - except Exception: - logger.debug("Could not delete store record for thread %s (not critical)", thread_id) - # Remove checkpoints (best-effort) checkpointer = getattr(request.app.state, "checkpointer", None) if checkpointer is not None: @@ -239,7 +184,15 @@ async def delete_thread_data(thread_id: str, request: Request) -> ThreadDeleteRe if hasattr(checkpointer, "adelete_thread"): await checkpointer.adelete_thread(thread_id) except Exception: - logger.debug("Could not delete checkpoints for thread %s (not critical)", thread_id) + logger.debug("Could not delete checkpoints for thread %s (not critical)", sanitize_log_param(thread_id)) + + # Remove thread_meta row (best-effort) — required for sqlite backend + # so the deleted 
thread no longer appears in /threads/search. + try: + thread_meta_repo = get_thread_meta_repo(request) + await thread_meta_repo.delete(thread_id) + except Exception: + logger.debug("Could not delete thread_meta for %s (not critical)", sanitize_log_param(thread_id)) return response @@ -248,43 +201,38 @@ async def delete_thread_data(thread_id: str, request: Request) -> ThreadDeleteRe async def create_thread(body: ThreadCreateRequest, request: Request) -> ThreadResponse: """Create a new thread. - The thread record is written to the Store (for fast listing) and an - empty checkpoint is written to the checkpointer (for state reads). + Writes a thread_meta record (so the thread appears in /threads/search) + and an empty checkpoint (so state endpoints work immediately). Idempotent: returns the existing record when ``thread_id`` already exists. """ - store = get_store(request) + from app.gateway.deps import get_thread_meta_repo + checkpointer = get_checkpointer(request) + thread_meta_repo = get_thread_meta_repo(request) thread_id = body.thread_id or str(uuid.uuid4()) now = time.time() - # Idempotency: return existing record from Store when already present - if store is not None: - existing_record = await _store_get(store, thread_id) - if existing_record is not None: - return ThreadResponse( - thread_id=thread_id, - status=existing_record.get("status", "idle"), - created_at=str(existing_record.get("created_at", "")), - updated_at=str(existing_record.get("updated_at", "")), - metadata=existing_record.get("metadata", {}), - ) + # Idempotency: return existing record when already present + existing_record = await thread_meta_repo.get(thread_id) + if existing_record is not None: + return ThreadResponse( + thread_id=thread_id, + status=existing_record.get("status", "idle"), + created_at=str(existing_record.get("created_at", "")), + updated_at=str(existing_record.get("updated_at", "")), + metadata=existing_record.get("metadata", {}), + ) - # Write thread record to Store - if store 
is not None: - try: - await _store_put( - store, - { - "thread_id": thread_id, - "status": "idle", - "created_at": now, - "updated_at": now, - "metadata": body.metadata, - }, - ) - except Exception: - logger.exception("Failed to write thread %s to store", thread_id) - raise HTTPException(status_code=500, detail="Failed to create thread") + # Write thread_meta so the thread appears in /threads/search immediately + try: + await thread_meta_repo.create( + thread_id, + assistant_id=getattr(body, "assistant_id", None), + metadata=body.metadata, + ) + except Exception: + logger.exception("Failed to write thread_meta for %s", sanitize_log_param(thread_id)) + raise HTTPException(status_code=500, detail="Failed to create thread") # Write an empty checkpoint so state endpoints work immediately config = {"configurable": {"thread_id": thread_id, "checkpoint_ns": ""}} @@ -301,10 +249,10 @@ async def create_thread(body: ThreadCreateRequest, request: Request) -> ThreadRe } await checkpointer.aput(config, empty_checkpoint(), ckpt_metadata, {}) except Exception: - logger.exception("Failed to create checkpoint for thread %s", thread_id) + logger.exception("Failed to create checkpoint for thread %s", sanitize_log_param(thread_id)) raise HTTPException(status_code=500, detail="Failed to create thread") - logger.info("Thread created: %s", thread_id) + logger.info("Thread created: %s", sanitize_log_param(thread_id)) return ThreadResponse( thread_id=thread_id, status="idle", @@ -318,135 +266,56 @@ async def create_thread(body: ThreadCreateRequest, request: Request) -> ThreadRe async def search_threads(body: ThreadSearchRequest, request: Request) -> list[ThreadResponse]: """Search and list threads. - Two-phase approach: - - **Phase 1 — Store (fast path, O(threads))**: returns threads that were - created or run through this Gateway. Store records are tiny metadata - dicts so fetching all of them at once is cheap. 
- - **Phase 2 — Checkpointer supplement (lazy migration)**: threads that - were created directly by LangGraph Server (and therefore absent from the - Store) are discovered here by iterating the shared checkpointer. Any - newly found thread is immediately written to the Store so that the next - search skips Phase 2 for that thread — the Store converges to a full - index over time without a one-shot migration job. + Delegates to the configured ThreadMetaStore implementation + (SQL-backed for sqlite/postgres, Store-backed for memory mode). """ - store = get_store(request) - checkpointer = get_checkpointer(request) + from app.gateway.deps import get_thread_meta_repo - # ----------------------------------------------------------------------- - # Phase 1: Store - # ----------------------------------------------------------------------- - merged: dict[str, ThreadResponse] = {} - - if store is not None: - try: - items = await store.asearch(THREADS_NS, limit=10_000) - except Exception: - logger.warning("Store search failed — falling back to checkpointer only", exc_info=True) - items = [] - - for item in items: - val = item.value - merged[val["thread_id"]] = ThreadResponse( - thread_id=val["thread_id"], - status=val.get("status", "idle"), - created_at=str(val.get("created_at", "")), - updated_at=str(val.get("updated_at", "")), - metadata=val.get("metadata", {}), - values=val.get("values", {}), - ) - - # ----------------------------------------------------------------------- - # Phase 2: Checkpointer supplement - # Discovers threads not yet in the Store (e.g. created by LangGraph - # Server) and lazily migrates them so future searches skip this phase. 
- # ----------------------------------------------------------------------- - try: - async for checkpoint_tuple in checkpointer.alist(None): - cfg = getattr(checkpoint_tuple, "config", {}) - thread_id = cfg.get("configurable", {}).get("thread_id") - if not thread_id or thread_id in merged: - continue - - # Skip sub-graph checkpoints (checkpoint_ns is non-empty for those) - if cfg.get("configurable", {}).get("checkpoint_ns", ""): - continue - - ckpt_meta = getattr(checkpoint_tuple, "metadata", {}) or {} - # Strip LangGraph internal keys from the user-visible metadata dict - user_meta = {k: v for k, v in ckpt_meta.items() if k not in ("created_at", "updated_at", "step", "source", "writes", "parents")} - - # Extract state values (title) from the checkpoint's channel_values - checkpoint_data = getattr(checkpoint_tuple, "checkpoint", {}) or {} - channel_values = checkpoint_data.get("channel_values", {}) - ckpt_values = {} - if title := channel_values.get("title"): - ckpt_values["title"] = title - - thread_resp = ThreadResponse( - thread_id=thread_id, - status=_derive_thread_status(checkpoint_tuple), - created_at=str(ckpt_meta.get("created_at", "")), - updated_at=str(ckpt_meta.get("updated_at", ckpt_meta.get("created_at", ""))), - metadata=user_meta, - values=ckpt_values, - ) - merged[thread_id] = thread_resp - - # Lazy migration — write to Store so the next search finds it there - if store is not None: - try: - await _store_upsert(store, thread_id, metadata=user_meta, values=ckpt_values or None) - except Exception: - logger.debug("Failed to migrate thread %s to store (non-fatal)", thread_id) - except Exception: - logger.exception("Checkpointer scan failed during thread search") - # Don't raise — return whatever was collected from Store + partial scan - - # ----------------------------------------------------------------------- - # Phase 3: Filter → sort → paginate - # ----------------------------------------------------------------------- - results = 
list(merged.values()) - - if body.metadata: - results = [r for r in results if all(r.metadata.get(k) == v for k, v in body.metadata.items())] - - if body.status: - results = [r for r in results if r.status == body.status] - - results.sort(key=lambda r: r.updated_at, reverse=True) - return results[body.offset : body.offset + body.limit] + repo = get_thread_meta_repo(request) + rows = await repo.search( + metadata=body.metadata or None, + status=body.status, + limit=body.limit, + offset=body.offset, + ) + return [ + ThreadResponse( + thread_id=r["thread_id"], + status=r.get("status", "idle"), + created_at=r.get("created_at", ""), + updated_at=r.get("updated_at", ""), + metadata=r.get("metadata", {}), + values={"title": r["display_name"]} if r.get("display_name") else {}, + interrupts={}, + ) + for r in rows + ] @router.patch("/{thread_id}", response_model=ThreadResponse) async def patch_thread(thread_id: str, body: ThreadPatchRequest, request: Request) -> ThreadResponse: """Merge metadata into a thread record.""" - store = get_store(request) - if store is None: - raise HTTPException(status_code=503, detail="Store not available") + from app.gateway.deps import get_thread_meta_repo - record = await _store_get(store, thread_id) + thread_meta_repo = get_thread_meta_repo(request) + record = await thread_meta_repo.get(thread_id) if record is None: raise HTTPException(status_code=404, detail=f"Thread {thread_id} not found") - now = time.time() - updated = dict(record) - updated.setdefault("metadata", {}).update(body.metadata) - updated["updated_at"] = now - try: - await _store_put(store, updated) + await thread_meta_repo.update_metadata(thread_id, body.metadata) except Exception: - logger.exception("Failed to patch thread %s", thread_id) + logger.exception("Failed to patch thread %s", sanitize_log_param(thread_id)) raise HTTPException(status_code=500, detail="Failed to update thread") + # Re-read to get the merged metadata + refreshed updated_at + record = await 
thread_meta_repo.get(thread_id) or record return ThreadResponse( thread_id=thread_id, - status=updated.get("status", "idle"), - created_at=str(updated.get("created_at", "")), - updated_at=str(now), - metadata=updated.get("metadata", {}), + status=record.get("status", "idle"), + created_at=str(record.get("created_at", "")), + updated_at=str(record.get("updated_at", "")), + metadata=record.get("metadata", {}), ) @@ -454,30 +323,31 @@ async def patch_thread(thread_id: str, body: ThreadPatchRequest, request: Reques async def get_thread(thread_id: str, request: Request) -> ThreadResponse: """Get thread info. - Reads metadata from the Store and derives the accurate execution - status from the checkpointer. Falls back to the checkpointer alone - for threads that pre-date Store adoption (backward compat). + Reads metadata from the ThreadMetaStore and derives the accurate + execution status from the checkpointer. Falls back to the checkpointer + alone for threads that pre-date ThreadMetaStore adoption (backward compat). """ - store = get_store(request) + from app.gateway.deps import get_thread_meta_repo + + thread_meta_repo = get_thread_meta_repo(request) checkpointer = get_checkpointer(request) - record: dict | None = None - if store is not None: - record = await _store_get(store, thread_id) + record: dict | None = await thread_meta_repo.get(thread_id) # Derive accurate status from the checkpointer config = {"configurable": {"thread_id": thread_id, "checkpoint_ns": ""}} try: checkpoint_tuple = await checkpointer.aget_tuple(config) except Exception: - logger.exception("Failed to get checkpoint for thread %s", thread_id) + logger.exception("Failed to get checkpoint for thread %s", sanitize_log_param(thread_id)) raise HTTPException(status_code=500, detail="Failed to get thread") if record is None and checkpoint_tuple is None: raise HTTPException(status_code=404, detail=f"Thread {thread_id} not found") - # If the thread exists in the checkpointer but not the store (e.g. 
legacy - # data), synthesize a minimal store record from the checkpoint metadata. + # If the thread exists in the checkpointer but not in thread_meta (e.g. + # legacy data created before thread_meta adoption), synthesize a minimal + # record from the checkpoint metadata. if record is None and checkpoint_tuple is not None: ckpt_meta = getattr(checkpoint_tuple, "metadata", {}) or {} record = { @@ -518,7 +388,7 @@ async def get_thread_state(thread_id: str, request: Request) -> ThreadStateRespo try: checkpoint_tuple = await checkpointer.aget_tuple(config) except Exception: - logger.exception("Failed to get state for thread %s", thread_id) + logger.exception("Failed to get state for thread %s", sanitize_log_param(thread_id)) raise HTTPException(status_code=500, detail="Failed to get thread state") if checkpoint_tuple is None: @@ -559,11 +429,14 @@ async def update_thread_state(thread_id: str, body: ThreadStateUpdateRequest, re """Update thread state (e.g. for human-in-the-loop resume or title rename). Writes a new checkpoint that merges *body.values* into the latest - channel values, then syncs any updated ``title`` field back to the Store - so that ``/threads/search`` reflects the change immediately. + channel values, then syncs any updated ``title`` field through the + ThreadMetaStore abstraction so that ``/threads/search`` reflects the + change immediately in both sqlite and memory backends. """ + from app.gateway.deps import get_thread_meta_repo + checkpointer = get_checkpointer(request) - store = get_store(request) + thread_meta_repo = get_thread_meta_repo(request) # checkpoint_ns must be present in the config for aput — default to "" # (the root graph namespace). 
checkpoint_id is optional; omitting it @@ -580,7 +453,7 @@ async def update_thread_state(thread_id: str, body: ThreadStateUpdateRequest, re try: checkpoint_tuple = await checkpointer.aget_tuple(read_config) except Exception: - logger.exception("Failed to get state for thread %s", thread_id) + logger.exception("Failed to get state for thread %s", sanitize_log_param(thread_id)) raise HTTPException(status_code=500, detail="Failed to get thread state") if checkpoint_tuple is None: @@ -614,19 +487,22 @@ async def update_thread_state(thread_id: str, body: ThreadStateUpdateRequest, re try: new_config = await checkpointer.aput(write_config, checkpoint, metadata, {}) except Exception: - logger.exception("Failed to update state for thread %s", thread_id) + logger.exception("Failed to update state for thread %s", sanitize_log_param(thread_id)) raise HTTPException(status_code=500, detail="Failed to update thread state") new_checkpoint_id: str | None = None if isinstance(new_config, dict): new_checkpoint_id = new_config.get("configurable", {}).get("checkpoint_id") - # Sync title changes to the Store so /threads/search reflects them immediately. - if store is not None and body.values and "title" in body.values: - try: - await _store_upsert(store, thread_id, values={"title": body.values["title"]}) - except Exception: - logger.debug("Failed to sync title to store for thread %s (non-fatal)", thread_id) + # Sync title changes through the ThreadMetaStore abstraction so /threads/search + # reflects them immediately in both sqlite and memory backends. 
+ if body.values and "title" in body.values: + new_title = body.values["title"] + if new_title: # Skip empty strings and None + try: + await thread_meta_repo.update_display_name(thread_id, new_title) + except Exception: + logger.debug("Failed to sync title to thread_meta for %s (non-fatal)", sanitize_log_param(thread_id)) return ThreadStateResponse( values=serialize_channel_values(channel_values), @@ -639,7 +515,14 @@ async def update_thread_state(thread_id: str, body: ThreadStateUpdateRequest, re @router.post("/{thread_id}/history", response_model=list[HistoryEntry]) async def get_thread_history(thread_id: str, body: ThreadHistoryRequest, request: Request) -> list[HistoryEntry]: - """Get checkpoint history for a thread.""" + """Get checkpoint history for a thread. + + Messages are read from the checkpointer's channel values (the + authoritative source) and serialized via + :func:`~deerflow.runtime.serialization.serialize_channel_values`. + Only the latest (first) checkpoint carries the ``messages`` key to + avoid duplicating them across every entry. 
+ """ checkpointer = get_checkpointer(request) config: dict[str, Any] = {"configurable": {"thread_id": thread_id}} @@ -647,6 +530,7 @@ async def get_thread_history(thread_id: str, body: ThreadHistoryRequest, request config["configurable"]["checkpoint_id"] = body.before entries: list[HistoryEntry] = [] + is_latest_checkpoint = True try: async for checkpoint_tuple in checkpointer.alist(config, limit=body.limit): ckpt_config = getattr(checkpoint_tuple, "config", {}) @@ -661,22 +545,42 @@ async def get_thread_history(thread_id: str, body: ThreadHistoryRequest, request channel_values = checkpoint.get("channel_values", {}) + # Build values from checkpoint channel_values + values: dict[str, Any] = {} + if title := channel_values.get("title"): + values["title"] = title + if thread_data := channel_values.get("thread_data"): + values["thread_data"] = thread_data + + # Attach messages from checkpointer only for the latest checkpoint + if is_latest_checkpoint: + messages = channel_values.get("messages") + if messages: + values["messages"] = serialize_channel_values({"messages": messages}).get("messages", []) + is_latest_checkpoint = False + # Derive next tasks tasks_raw = getattr(checkpoint_tuple, "tasks", []) or [] next_tasks = [t.name for t in tasks_raw if hasattr(t, "name")] + # Strip LangGraph internal keys from metadata + user_meta = {k: v for k, v in metadata.items() if k not in ("created_at", "updated_at", "step", "source", "writes", "parents")} + # Keep step for ordering context + if "step" in metadata: + user_meta["step"] = metadata["step"] + entries.append( HistoryEntry( checkpoint_id=checkpoint_id, parent_checkpoint_id=parent_id, - metadata=metadata, - values=serialize_channel_values(channel_values), + metadata=user_meta, + values=values, created_at=str(metadata.get("created_at", "")), next=next_tasks, ) ) except Exception: - logger.exception("Failed to get history for thread %s", thread_id) + logger.exception("Failed to get history for thread %s", 
sanitize_log_param(thread_id)) raise HTTPException(status_code=500, detail="Failed to get thread history") return entries diff --git a/backend/app/gateway/services.py b/backend/app/gateway/services.py index 172e27817..d6acffd48 100644 --- a/backend/app/gateway/services.py +++ b/backend/app/gateway/services.py @@ -8,16 +8,17 @@ frames, and consuming stream bridge events. Router modules from __future__ import annotations import asyncio +import dataclasses import json import logging import re -import time from typing import Any from fastapi import HTTPException, Request from langchain_core.messages import HumanMessage -from app.gateway.deps import get_checkpointer, get_run_manager, get_store, get_stream_bridge +from app.gateway.deps import get_run_context, get_run_manager, get_run_store, get_stream_bridge +from app.gateway.utils import sanitize_log_param from deerflow.runtime import ( END_SENTINEL, HEARTBEAT_SENTINEL, @@ -171,71 +172,6 @@ def build_run_config( # --------------------------------------------------------------------------- -async def _upsert_thread_in_store(store, thread_id: str, metadata: dict | None) -> None: - """Create or refresh the thread record in the Store. - - Called from :func:`start_run` so that threads created via the stateless - ``/runs/stream`` endpoint (which never calls ``POST /threads``) still - appear in ``/threads/search`` results. - """ - # Deferred import to avoid circular import with the threads router module. - from app.gateway.routers.threads import _store_upsert - - try: - await _store_upsert(store, thread_id, metadata=metadata) - except Exception: - logger.warning("Failed to upsert thread %s in store (non-fatal)", thread_id) - - -async def _sync_thread_title_after_run( - run_task: asyncio.Task, - thread_id: str, - checkpointer: Any, - store: Any, -) -> None: - """Wait for *run_task* to finish, then persist the generated title to the Store. 
- - TitleMiddleware writes the generated title to the LangGraph agent state - (checkpointer) but the Gateway's Store record is not updated automatically. - This coroutine closes that gap by reading the final checkpoint after the - run completes and syncing ``values.title`` into the Store record so that - subsequent ``/threads/search`` responses include the correct title. - - Runs as a fire-and-forget :func:`asyncio.create_task`; failures are - logged at DEBUG level and never propagate. - """ - # Wait for the background run task to complete (any outcome). - # asyncio.wait does not propagate task exceptions — it just returns - # when the task is done, cancelled, or failed. - await asyncio.wait({run_task}) - - # Deferred import to avoid circular import with the threads router module. - from app.gateway.routers.threads import _store_get, _store_put - - try: - ckpt_config = {"configurable": {"thread_id": thread_id, "checkpoint_ns": ""}} - ckpt_tuple = await checkpointer.aget_tuple(ckpt_config) - if ckpt_tuple is None: - return - - channel_values = ckpt_tuple.checkpoint.get("channel_values", {}) - title = channel_values.get("title") - if not title: - return - - existing = await _store_get(store, thread_id) - if existing is None: - return - - updated = dict(existing) - updated.setdefault("values", {})["title"] = title - updated["updated_at"] = time.time() - await _store_put(store, updated) - logger.debug("Synced title %r for thread %s", title, thread_id) - except Exception: - logger.debug("Failed to sync title for thread %s (non-fatal)", thread_id, exc_info=True) - - async def start_run( body: Any, thread_id: str, @@ -255,11 +191,25 @@ async def start_run( """ bridge = get_stream_bridge(request) run_mgr = get_run_manager(request) - checkpointer = get_checkpointer(request) - store = get_store(request) + run_ctx = get_run_context(request) disconnect = DisconnectMode.cancel if body.on_disconnect == "cancel" else DisconnectMode.continue_ + # Resolve follow_up_to_run_id: 
explicit from request, or auto-detect from latest successful run + follow_up_to_run_id = getattr(body, "follow_up_to_run_id", None) + if follow_up_to_run_id is None: + run_store = get_run_store(request) + try: + recent_runs = await run_store.list_by_thread(thread_id, limit=1) + if recent_runs and recent_runs[0].get("status") == "success": + follow_up_to_run_id = recent_runs[0]["run_id"] + except Exception: + pass # Don't block run creation + + # Enrich base context with per-run field + if follow_up_to_run_id: + run_ctx = dataclasses.replace(run_ctx, follow_up_to_run_id=follow_up_to_run_id) + try: record = await run_mgr.create_or_reject( thread_id, @@ -268,17 +218,28 @@ async def start_run( metadata=body.metadata or {}, kwargs={"input": body.input, "config": body.config}, multitask_strategy=body.multitask_strategy, + follow_up_to_run_id=follow_up_to_run_id, ) except ConflictError as exc: raise HTTPException(status_code=409, detail=str(exc)) from exc except UnsupportedStrategyError as exc: raise HTTPException(status_code=501, detail=str(exc)) from exc - # Ensure the thread is visible in /threads/search, even for threads that - # were never explicitly created via POST /threads (e.g. stateless runs). - store = get_store(request) - if store is not None: - await _upsert_thread_in_store(store, thread_id, body.metadata) + # Upsert thread metadata so the thread appears in /threads/search, + # even for threads that were never explicitly created via POST /threads + # (e.g. stateless runs). 
+ try: + existing = await run_ctx.thread_meta_repo.get(thread_id) + if existing is None: + await run_ctx.thread_meta_repo.create( + thread_id, + assistant_id=body.assistant_id, + metadata=body.metadata, + ) + else: + await run_ctx.thread_meta_repo.update_status(thread_id, "running") + except Exception: + logger.warning("Failed to upsert thread_meta for %s (non-fatal)", sanitize_log_param(thread_id)) agent_factory = resolve_agent_factory(body.assistant_id) graph_input = normalize_input(body.input) @@ -311,8 +272,7 @@ async def start_run( bridge, run_mgr, record, - checkpointer=checkpointer, - store=store, + ctx=run_ctx, agent_factory=agent_factory, graph_input=graph_input, config=config, @@ -324,11 +284,9 @@ async def start_run( ) record.task = task - # After the run completes, sync the title generated by TitleMiddleware from - # the checkpointer into the Store record so that /threads/search returns the - # correct title instead of an empty values dict. - if store is not None: - asyncio.create_task(_sync_thread_title_after_run(task, thread_id, checkpointer, store)) + # Title sync is handled by worker.py's finally block which reads the + # title from the checkpoint and calls thread_meta_repo.update_display_name + # after the run completes. 
return record diff --git a/backend/app/gateway/utils.py b/backend/app/gateway/utils.py new file mode 100644 index 000000000..8368d84fc --- /dev/null +++ b/backend/app/gateway/utils.py @@ -0,0 +1,6 @@ +"""Shared utility helpers for the Gateway layer.""" + + +def sanitize_log_param(value: str) -> str: + """Strip control characters to prevent log injection.""" + return value.replace("\n", "").replace("\r", "").replace("\x00", "") diff --git a/backend/packages/harness/deerflow/agents/checkpointer/async_provider.py b/backend/packages/harness/deerflow/agents/checkpointer/async_provider.py index 1129fc6b0..623333705 100644 --- a/backend/packages/harness/deerflow/agents/checkpointer/async_provider.py +++ b/backend/packages/harness/deerflow/agents/checkpointer/async_provider.py @@ -84,23 +84,76 @@ async def _async_checkpointer(config) -> AsyncIterator[Checkpointer]: @contextlib.asynccontextmanager -async def make_checkpointer() -> AsyncIterator[Checkpointer]: - """Async context manager that yields a checkpointer for the caller's lifetime. - Resources are opened on enter and closed on exit — no global state:: - - async with make_checkpointer() as checkpointer: - app.state.checkpointer = checkpointer - - Yields an ``InMemorySaver`` when no checkpointer is configured in *config.yaml*. 
- """ - - config = get_app_config() - - if config.checkpointer is None: +async def _async_checkpointer_from_database(db_config) -> AsyncIterator[Checkpointer]: + """Async context manager that constructs a checkpointer from unified DatabaseConfig.""" + if db_config.backend == "memory": from langgraph.checkpoint.memory import InMemorySaver yield InMemorySaver() return - async with _async_checkpointer(config.checkpointer) as saver: - yield saver + if db_config.backend == "sqlite": + try: + from langgraph.checkpoint.sqlite.aio import AsyncSqliteSaver + except ImportError as exc: + raise ImportError(SQLITE_INSTALL) from exc + + conn_str = db_config.checkpointer_sqlite_path + ensure_sqlite_parent_dir(conn_str) + async with AsyncSqliteSaver.from_conn_string(conn_str) as saver: + await saver.setup() + yield saver + return + + if db_config.backend == "postgres": + try: + from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver + except ImportError as exc: + raise ImportError(POSTGRES_INSTALL) from exc + + if not db_config.postgres_url: + raise ValueError("database.postgres_url is required for the postgres backend") + + async with AsyncPostgresSaver.from_conn_string(db_config.postgres_url) as saver: + await saver.setup() + yield saver + return + + raise ValueError(f"Unknown database backend: {db_config.backend!r}") + + +@contextlib.asynccontextmanager +async def make_checkpointer() -> AsyncIterator[Checkpointer]: + """Async context manager that yields a checkpointer for the caller's lifetime. + Resources are opened on enter and closed on exit -- no global state:: + + async with make_checkpointer() as checkpointer: + app.state.checkpointer = checkpointer + + Yields an ``InMemorySaver`` when no checkpointer is configured in *config.yaml*. + + Priority: + 1. Legacy ``checkpointer:`` config section (backward compatible) + 2. Unified ``database:`` config section + 3. 
Default InMemorySaver + """ + + config = get_app_config() + + # Legacy: standalone checkpointer config takes precedence + if config.checkpointer is not None: + async with _async_checkpointer(config.checkpointer) as saver: + yield saver + return + + # Unified database config + db_config = getattr(config, "database", None) + if db_config is not None and db_config.backend != "memory": + async with _async_checkpointer_from_database(db_config) as saver: + yield saver + return + + # Default: in-memory + from langgraph.checkpoint.memory import InMemorySaver + + yield InMemorySaver() diff --git a/backend/packages/harness/deerflow/agents/lead_agent/agent.py b/backend/packages/harness/deerflow/agents/lead_agent/agent.py index df6a453d6..352ff021e 100644 --- a/backend/packages/harness/deerflow/agents/lead_agent/agent.py +++ b/backend/packages/harness/deerflow/agents/lead_agent/agent.py @@ -56,13 +56,15 @@ def _create_summarization_middleware() -> SummarizationMiddleware | None: # Prepare keep parameter keep = config.keep.to_tuple() - # Prepare model parameter + # Prepare model parameter. + # Bind "middleware:summarize" tag so RunJournal identifies these LLM calls + # as middleware rather than lead_agent (SummarizationMiddleware is a + # LangChain built-in, so we tag the model at creation time). 
if config.model_name: model = create_chat_model(name=config.model_name, thinking_enabled=False) else: - # Use a lightweight model for summarization to save costs - # Falls back to default model if not explicitly specified model = create_chat_model(thinking_enabled=False) + model = model.with_config(tags=["middleware:summarize"]) # Prepare kwargs kwargs = { diff --git a/backend/packages/harness/deerflow/agents/lead_agent/prompt.py b/backend/packages/harness/deerflow/agents/lead_agent/prompt.py index 71af2e653..983ae873c 100644 --- a/backend/packages/harness/deerflow/agents/lead_agent/prompt.py +++ b/backend/packages/harness/deerflow/agents/lead_agent/prompt.py @@ -164,6 +164,30 @@ Skip simple one-off tasks. """ +def _skill_mutability_label(category: str) -> str: + return "[custom, editable]" if category == "custom" else "[built-in]" + + +def clear_skills_system_prompt_cache() -> None: + _get_cached_skills_prompt_section.cache_clear() + + +def _build_skill_evolution_section(skill_evolution_enabled: bool) -> str: + if not skill_evolution_enabled: + return "" + return """ +## Skill Self-Evolution +After completing a task, consider creating or updating a skill when: +- The task required 5+ tool calls to resolve +- You overcame non-obvious errors or pitfalls +- The user corrected your approach and the corrected version worked +- You discovered a non-trivial, recurring workflow +If you used a skill and encountered issues not covered by it, patch it immediately. +Prefer patch over edit. Before creating a new skill, confirm with the user first. +Skip simple one-off tasks. +""" + + def _build_subagent_section(max_concurrent: int) -> str: """Build the subagent system prompt section with dynamic concurrency limit. 
diff --git a/backend/packages/harness/deerflow/agents/middlewares/title_middleware.py b/backend/packages/harness/deerflow/agents/middlewares/title_middleware.py index 42f465f01..5b6b2fcd2 100644 --- a/backend/packages/harness/deerflow/agents/middlewares/title_middleware.py +++ b/backend/packages/harness/deerflow/agents/middlewares/title_middleware.py @@ -1,10 +1,11 @@ """Middleware for automatic thread title generation.""" import logging -from typing import NotRequired, override +from typing import Any, NotRequired, override from langchain.agents import AgentState from langchain.agents.middleware import AgentMiddleware +from langgraph.config import get_config from langgraph.runtime import Runtime from deerflow.config.title_config import get_title_config @@ -100,6 +101,20 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]): return user_msg[:fallback_chars].rstrip() + "..." return user_msg if user_msg else "New Conversation" + def _get_runnable_config(self) -> dict[str, Any]: + """Inherit the parent RunnableConfig and add middleware tag. + + This ensures RunJournal identifies LLM calls from this middleware + as ``middleware:title`` instead of ``lead_agent``. 
+ """ + try: + parent = get_config() + except Exception: + parent = {} + config = {**parent} + config["tags"] = [*(config.get("tags") or []), "middleware:title"] + return config + def _generate_title_result(self, state: TitleMiddlewareState) -> dict | None: """Generate a local fallback title without blocking on an LLM call.""" if not self._should_generate_title(state): @@ -121,7 +136,7 @@ class TitleMiddleware(AgentMiddleware[TitleMiddlewareState]): model = create_chat_model(name=config.model_name, thinking_enabled=False) else: model = create_chat_model(thinking_enabled=False) - response = await model.ainvoke(prompt) + response = await model.ainvoke(prompt, config=self._get_runnable_config()) title = self._parse_title(response.content) if title: return {"title": title} diff --git a/backend/packages/harness/deerflow/config/app_config.py b/backend/packages/harness/deerflow/config/app_config.py index e1ffbf847..bec3d0b8d 100644 --- a/backend/packages/harness/deerflow/config/app_config.py +++ b/backend/packages/harness/deerflow/config/app_config.py @@ -10,10 +10,12 @@ from pydantic import BaseModel, ConfigDict, Field from deerflow.config.acp_config import load_acp_config_from_dict from deerflow.config.checkpointer_config import CheckpointerConfig, load_checkpointer_config_from_dict +from deerflow.config.database_config import DatabaseConfig from deerflow.config.extensions_config import ExtensionsConfig from deerflow.config.guardrails_config import GuardrailsConfig, load_guardrails_config_from_dict from deerflow.config.memory_config import MemoryConfig, load_memory_config_from_dict from deerflow.config.model_config import ModelConfig +from deerflow.config.run_events_config import RunEventsConfig from deerflow.config.sandbox_config import SandboxConfig from deerflow.config.skill_evolution_config import SkillEvolutionConfig from deerflow.config.skills_config import SkillsConfig @@ -56,6 +58,8 @@ class AppConfig(BaseModel): subagents: SubagentsAppConfig = 
Field(default_factory=SubagentsAppConfig, description="Subagent runtime configuration") guardrails: GuardrailsConfig = Field(default_factory=GuardrailsConfig, description="Guardrail middleware configuration") model_config = ConfigDict(extra="allow", frozen=False) + database: DatabaseConfig = Field(default_factory=DatabaseConfig, description="Unified database backend configuration") + run_events: RunEventsConfig = Field(default_factory=RunEventsConfig, description="Run event storage configuration") checkpointer: CheckpointerConfig | None = Field(default=None, description="Checkpointer configuration") stream_bridge: StreamBridgeConfig | None = Field(default=None, description="Stream bridge configuration") diff --git a/backend/packages/harness/deerflow/config/database_config.py b/backend/packages/harness/deerflow/config/database_config.py new file mode 100644 index 000000000..207f8ef77 --- /dev/null +++ b/backend/packages/harness/deerflow/config/database_config.py @@ -0,0 +1,92 @@ +"""Unified database backend configuration. + +Controls BOTH the LangGraph checkpointer and the DeerFlow application +persistence layer (runs, threads metadata, users, etc.). The user +configures one backend; the system handles physical separation details. + +SQLite mode: checkpointer and app use different .db files in the same +directory to avoid write-lock contention. This is automatic. + +Postgres mode: both use the same database URL but maintain independent +connection pools with different lifecycles. + +Memory mode: checkpointer uses MemorySaver, app uses in-memory stores. +No database is initialized. + +Sensitive values (postgres_url) should use $VAR syntax in config.yaml +to reference environment variables from .env: + + database: + backend: postgres + postgres_url: $DATABASE_URL + +The $VAR resolution is handled by AppConfig.resolve_env_variables() +before this config is instantiated -- DatabaseConfig itself does not +need to do any environment variable processing. 
+""" + +from __future__ import annotations + +import os +from typing import Literal + +from pydantic import BaseModel, Field + + +class DatabaseConfig(BaseModel): + backend: Literal["memory", "sqlite", "postgres"] = Field( + default="memory", + description=("Storage backend for both checkpointer and application data. 'memory' for development (no persistence across restarts), 'sqlite' for single-node deployment, 'postgres' for production multi-node deployment."), + ) + sqlite_dir: str = Field( + default=".deer-flow/data", + description=("Directory for SQLite database files. Checkpointer uses {sqlite_dir}/checkpoints.db, application data uses {sqlite_dir}/app.db."), + ) + postgres_url: str = Field( + default="", + description=( + "PostgreSQL connection URL, shared by checkpointer and app. " + "Use $DATABASE_URL in config.yaml to reference .env. " + "Example: postgresql://user:pass@host:5432/deerflow " + "(the +asyncpg driver suffix is added automatically where needed)." + ), + ) + echo_sql: bool = Field( + default=False, + description="Echo all SQL statements to log (debug only).", + ) + pool_size: int = Field( + default=5, + description="Connection pool size for the app ORM engine (postgres only).", + ) + + # -- Derived helpers (not user-configured) -- + + @property + def _resolved_sqlite_dir(self) -> str: + """Resolve sqlite_dir to an absolute path (relative to CWD).""" + from pathlib import Path + + return str(Path(self.sqlite_dir).resolve()) + + @property + def checkpointer_sqlite_path(self) -> str: + """SQLite file path for the LangGraph checkpointer.""" + return os.path.join(self._resolved_sqlite_dir, "checkpoints.db") + + @property + def app_sqlite_path(self) -> str: + """SQLite file path for application ORM data.""" + return os.path.join(self._resolved_sqlite_dir, "app.db") + + @property + def app_sqlalchemy_url(self) -> str: + """SQLAlchemy async URL for the application ORM engine.""" + if self.backend == "sqlite": + return 
f"sqlite+aiosqlite:///{self.app_sqlite_path}" + if self.backend == "postgres": + url = self.postgres_url + if url.startswith("postgresql://"): + url = url.replace("postgresql://", "postgresql+asyncpg://", 1) + return url + raise ValueError(f"No SQLAlchemy URL for backend={self.backend!r}") diff --git a/backend/packages/harness/deerflow/config/run_events_config.py b/backend/packages/harness/deerflow/config/run_events_config.py new file mode 100644 index 000000000..cddd9061f --- /dev/null +++ b/backend/packages/harness/deerflow/config/run_events_config.py @@ -0,0 +1,33 @@ +"""Run event storage configuration. + +Controls where run events (messages + execution traces) are persisted. + +Backends: +- memory: In-memory storage, data lost on restart. Suitable for + development and testing. +- db: SQL database via SQLAlchemy ORM. Provides full query capability. + Suitable for production deployments. +- jsonl: Append-only JSONL files. Lightweight alternative for + single-node deployments that need persistence without a database. +""" + +from __future__ import annotations + +from typing import Literal + +from pydantic import BaseModel, Field + + +class RunEventsConfig(BaseModel): + backend: Literal["memory", "db", "jsonl"] = Field( + default="memory", + description="Storage backend for run events. 
'memory' for development (no persistence), 'db' for production (SQL queries), 'jsonl' for lightweight single-node persistence.", + ) + max_trace_content: int = Field( + default=10240, + description="Maximum trace content size in bytes before truncation (db backend only).", + ) + track_token_usage: bool = Field( + default=True, + description="Whether RunJournal should accumulate token counts to RunRow.", + ) diff --git a/backend/packages/harness/deerflow/models/factory.py b/backend/packages/harness/deerflow/models/factory.py index a47f46d73..14699cdd5 100644 --- a/backend/packages/harness/deerflow/models/factory.py +++ b/backend/packages/harness/deerflow/models/factory.py @@ -113,7 +113,16 @@ def create_chat_model(name: str | None = None, thinking_enabled: bool = False, * elif "reasoning_effort" not in model_settings_from_config: model_settings_from_config["reasoning_effort"] = "medium" - model_instance = model_class(**{**model_settings_from_config, **kwargs}) + # Ensure stream_usage is enabled so that token usage metadata is available + # in streaming responses. LangChain's BaseChatOpenAI only defaults + # stream_usage=True when no custom base_url/api_base is set, so models + # hitting third-party endpoints (e.g. doubao, deepseek) silently lose + # usage data. We default it to True unless explicitly configured. 
+ if "stream_usage" not in model_settings_from_config and "stream_usage" not in kwargs: + if "stream_usage" in getattr(model_class, "model_fields", {}): + model_settings_from_config["stream_usage"] = True + + model_instance = model_class(**kwargs, **model_settings_from_config) callbacks = build_tracing_callbacks() if callbacks: diff --git a/backend/packages/harness/deerflow/persistence/__init__.py b/backend/packages/harness/deerflow/persistence/__init__.py new file mode 100644 index 000000000..dfd64be95 --- /dev/null +++ b/backend/packages/harness/deerflow/persistence/__init__.py @@ -0,0 +1,13 @@ +"""DeerFlow application persistence layer (SQLAlchemy 2.0 async ORM). + +This module manages DeerFlow's own application data -- runs metadata, +thread ownership, cron jobs, users. It is completely separate from +LangGraph's checkpointer, which manages graph execution state. + +Usage: + from deerflow.persistence import init_engine, close_engine, get_session_factory +""" + +from deerflow.persistence.engine import close_engine, get_engine, get_session_factory, init_engine + +__all__ = ["close_engine", "get_engine", "get_session_factory", "init_engine"] diff --git a/backend/packages/harness/deerflow/persistence/base.py b/backend/packages/harness/deerflow/persistence/base.py new file mode 100644 index 000000000..fd99d5f74 --- /dev/null +++ b/backend/packages/harness/deerflow/persistence/base.py @@ -0,0 +1,40 @@ +"""SQLAlchemy declarative base with automatic to_dict support. + +All DeerFlow ORM models inherit from this Base. It provides a generic +to_dict() method via SQLAlchemy's inspect() so individual models don't +need to write their own serialization logic. + +LangGraph's checkpointer tables are NOT managed by this Base. +""" + +from __future__ import annotations + +from sqlalchemy import inspect as sa_inspect +from sqlalchemy.orm import DeclarativeBase + + +class Base(DeclarativeBase): + """Base class for all DeerFlow ORM models. 
+ + Provides: + - Automatic to_dict() via SQLAlchemy column inspection. + - Standard __repr__() showing all column values. + """ + + def to_dict(self, *, exclude: set[str] | None = None) -> dict: + """Convert ORM instance to plain dict. + + Uses SQLAlchemy's inspect() to iterate mapped column attributes. + + Args: + exclude: Optional set of column keys to omit. + + Returns: + Dict of {column_key: value} for all mapped columns. + """ + exclude = exclude or set() + return {c.key: getattr(self, c.key) for c in sa_inspect(type(self)).mapper.column_attrs if c.key not in exclude} + + def __repr__(self) -> str: + cols = ", ".join(f"{c.key}={getattr(self, c.key)!r}" for c in sa_inspect(type(self)).mapper.column_attrs) + return f"{type(self).__name__}({cols})" diff --git a/backend/packages/harness/deerflow/persistence/engine.py b/backend/packages/harness/deerflow/persistence/engine.py new file mode 100644 index 000000000..ddd9e510b --- /dev/null +++ b/backend/packages/harness/deerflow/persistence/engine.py @@ -0,0 +1,166 @@ +"""Async SQLAlchemy engine lifecycle management. + +Initializes at Gateway startup, provides session factory for +repositories, disposes at shutdown. + +When database.backend="memory", init_engine is a no-op and +get_session_factory() returns None. Repositories must check for +None and fall back to in-memory implementations. +""" + +from __future__ import annotations + +import json +import logging + +from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, async_sessionmaker, create_async_engine + + +def _json_serializer(obj: object) -> str: + """JSON serializer with ensure_ascii=False for Chinese character support.""" + return json.dumps(obj, ensure_ascii=False) + + +logger = logging.getLogger(__name__) + +_engine: AsyncEngine | None = None +_session_factory: async_sessionmaker[AsyncSession] | None = None + + +async def _auto_create_postgres_db(url: str) -> None: + """Connect to the ``postgres`` maintenance DB and CREATE DATABASE. 
async def init_engine(
    backend: str,
    *,
    url: str = "",
    echo: bool = False,
    pool_size: int = 5,
    sqlite_dir: str = "",
) -> None:
    """Create the async engine and session factory, then auto-create tables.

    The new engine is published to the module-level ``_engine`` /
    ``_session_factory`` only after table creation succeeded. If any step
    fails, the half-built engine is disposed and the previous module state is
    left untouched, so ``get_session_factory()`` never returns a factory bound
    to an engine that could not be initialized. When an engine was already
    published, it is disposed once the replacement is ready.

    Args:
        backend: "memory", "sqlite", or "postgres".
        url: SQLAlchemy async URL (for sqlite/postgres).
        echo: Echo SQL to log.
        pool_size: Postgres connection pool size.
        sqlite_dir: Directory to create for SQLite (ensured to exist).

    Raises:
        ImportError: ``backend`` is "postgres" but asyncpg is not installed.
        ValueError: ``backend`` is not one of the supported values.
    """
    global _engine, _session_factory

    if backend == "memory":
        logger.info("Persistence backend=memory -- ORM engine not initialized")
        return

    if backend == "postgres":
        try:
            import asyncpg  # noqa: F401
        except ImportError:
            raise ImportError("database.backend is set to 'postgres' but asyncpg is not installed.\nInstall it with:\n uv sync --extra postgres\nOr switch to backend: sqlite in config.yaml for single-node deployment.") from None
    elif backend != "sqlite":
        raise ValueError(f"Unknown persistence backend: {backend!r}")

    # Auto-create tables (dev convenience). Production should use Alembic.
    from deerflow.persistence.base import Base

    # Import all models so Base.metadata discovers them.
    try:
        import deerflow.persistence.models  # noqa: F401
    except ImportError as exc:
        if getattr(exc, "name", None) == "deerflow.persistence.models":
            # The models package itself is absent (scaffolding / minimal install).
            logger.debug("deerflow.persistence.models not found; skipping auto-create tables")
        else:
            # The package exists but something it imports is broken: still
            # best-effort, but make the failure visible instead of hiding it.
            logger.warning(
                "Importing deerflow.persistence.models failed; tables of unregistered models will not be auto-created",
                exc_info=True,
            )

    def _new_engine() -> AsyncEngine:
        """Build an engine for the requested backend."""
        if backend == "sqlite":
            return create_async_engine(url, echo=echo, json_serializer=_json_serializer)
        return create_async_engine(
            url,
            echo=echo,
            pool_size=pool_size,
            pool_pre_ping=True,
            json_serializer=_json_serializer,
        )

    async def _create_tables(target: AsyncEngine) -> None:
        """Run ``Base.metadata.create_all`` inside one transaction."""
        async with target.begin() as conn:
            await conn.run_sync(Base.metadata.create_all)

    if backend == "sqlite":
        import os

        os.makedirs(sqlite_dir or ".", exist_ok=True)

    engine = _new_engine()
    try:
        try:
            await _create_tables(engine)
        except Exception as exc:
            message = str(exc)
            # Only a missing *database* can be fixed by CREATE DATABASE; a
            # missing role or relation also says "does not exist".
            if not (backend == "postgres" and "database" in message and "does not exist" in message):
                raise
            # Database not yet created -- attempt to auto-create it, then retry
            # with a fresh engine against the now-existing database.
            await _auto_create_postgres_db(url)
            await engine.dispose()
            engine = _new_engine()
            await _create_tables(engine)
    except BaseException:
        # Never leave a pool behind for an engine that failed to initialize.
        await engine.dispose()
        raise

    previous = _engine
    _engine = engine
    _session_factory = async_sessionmaker(engine, expire_on_commit=False)
    if previous is not None:
        logger.warning("Persistence engine re-initialized; disposing the previous engine")
        await previous.dispose()

    logger.info("Persistence engine initialized: backend=%s", backend)
class FeedbackRow(Base):
    """ORM row for one piece of user feedback attached to a run.

    Mapped to the ``feedback`` table. ``rating`` is documented as +1 / -1,
    but this model declares no database-level constraint enforcing it.
    """

    __tablename__ = "feedback"

    # Primary key: string identifier of up to 64 characters.
    feedback_id: Mapped[str] = mapped_column(String(64), primary_key=True)
    # Run and thread the feedback refers to; both are required and indexed.
    run_id: Mapped[str] = mapped_column(String(64), nullable=False, index=True)
    thread_id: Mapped[str] = mapped_column(String(64), nullable=False, index=True)
    # Nullable, indexed owner identifier.
    owner_id: Mapped[str | None] = mapped_column(String(64), index=True)
    message_id: Mapped[str | None] = mapped_column(String(64))
    # message_id is an optional RunEventStore event identifier —
    # allows feedback to target a specific message or the entire run

    rating: Mapped[int] = mapped_column(nullable=False)
    # +1 (thumbs-up) or -1 (thumbs-down)

    comment: Mapped[str | None] = mapped_column(Text)
    # Optional text feedback from the user

    # Timezone-aware timestamp; the Python-side default is the current UTC time.
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(UTC))
class FeedbackRepository:
    """Feedback storage on top of an async SQLAlchemy session factory.

    Every public coroutine opens its own short-lived session from the
    factory and returns plain dicts rather than ORM rows.
    """

    def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
        self._sf = session_factory

    @staticmethod
    def _row_to_dict(row: FeedbackRow) -> dict:
        """Serialize a row via ``to_dict()``, rendering ``created_at`` as ISO-8601."""
        payload = row.to_dict()
        stamp = payload.get("created_at")
        if isinstance(stamp, datetime):
            payload["created_at"] = stamp.isoformat()
        return payload

    async def _fetch_dicts(self, query) -> list[dict]:
        """Run a row-returning SELECT and serialize every resulting row."""
        async with self._sf() as db:
            found = await db.execute(query)
            return [self._row_to_dict(entry) for entry in found.scalars()]

    async def create(
        self,
        *,
        run_id: str,
        thread_id: str,
        rating: int,
        owner_id: str | None = None,
        message_id: str | None = None,
        comment: str | None = None,
    ) -> dict:
        """Insert one feedback record and return it as a dict.

        Raises:
            ValueError: ``rating`` is neither +1 nor -1.
        """
        if not (rating == 1 or rating == -1):
            raise ValueError(f"rating must be +1 or -1, got {rating}")
        record = FeedbackRow(
            feedback_id=str(uuid.uuid4()),
            run_id=run_id,
            thread_id=thread_id,
            owner_id=owner_id,
            message_id=message_id,
            rating=rating,
            comment=comment,
            created_at=datetime.now(UTC),
        )
        async with self._sf() as db:
            db.add(record)
            await db.commit()
            await db.refresh(record)
            return self._row_to_dict(record)

    async def get(self, feedback_id: str) -> dict | None:
        """Return the feedback with this id, or ``None`` when it does not exist."""
        async with self._sf() as db:
            record = await db.get(FeedbackRow, feedback_id)
            if not record:
                return None
            return self._row_to_dict(record)

    async def list_by_run(self, thread_id: str, run_id: str, *, limit: int = 100) -> list[dict]:
        """List feedback for one run of a thread, oldest first, capped at ``limit``."""
        query = (
            select(FeedbackRow)
            .where(FeedbackRow.thread_id == thread_id, FeedbackRow.run_id == run_id)
            .order_by(FeedbackRow.created_at.asc())
            .limit(limit)
        )
        return await self._fetch_dicts(query)

    async def list_by_thread(self, thread_id: str, *, limit: int = 100) -> list[dict]:
        """List feedback for a whole thread, oldest first, capped at ``limit``."""
        query = (
            select(FeedbackRow)
            .where(FeedbackRow.thread_id == thread_id)
            .order_by(FeedbackRow.created_at.asc())
            .limit(limit)
        )
        return await self._fetch_dicts(query)

    async def delete(self, feedback_id: str) -> bool:
        """Delete one feedback record; ``False`` means there was nothing to delete."""
        async with self._sf() as db:
            record = await db.get(FeedbackRow, feedback_id)
            if record is None:
                return False
            await db.delete(record)
            await db.commit()
            return True

    async def aggregate_by_run(self, thread_id: str, run_id: str) -> dict:
        """Aggregate feedback stats for a run using database-side counting."""
        thumbs_up = func.sum(case((FeedbackRow.rating == 1, 1), else_=0))
        thumbs_down = func.sum(case((FeedbackRow.rating == -1, 1), else_=0))
        query = select(
            func.count().label("total"),
            func.coalesce(thumbs_up, 0).label("positive"),
            func.coalesce(thumbs_down, 0).label("negative"),
        ).where(FeedbackRow.thread_id == thread_id, FeedbackRow.run_id == run_id)
        async with self._sf() as db:
            stats = (await db.execute(query)).one()
            return {
                "run_id": run_id,
                "total": stats.total,
                "positive": stats.positive,
                "negative": stats.negative,
            }
+sqlalchemy.url = sqlite+aiosqlite:///./data/app.db + +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/backend/packages/harness/deerflow/persistence/migrations/env.py b/backend/packages/harness/deerflow/persistence/migrations/env.py new file mode 100644 index 000000000..04c186fa0 --- /dev/null +++ b/backend/packages/harness/deerflow/persistence/migrations/env.py @@ -0,0 +1,65 @@ +"""Alembic environment for DeerFlow application tables. + +ONLY manages DeerFlow's tables (runs, threads_meta, cron_jobs, users). +LangGraph's checkpointer tables are managed by LangGraph itself -- they +have their own schema lifecycle and must not be touched by Alembic. +""" + +from __future__ import annotations + +import asyncio +import logging +from logging.config import fileConfig + +from alembic import context +from sqlalchemy.ext.asyncio import create_async_engine + +from deerflow.persistence.base import Base + +# Import all models so metadata is populated. +try: + import deerflow.persistence.models # noqa: F401 — register ORM models with Base.metadata +except ImportError: + # Models not available — migration will work with existing metadata only. 
async def run_migrations_online() -> None:
    """Run migrations against a live database through an async engine.

    The engine is built from the ``sqlalchemy.url`` main option and is
    always disposed afterwards, including when a migration raises, so a
    failed run does not leave pooled connections behind.
    """
    connectable = create_async_engine(config.get_main_option("sqlalchemy.url"))
    try:
        async with connectable.connect() as connection:
            # Alembic's migration context is synchronous; run_sync bridges it
            # onto the async connection.
            await connection.run_sync(do_run_migrations)
    finally:
        await connectable.dispose()
+ +The actual ORM classes have moved to entity-specific subpackages: +- ``deerflow.persistence.thread_meta`` +- ``deerflow.persistence.run`` +- ``deerflow.persistence.feedback`` + +``RunEventRow`` remains in ``deerflow.persistence.models.run_event`` because +its storage implementation lives in ``deerflow.runtime.events.store.db`` and +there is no matching entity directory. +""" + +from deerflow.persistence.feedback.model import FeedbackRow +from deerflow.persistence.models.run_event import RunEventRow +from deerflow.persistence.run.model import RunRow +from deerflow.persistence.thread_meta.model import ThreadMetaRow + +__all__ = ["FeedbackRow", "RunEventRow", "RunRow", "ThreadMetaRow"] diff --git a/backend/packages/harness/deerflow/persistence/models/run_event.py b/backend/packages/harness/deerflow/persistence/models/run_event.py new file mode 100644 index 000000000..8db50aea7 --- /dev/null +++ b/backend/packages/harness/deerflow/persistence/models/run_event.py @@ -0,0 +1,31 @@ +"""ORM model for run events.""" + +from __future__ import annotations + +from datetime import UTC, datetime + +from sqlalchemy import JSON, DateTime, Index, String, Text, UniqueConstraint +from sqlalchemy.orm import Mapped, mapped_column + +from deerflow.persistence.base import Base + + +class RunEventRow(Base): + __tablename__ = "run_events" + + id: Mapped[int] = mapped_column(primary_key=True, autoincrement=True) + thread_id: Mapped[str] = mapped_column(String(64), nullable=False) + run_id: Mapped[str] = mapped_column(String(64), nullable=False) + event_type: Mapped[str] = mapped_column(String(32), nullable=False) + category: Mapped[str] = mapped_column(String(16), nullable=False) + # "message" | "trace" | "lifecycle" + content: Mapped[str] = mapped_column(Text, default="") + event_metadata: Mapped[dict] = mapped_column(JSON, default=dict) + seq: Mapped[int] = mapped_column(nullable=False) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: 
class RunRow(Base):
    """ORM row holding the metadata of one run, mapped to the ``runs`` table.

    Besides identity and status it carries denormalized convenience fields
    and token-usage counters, plus a composite index on
    ``(thread_id, status)``.
    """

    __tablename__ = "runs"

    # Identity: run_id is the primary key; thread_id is required and indexed.
    run_id: Mapped[str] = mapped_column(String(64), primary_key=True)
    thread_id: Mapped[str] = mapped_column(String(64), nullable=False, index=True)
    assistant_id: Mapped[str | None] = mapped_column(String(128))
    owner_id: Mapped[str | None] = mapped_column(String(64), index=True)
    status: Mapped[str] = mapped_column(String(20), default="pending")
    # "pending" | "running" | "success" | "error" | "timeout" | "interrupted"

    model_name: Mapped[str | None] = mapped_column(String(128))
    multitask_strategy: Mapped[str] = mapped_column(String(20), default="reject")
    # JSON columns; ``default=dict`` gives each row its own empty dict.
    metadata_json: Mapped[dict] = mapped_column(JSON, default=dict)
    kwargs_json: Mapped[dict] = mapped_column(JSON, default=dict)
    error: Mapped[str | None] = mapped_column(Text)

    # Convenience fields (for listing pages without querying RunEventStore)
    message_count: Mapped[int] = mapped_column(default=0)
    first_human_message: Mapped[str | None] = mapped_column(Text)
    last_ai_message: Mapped[str | None] = mapped_column(Text)

    # Token usage (accumulated in-memory by RunJournal, written on run completion)
    total_input_tokens: Mapped[int] = mapped_column(default=0)
    total_output_tokens: Mapped[int] = mapped_column(default=0)
    total_tokens: Mapped[int] = mapped_column(default=0)
    llm_call_count: Mapped[int] = mapped_column(default=0)
    lead_agent_tokens: Mapped[int] = mapped_column(default=0)
    subagent_tokens: Mapped[int] = mapped_column(default=0)
    middleware_tokens: Mapped[int] = mapped_column(default=0)

    # Follow-up association
    follow_up_to_run_id: Mapped[str | None] = mapped_column(String(64))

    # Timezone-aware timestamps; updated_at is additionally refreshed by the
    # ``onupdate`` hook on ORM-issued UPDATEs.
    created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(UTC))
    updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC))

    # Composite index for "runs of a thread in a given status" lookups.
    __table_args__ = (Index("ix_runs_thread_status", "thread_id", "status"),)
class RunRepository(RunStore):
    """SQLAlchemy-backed ``RunStore``.

    Each method opens and releases its own short-lived session, so no
    connection is held while a run executes. Rows are returned as plain
    dicts (see ``_row_to_dict``).
    """

    def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None:
        self._sf = session_factory

    @staticmethod
    def _safe_json(obj: Any) -> Any:
        """Return a JSON-serializable equivalent of ``obj``.

        Containers are sanitized recursively. Objects exposing
        ``model_dump()`` or ``dict()`` are dumped and the *result* is
        sanitized as well, because such dumps may still contain values
        (datetimes, UUIDs, nested models) that the JSON column cannot
        store. Anything else that ``json.dumps`` rejects becomes ``str(obj)``.
        """
        if obj is None or isinstance(obj, (str, int, float, bool)):
            return obj
        if isinstance(obj, dict):
            # Keys json.dumps cannot encode (tuples, enums, ...) are stringified.
            return {(k if k is None or isinstance(k, (str, int, float, bool)) else str(k)): RunRepository._safe_json(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple)):
            return [RunRepository._safe_json(v) for v in obj]
        for dumper_name in ("model_dump", "dict"):
            dumper = getattr(obj, dumper_name, None)
            if not callable(dumper):
                continue
            try:
                dumped = dumper()
            except Exception:
                # Best effort: fall through to the next strategy.
                continue
            if isinstance(dumped, (dict, list, tuple)):
                return RunRepository._safe_json(dumped)
            # Non-container dump (unusual): validate it below instead of
            # recursing, which could loop on objects that dump to themselves.
            obj = dumped
            break
        try:
            json.dumps(obj)
            return obj
        except (TypeError, ValueError):
            return str(obj)

    @staticmethod
    def _row_to_dict(row: RunRow) -> dict[str, Any]:
        """Convert a row to the dict shape of the ``RunStore`` interface."""
        d = row.to_dict()
        # Remap JSON columns to match RunStore interface
        d["metadata"] = d.pop("metadata_json", {})
        d["kwargs"] = d.pop("kwargs_json", {})
        # Convert datetime to ISO string for consistency with MemoryRunStore
        for key in ("created_at", "updated_at"):
            val = d.get(key)
            if isinstance(val, datetime):
                d[key] = val.isoformat()
        return d

    async def put(
        self,
        run_id,
        *,
        thread_id,
        assistant_id=None,
        owner_id=None,
        status="pending",
        multitask_strategy="reject",
        metadata=None,
        kwargs=None,
        error=None,
        created_at=None,
        follow_up_to_run_id=None,
    ):
        """Insert a new run row.

        ``metadata`` and ``kwargs`` are sanitized with ``_safe_json``;
        ``created_at`` is an optional ISO-8601 string and defaults to now (UTC).
        """
        now = datetime.now(UTC)
        row = RunRow(
            run_id=run_id,
            thread_id=thread_id,
            assistant_id=assistant_id,
            owner_id=owner_id,
            status=status,
            multitask_strategy=multitask_strategy,
            metadata_json=self._safe_json(metadata) or {},
            kwargs_json=self._safe_json(kwargs) or {},
            error=error,
            follow_up_to_run_id=follow_up_to_run_id,
            created_at=datetime.fromisoformat(created_at) if created_at else now,
            updated_at=now,
        )
        async with self._sf() as session:
            session.add(row)
            await session.commit()

    async def get(self, run_id):
        """Return the run as a dict, or ``None`` when it does not exist."""
        async with self._sf() as session:
            row = await session.get(RunRow, run_id)
            return self._row_to_dict(row) if row else None

    async def list_by_thread(self, thread_id, *, owner_id=None, limit=100):
        """List runs of a thread, newest first, optionally filtered by owner."""
        stmt = select(RunRow).where(RunRow.thread_id == thread_id)
        if owner_id is not None:
            stmt = stmt.where(RunRow.owner_id == owner_id)
        stmt = stmt.order_by(RunRow.created_at.desc()).limit(limit)
        async with self._sf() as session:
            result = await session.execute(stmt)
            return [self._row_to_dict(r) for r in result.scalars()]

    async def update_status(self, run_id, status, *, error=None):
        """Set the status (and, when given, the error) of a run."""
        values: dict[str, Any] = {"status": status, "updated_at": datetime.now(UTC)}
        if error is not None:
            values["error"] = error
        async with self._sf() as session:
            await session.execute(update(RunRow).where(RunRow.run_id == run_id).values(**values))
            await session.commit()

    async def delete(self, run_id):
        """Delete a run; a missing run is silently ignored."""
        async with self._sf() as session:
            row = await session.get(RunRow, run_id)
            if row is not None:
                await session.delete(row)
                await session.commit()

    async def list_pending(self, *, before=None):
        """List pending runs created at or before ``before``, oldest first.

        ``before`` may be a ``datetime``, an ISO-8601 string, or ``None``
        (meaning now, UTC).
        """
        if before is None:
            before_dt = datetime.now(UTC)
        elif isinstance(before, datetime):
            before_dt = before
        else:
            before_dt = datetime.fromisoformat(before)
        stmt = select(RunRow).where(RunRow.status == "pending", RunRow.created_at <= before_dt).order_by(RunRow.created_at.asc())
        async with self._sf() as session:
            result = await session.execute(stmt)
            return [self._row_to_dict(r) for r in result.scalars()]

    async def update_run_completion(
        self,
        run_id: str,
        *,
        status: str,
        total_input_tokens: int = 0,
        total_output_tokens: int = 0,
        total_tokens: int = 0,
        llm_call_count: int = 0,
        lead_agent_tokens: int = 0,
        subagent_tokens: int = 0,
        middleware_tokens: int = 0,
        message_count: int = 0,
        last_ai_message: str | None = None,
        first_human_message: str | None = None,
        error: str | None = None,
    ) -> None:
        """Update status + token usage + convenience fields on run completion.

        The two message previews are truncated to 2000 characters; optional
        fields left as ``None`` are not written.
        """
        values: dict[str, Any] = {
            "status": status,
            "total_input_tokens": total_input_tokens,
            "total_output_tokens": total_output_tokens,
            "total_tokens": total_tokens,
            "llm_call_count": llm_call_count,
            "lead_agent_tokens": lead_agent_tokens,
            "subagent_tokens": subagent_tokens,
            "middleware_tokens": middleware_tokens,
            "message_count": message_count,
            "updated_at": datetime.now(UTC),
        }
        if last_ai_message is not None:
            values["last_ai_message"] = last_ai_message[:2000]
        if first_human_message is not None:
            values["first_human_message"] = first_human_message[:2000]
        if error is not None:
            values["error"] = error
        async with self._sf() as session:
            await session.execute(update(RunRow).where(RunRow.run_id == run_id).values(**values))
            await session.commit()

    async def aggregate_tokens_by_thread(self, thread_id: str) -> dict[str, Any]:
        """Aggregate token usage via a single SQL GROUP BY query.

        Only runs whose status is "success" or "error" are counted. Rows are
        grouped by model name, with a missing name reported as "unknown".
        """
        _completed = RunRow.status.in_(("success", "error"))
        _thread = RunRow.thread_id == thread_id

        stmt = (
            select(
                func.coalesce(RunRow.model_name, "unknown").label("model"),
                func.count().label("runs"),
                func.coalesce(func.sum(RunRow.total_tokens), 0).label("total_tokens"),
                func.coalesce(func.sum(RunRow.total_input_tokens), 0).label("total_input_tokens"),
                func.coalesce(func.sum(RunRow.total_output_tokens), 0).label("total_output_tokens"),
                func.coalesce(func.sum(RunRow.lead_agent_tokens), 0).label("lead_agent"),
                func.coalesce(func.sum(RunRow.subagent_tokens), 0).label("subagent"),
                func.coalesce(func.sum(RunRow.middleware_tokens), 0).label("middleware"),
            )
            .where(_thread, _completed)
            .group_by(func.coalesce(RunRow.model_name, "unknown"))
        )

        async with self._sf() as session:
            rows = (await session.execute(stmt)).all()

        total_tokens = total_input = total_output = total_runs = 0
        lead_agent = subagent = middleware = 0
        by_model: dict[str, dict] = {}
        for r in rows:
            by_model[r.model] = {"tokens": r.total_tokens, "runs": r.runs}
            total_tokens += r.total_tokens
            total_input += r.total_input_tokens
            total_output += r.total_output_tokens
            total_runs += r.runs
            lead_agent += r.lead_agent
            subagent += r.subagent
            middleware += r.middleware

        return {
            "total_tokens": total_tokens,
            "total_input_tokens": total_input,
            "total_output_tokens": total_output,
            "total_runs": total_runs,
            "by_model": by_model,
            "by_caller": {
                "lead_agent": lead_agent,
                "subagent": subagent,
                "middleware": middleware,
            },
        }
b/backend/packages/harness/deerflow/persistence/thread_meta/base.py @@ -0,0 +1,60 @@ +"""Abstract interface for thread metadata storage. + +Implementations: +- ThreadMetaRepository: SQL-backed (sqlite / postgres via SQLAlchemy) +- MemoryThreadMetaStore: wraps LangGraph BaseStore (memory mode) +""" + +from __future__ import annotations + +import abc + + +class ThreadMetaStore(abc.ABC): + @abc.abstractmethod + async def create( + self, + thread_id: str, + *, + assistant_id: str | None = None, + owner_id: str | None = None, + display_name: str | None = None, + metadata: dict | None = None, + ) -> dict: + pass + + @abc.abstractmethod + async def get(self, thread_id: str) -> dict | None: + pass + + @abc.abstractmethod + async def search( + self, + *, + metadata: dict | None = None, + status: str | None = None, + limit: int = 100, + offset: int = 0, + ) -> list[dict]: + pass + + @abc.abstractmethod + async def update_display_name(self, thread_id: str, display_name: str) -> None: + pass + + @abc.abstractmethod + async def update_status(self, thread_id: str, status: str) -> None: + pass + + @abc.abstractmethod + async def update_metadata(self, thread_id: str, metadata: dict) -> None: + """Merge ``metadata`` into the thread's metadata field. + + Existing keys are overwritten by the new values; keys absent from + ``metadata`` are preserved. No-op if the thread does not exist. + """ + pass + + @abc.abstractmethod + async def delete(self, thread_id: str) -> None: + pass diff --git a/backend/packages/harness/deerflow/persistence/thread_meta/memory.py b/backend/packages/harness/deerflow/persistence/thread_meta/memory.py new file mode 100644 index 000000000..ab921f229 --- /dev/null +++ b/backend/packages/harness/deerflow/persistence/thread_meta/memory.py @@ -0,0 +1,120 @@ +"""In-memory ThreadMetaStore backed by LangGraph BaseStore. + +Used when database.backend=memory. 
class MemoryThreadMetaStore(ThreadMetaStore):
    """``ThreadMetaStore`` that keeps records in a LangGraph ``BaseStore``.

    All records live under the ``THREADS_NS`` namespace, keyed by thread id.
    Timestamps are ``time.time()`` floats.
    """

    def __init__(self, store: BaseStore) -> None:
        self._store = store

    async def _patch(self, thread_id: str, build_changes) -> None:
        """Read-modify-write one record; do nothing when it does not exist.

        ``build_changes`` receives a copy of the stored record and returns
        the fields to overwrite. ``updated_at`` is refreshed afterwards.
        """
        found = await self._store.aget(THREADS_NS, thread_id)
        if found is None:
            return
        snapshot = dict(found.value)
        snapshot.update(build_changes(snapshot))
        snapshot["updated_at"] = time.time()
        await self._store.aput(THREADS_NS, thread_id, snapshot)

    async def create(
        self,
        thread_id: str,
        *,
        assistant_id: str | None = None,
        owner_id: str | None = None,
        display_name: str | None = None,
        metadata: dict | None = None,
    ) -> dict:
        """Store a fresh ``idle`` record for ``thread_id`` and return it."""
        stamp = time.time()
        record: dict[str, Any] = dict(
            thread_id=thread_id,
            assistant_id=assistant_id,
            owner_id=owner_id,
            display_name=display_name,
            status="idle",
            metadata=metadata or {},
            values={},
            created_at=stamp,
            updated_at=stamp,
        )
        await self._store.aput(THREADS_NS, thread_id, record)
        return record

    async def get(self, thread_id: str) -> dict | None:
        """Return the stored value for ``thread_id``, or ``None`` when absent."""
        found = await self._store.aget(THREADS_NS, thread_id)
        if found is None:
            return None
        return found.value

    async def search(
        self,
        *,
        metadata: dict | None = None,
        status: str | None = None,
        limit: int = 100,
        offset: int = 0,
    ) -> list[dict]:
        """Search the namespace with an optional metadata/status filter."""
        criteria: dict[str, Any] = dict(metadata) if metadata else {}
        if status:
            criteria["status"] = status

        hits = await self._store.asearch(
            THREADS_NS,
            filter=criteria or None,
            limit=limit,
            offset=offset,
        )
        return [self._item_to_dict(hit) for hit in hits]

    async def update_display_name(self, thread_id: str, display_name: str) -> None:
        """Set ``display_name`` on the record. No-op if absent."""
        await self._patch(thread_id, lambda _current: {"display_name": display_name})

    async def update_status(self, thread_id: str, status: str) -> None:
        """Set ``status`` on the record. No-op if absent."""
        await self._patch(thread_id, lambda _current: {"status": status})

    async def update_metadata(self, thread_id: str, metadata: dict) -> None:
        """Merge ``metadata`` into the in-memory record. No-op if absent."""

        def merged_metadata(current: dict) -> dict:
            combined = dict(current.get("metadata") or {})
            combined.update(metadata)
            return {"metadata": combined}

        await self._patch(thread_id, merged_metadata)

    async def delete(self, thread_id: str) -> None:
        """Remove the record for ``thread_id`` from the store."""
        await self._store.adelete(THREADS_NS, thread_id)

    @staticmethod
    def _item_to_dict(item) -> dict[str, Any]:
        """Convert a Store SearchItem to the dict format expected by callers."""
        stored = item.value
        summary: dict[str, Any] = {"thread_id": item.key}
        for field in ("assistant_id", "owner_id", "display_name"):
            summary[field] = stored.get(field)
        summary["status"] = stored.get("status", "idle")
        summary["metadata"] = stored.get("metadata", {})
        # Timestamps are reported as strings; a missing one becomes "".
        for field in ("created_at", "updated_at"):
            summary[field] = str(stored.get(field, ""))
        return summary
mapped_column(String(64), primary_key=True) + assistant_id: Mapped[str | None] = mapped_column(String(128), index=True) + owner_id: Mapped[str | None] = mapped_column(String(64), index=True) + display_name: Mapped[str | None] = mapped_column(String(256)) + status: Mapped[str] = mapped_column(String(20), default="idle") + metadata_json: Mapped[dict] = mapped_column(JSON, default=dict) + created_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(UTC)) + updated_at: Mapped[datetime] = mapped_column(DateTime(timezone=True), default=lambda: datetime.now(UTC), onupdate=lambda: datetime.now(UTC)) diff --git a/backend/packages/harness/deerflow/persistence/thread_meta/sql.py b/backend/packages/harness/deerflow/persistence/thread_meta/sql.py new file mode 100644 index 000000000..86c73030e --- /dev/null +++ b/backend/packages/harness/deerflow/persistence/thread_meta/sql.py @@ -0,0 +1,140 @@ +"""SQLAlchemy-backed thread metadata repository.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from typing import Any + +from sqlalchemy import select, update +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker + +from deerflow.persistence.thread_meta.base import ThreadMetaStore +from deerflow.persistence.thread_meta.model import ThreadMetaRow + + +class ThreadMetaRepository(ThreadMetaStore): + def __init__(self, session_factory: async_sessionmaker[AsyncSession]) -> None: + self._sf = session_factory + + @staticmethod + def _row_to_dict(row: ThreadMetaRow) -> dict[str, Any]: + d = row.to_dict() + d["metadata"] = d.pop("metadata_json", {}) + for key in ("created_at", "updated_at"): + val = d.get(key) + if isinstance(val, datetime): + d[key] = val.isoformat() + return d + + async def create( + self, + thread_id: str, + *, + assistant_id: str | None = None, + owner_id: str | None = None, + display_name: str | None = None, + metadata: dict | None = None, + ) -> dict: + now = datetime.now(UTC) + row = 
ThreadMetaRow( + thread_id=thread_id, + assistant_id=assistant_id, + owner_id=owner_id, + display_name=display_name, + metadata_json=metadata or {}, + created_at=now, + updated_at=now, + ) + async with self._sf() as session: + session.add(row) + await session.commit() + await session.refresh(row) + return self._row_to_dict(row) + + async def get(self, thread_id: str) -> dict | None: + async with self._sf() as session: + row = await session.get(ThreadMetaRow, thread_id) + return self._row_to_dict(row) if row else None + + async def list_by_owner(self, owner_id: str, *, limit: int = 100, offset: int = 0) -> list[dict]: + stmt = select(ThreadMetaRow).where(ThreadMetaRow.owner_id == owner_id).order_by(ThreadMetaRow.updated_at.desc()).limit(limit).offset(offset) + async with self._sf() as session: + result = await session.execute(stmt) + return [self._row_to_dict(r) for r in result.scalars()] + + async def check_access(self, thread_id: str, owner_id: str) -> bool: + """Check if owner_id has access to thread_id. + + Returns True if: row doesn't exist (untracked thread), owner_id + is None on the row (shared thread), or owner_id matches. + """ + async with self._sf() as session: + row = await session.get(ThreadMetaRow, thread_id) + if row is None: + return True + if row.owner_id is None: + return True + return row.owner_id == owner_id + + async def search( + self, + *, + metadata: dict | None = None, + status: str | None = None, + limit: int = 100, + offset: int = 0, + ) -> list[dict]: + """Search threads with optional metadata and status filters.""" + stmt = select(ThreadMetaRow).order_by(ThreadMetaRow.updated_at.desc()) + if status: + stmt = stmt.where(ThreadMetaRow.status == status) + + if metadata: + # When metadata filter is active, fetch a larger window and filter + # in Python. TODO(Phase 2): use JSON DB operators (Postgres @>, + # SQLite json_extract) for server-side filtering. 
+ stmt = stmt.limit(limit * 5 + offset) + async with self._sf() as session: + result = await session.execute(stmt) + rows = [self._row_to_dict(r) for r in result.scalars()] + rows = [r for r in rows if all(r.get("metadata", {}).get(k) == v for k, v in metadata.items())] + return rows[offset : offset + limit] + else: + stmt = stmt.limit(limit).offset(offset) + async with self._sf() as session: + result = await session.execute(stmt) + return [self._row_to_dict(r) for r in result.scalars()] + + async def update_display_name(self, thread_id: str, display_name: str) -> None: + """Update the display_name (title) for a thread.""" + async with self._sf() as session: + await session.execute(update(ThreadMetaRow).where(ThreadMetaRow.thread_id == thread_id).values(display_name=display_name, updated_at=datetime.now(UTC))) + await session.commit() + + async def update_status(self, thread_id: str, status: str) -> None: + async with self._sf() as session: + await session.execute(update(ThreadMetaRow).where(ThreadMetaRow.thread_id == thread_id).values(status=status, updated_at=datetime.now(UTC))) + await session.commit() + + async def update_metadata(self, thread_id: str, metadata: dict) -> None: + """Merge ``metadata`` into ``metadata_json``. + + Read-modify-write inside a single session/transaction so concurrent + callers see consistent state. No-op if the row does not exist. 
+ """ + async with self._sf() as session: + row = await session.get(ThreadMetaRow, thread_id) + if row is None: + return + merged = dict(row.metadata_json or {}) + merged.update(metadata) + row.metadata_json = merged + row.updated_at = datetime.now(UTC) + await session.commit() + + async def delete(self, thread_id: str) -> None: + async with self._sf() as session: + row = await session.get(ThreadMetaRow, thread_id) + if row is not None: + await session.delete(row) + await session.commit() diff --git a/backend/packages/harness/deerflow/runtime/__init__.py b/backend/packages/harness/deerflow/runtime/__init__.py index d7eccf101..d5faa9018 100644 --- a/backend/packages/harness/deerflow/runtime/__init__.py +++ b/backend/packages/harness/deerflow/runtime/__init__.py @@ -5,7 +5,7 @@ Re-exports the public API of :mod:`~deerflow.runtime.runs` and directly from ``deerflow.runtime``. """ -from .runs import ConflictError, DisconnectMode, RunManager, RunRecord, RunStatus, UnsupportedStrategyError, run_agent +from .runs import ConflictError, DisconnectMode, RunContext, RunManager, RunRecord, RunStatus, UnsupportedStrategyError, run_agent from .serialization import serialize, serialize_channel_values, serialize_lc_object, serialize_messages_tuple from .store import get_store, make_store, reset_store, store_context from .stream_bridge import END_SENTINEL, HEARTBEAT_SENTINEL, MemoryStreamBridge, StreamBridge, StreamEvent, make_stream_bridge @@ -14,6 +14,7 @@ __all__ = [ # runs "ConflictError", "DisconnectMode", + "RunContext", "RunManager", "RunRecord", "RunStatus", diff --git a/backend/packages/harness/deerflow/runtime/converters.py b/backend/packages/harness/deerflow/runtime/converters.py new file mode 100644 index 000000000..811031160 --- /dev/null +++ b/backend/packages/harness/deerflow/runtime/converters.py @@ -0,0 +1,134 @@ +"""Pure functions to convert LangChain message objects to OpenAI Chat Completions format. + +Used by RunJournal to build content dicts for event storage. 
+"""
+
+from __future__ import annotations
+
+import json
+from typing import Any
+
+# LangChain ``message.type`` -> OpenAI ``role``. Unknown types pass through unchanged.
+_ROLE_MAP = {
+    "human": "user",
+    "ai": "assistant",
+    "system": "system",
+    "tool": "tool",
+}
+
+
+def langchain_to_openai_message(message: Any) -> dict:
+    """Convert a single LangChain BaseMessage to an OpenAI message dict.
+
+    Handles:
+    - HumanMessage → {"role": "user", "content": "..."}
+    - AIMessage (text only) → {"role": "assistant", "content": "..."}
+    - AIMessage (with tool_calls) → {"role": "assistant", "content": null, "tool_calls": [...]}
+    - AIMessage (text + tool_calls) → both content and tool_calls present
+    - AIMessage (list content / multimodal) → content preserved as list
+    - SystemMessage → {"role": "system", "content": "..."}
+    - ToolMessage → {"role": "tool", "tool_call_id": "...", "content": "..."}
+
+    The message is read with ``getattr`` only, so any object exposing
+    ``type`` / ``content`` (and optionally ``tool_calls`` / ``tool_call_id``)
+    is accepted. Tool-call arguments that are not already a string are
+    JSON-encoded with ``default=str`` so values json cannot encode natively
+    are stringified instead of raising ``TypeError``. A missing or ``None``
+    tool-call id, tool name or ``tool_call_id`` becomes ``""``.
+    """
+    msg_type = getattr(message, "type", "")
+    role = _ROLE_MAP.get(msg_type, msg_type)
+    content = getattr(message, "content", "")
+
+    if role == "tool":
+        return {
+            "role": "tool",
+            "tool_call_id": getattr(message, "tool_call_id", "") or "",
+            "content": content,
+        }
+
+    if role == "assistant":
+        tool_calls = getattr(message, "tool_calls", None) or []
+        result: dict = {"role": "assistant"}
+
+        if tool_calls:
+            openai_tool_calls = []
+            for tc in tool_calls:
+                args = tc.get("args")
+                if args is None:
+                    args = {}
+                openai_tool_calls.append(
+                    {
+                        "id": tc.get("id") or "",
+                        "type": "function",
+                        "function": {
+                            "name": tc.get("name") or "",
+                            # Pre-serialized arguments are passed through untouched.
+                            "arguments": args if isinstance(args, str) else json.dumps(args, default=str, ensure_ascii=False),
+                        },
+                    }
+                )
+            # If no text content, set content to null per OpenAI spec
+            result["content"] = content if isinstance(content, (str, list)) and content else None
+            result["tool_calls"] = openai_tool_calls
+        else:
+            result["content"] = content
+
+        return result
+
+    # user / system / unknown
+    return {"role": role, "content": content}
+
+
+def _infer_finish_reason(message: Any) -> str:
+    """Infer OpenAI finish_reason from an AIMessage.
+
+    Returns "tool_calls" if tool_calls present, else looks in
+    response_metadata.finish_reason, else returns "stop".
+    """
+    tool_calls = getattr(message, "tool_calls", None) or []
+    if tool_calls:
+        return "tool_calls"
+    resp_meta = getattr(message, "response_metadata", None) or {}
+    if isinstance(resp_meta, dict):
+        finish = resp_meta.get("finish_reason")
+        if finish:
+            return finish
+    return "stop"
+
+
+def langchain_to_openai_completion(message: Any) -> dict:
+    """Convert an AIMessage and its metadata to an OpenAI completion response dict.
+
+    Returns:
+        {
+            "id": message.id,
+            "model": message.response_metadata.get("model_name"),
+            "choices": [{"index": 0, "message": <openai message dict>, "finish_reason": <finish reason>}],
+            "usage": {"prompt_tokens": ..., "completion_tokens": ..., "total_tokens": ...} or None,
+        }
+
+    ``usage`` is ``None`` when the message has no ``usage_metadata``;
+    ``total_tokens`` is computed as input + output tokens.
+    """
+    resp_meta = getattr(message, "response_metadata", None) or {}
+    model_name = resp_meta.get("model_name") if isinstance(resp_meta, dict) else None
+
+    openai_msg = langchain_to_openai_message(message)
+    finish_reason = _infer_finish_reason(message)
+
+    usage_metadata = getattr(message, "usage_metadata", None)
+    if usage_metadata is not None:
+        # ``or 0`` also covers keys that are present but set to None.
+        input_tokens = usage_metadata.get("input_tokens", 0) or 0
+        output_tokens = usage_metadata.get("output_tokens", 0) or 0
+        usage: dict | None = {
+            "prompt_tokens": input_tokens,
+            "completion_tokens": output_tokens,
+            "total_tokens": input_tokens + output_tokens,
+        }
+    else:
+        usage = None
+
+    return {
+        "id": getattr(message, "id", None),
+        "model": model_name,
+        "choices": [
+            {
+                "index": 0,
+                "message": openai_msg,
+                "finish_reason": finish_reason,
+            }
+        ],
+        "usage": usage,
+    }
+
+
+def langchain_messages_to_openai(messages: list) -> list[dict]:
+    """Convert a list of LangChain BaseMessages to OpenAI message dicts."""
+    return [langchain_to_openai_message(m) for m in messages]
diff --git a/backend/packages/harness/deerflow/runtime/events/__init__.py b/backend/packages/harness/deerflow/runtime/events/__init__.py
new file mode
100644 index 000000000..0da8fabe5 --- /dev/null +++ b/backend/packages/harness/deerflow/runtime/events/__init__.py @@ -0,0 +1,4 @@ +from deerflow.runtime.events.store.base import RunEventStore +from deerflow.runtime.events.store.memory import MemoryRunEventStore + +__all__ = ["MemoryRunEventStore", "RunEventStore"] diff --git a/backend/packages/harness/deerflow/runtime/events/store/__init__.py b/backend/packages/harness/deerflow/runtime/events/store/__init__.py new file mode 100644 index 000000000..55f0dd33f --- /dev/null +++ b/backend/packages/harness/deerflow/runtime/events/store/__init__.py @@ -0,0 +1,26 @@ +from deerflow.runtime.events.store.base import RunEventStore +from deerflow.runtime.events.store.memory import MemoryRunEventStore + + +def make_run_event_store(config=None) -> RunEventStore: + """Create a RunEventStore based on run_events.backend configuration.""" + if config is None or config.backend == "memory": + return MemoryRunEventStore() + if config.backend == "db": + from deerflow.persistence.engine import get_session_factory + + sf = get_session_factory() + if sf is None: + # database.backend=memory but run_events.backend=db -> fallback + return MemoryRunEventStore() + from deerflow.runtime.events.store.db import DbRunEventStore + + return DbRunEventStore(sf, max_trace_content=config.max_trace_content) + if config.backend == "jsonl": + from deerflow.runtime.events.store.jsonl import JsonlRunEventStore + + return JsonlRunEventStore() + raise ValueError(f"Unknown run_events backend: {config.backend!r}") + + +__all__ = ["MemoryRunEventStore", "RunEventStore", "make_run_event_store"] diff --git a/backend/packages/harness/deerflow/runtime/events/store/base.py b/backend/packages/harness/deerflow/runtime/events/store/base.py new file mode 100644 index 000000000..e5da4ed82 --- /dev/null +++ b/backend/packages/harness/deerflow/runtime/events/store/base.py @@ -0,0 +1,99 @@ +"""Abstract interface for run event storage. 
+ +RunEventStore is the unified storage interface for run event streams. +Messages (frontend display) and execution traces (debugging/audit) go +through the same interface, distinguished by the ``category`` field. + +Implementations: +- MemoryRunEventStore: in-memory dict (development, tests) +- Future: DB-backed store (SQLAlchemy ORM), JSONL file store +""" + +from __future__ import annotations + +import abc + + +class RunEventStore(abc.ABC): + """Run event stream storage interface. + + All implementations must guarantee: + 1. put() events are retrievable in subsequent queries + 2. seq is strictly increasing within the same thread + 3. list_messages() only returns category="message" events + 4. list_events() returns all events for the specified run + 5. Returned dicts match the RunEvent field structure + """ + + @abc.abstractmethod + async def put( + self, + *, + thread_id: str, + run_id: str, + event_type: str, + category: str, + content: str | dict = "", + metadata: dict | None = None, + created_at: str | None = None, + ) -> dict: + """Write an event, auto-assign seq, return the complete record.""" + + @abc.abstractmethod + async def put_batch(self, events: list[dict]) -> list[dict]: + """Batch-write events. Used by RunJournal flush buffer. + + Each dict's keys match put()'s keyword arguments. + Returns complete records with seq assigned. + """ + + @abc.abstractmethod + async def list_messages( + self, + thread_id: str, + *, + limit: int = 50, + before_seq: int | None = None, + after_seq: int | None = None, + ) -> list[dict]: + """Return displayable messages (category=message) for a thread, ordered by seq ascending. 
+ + Supports bidirectional cursor pagination: + - before_seq: return the last ``limit`` records with seq < before_seq (ascending) + - after_seq: return the first ``limit`` records with seq > after_seq (ascending) + - neither: return the latest ``limit`` records (ascending) + """ + + @abc.abstractmethod + async def list_events( + self, + thread_id: str, + run_id: str, + *, + event_types: list[str] | None = None, + limit: int = 500, + ) -> list[dict]: + """Return the full event stream for a run, ordered by seq ascending. + + Optionally filter by event_types. + """ + + @abc.abstractmethod + async def list_messages_by_run( + self, + thread_id: str, + run_id: str, + ) -> list[dict]: + """Return displayable messages (category=message) for a specific run, ordered by seq ascending.""" + + @abc.abstractmethod + async def count_messages(self, thread_id: str) -> int: + """Count displayable messages (category=message) in a thread.""" + + @abc.abstractmethod + async def delete_by_thread(self, thread_id: str) -> int: + """Delete all events for a thread. Return the number of deleted events.""" + + @abc.abstractmethod + async def delete_by_run(self, thread_id: str, run_id: str) -> int: + """Delete all events for a specific run. Return the number of deleted events.""" diff --git a/backend/packages/harness/deerflow/runtime/events/store/db.py b/backend/packages/harness/deerflow/runtime/events/store/db.py new file mode 100644 index 000000000..0502cd879 --- /dev/null +++ b/backend/packages/harness/deerflow/runtime/events/store/db.py @@ -0,0 +1,185 @@ +"""SQLAlchemy-backed RunEventStore implementation. + +Persists events to the ``run_events`` table. Trace content is truncated +at ``max_trace_content`` bytes to avoid bloating the database. 
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+from datetime import UTC, datetime
+
+from sqlalchemy import delete, func, select
+from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker
+
+from deerflow.persistence.models.run_event import RunEventRow
+from deerflow.runtime.events.store.base import RunEventStore
+
+logger = logging.getLogger(__name__)
+
+
+class DbRunEventStore(RunEventStore):
+    """RunEventStore that persists events as ``RunEventRow`` rows.
+
+    Dict content is stored as a JSON string and flagged with
+    ``metadata["content_is_dict"]`` so reads can restore it. Trace content
+    larger than ``max_trace_content`` bytes (UTF-8) is truncated on write.
+    """
+
+    def __init__(self, session_factory: async_sessionmaker[AsyncSession], *, max_trace_content: int = 10240):
+        self._sf = session_factory
+        self._max_trace_content = max_trace_content
+
+    @staticmethod
+    def _row_to_dict(row: RunEventRow) -> dict:
+        """Convert a row to the public event dict (``metadata`` key, ISO ``created_at``, no ``id``)."""
+        d = row.to_dict()
+        # A NULL event_metadata column would otherwise surface as None and
+        # break the ``.get`` below; normalize to an empty dict.
+        d["metadata"] = d.pop("event_metadata", None) or {}
+        val = d.get("created_at")
+        if isinstance(val, datetime):
+            d["created_at"] = val.isoformat()
+        d.pop("id", None)
+        # Restore dict content that was JSON-serialized on write
+        raw = d.get("content", "")
+        if isinstance(raw, str) and d["metadata"].get("content_is_dict"):
+            try:
+                d["content"] = json.loads(raw)
+            except (json.JSONDecodeError, ValueError):
+                # Content looked like JSON (content_is_dict flag) but failed to parse;
+                # keep the raw string as-is.
+                logger.debug("Failed to deserialize content as JSON for event seq=%s", d.get("seq"))
+        return d
+
+    def _truncate_trace(self, category: str, content: str | dict, metadata: dict | None) -> tuple[str | dict, dict]:
+        """Cap ``trace`` content at ``max_trace_content`` bytes.
+
+        Returns ``(content, metadata)``; metadata is always a dict. When
+        truncation happens the content becomes a (possibly cut) string and
+        the metadata records ``content_truncated`` and ``original_byte_length``.
+        Other categories pass through unchanged.
+        """
+        if category == "trace":
+            text = json.dumps(content, default=str, ensure_ascii=False) if isinstance(content, dict) else content
+            encoded = text.encode("utf-8")
+            if len(encoded) > self._max_trace_content:
+                # Truncate by bytes, then decode back (may cut a multi-byte char, so use errors="ignore")
+                content = encoded[: self._max_trace_content].decode("utf-8", errors="ignore")
+                metadata = {**(metadata or {}), "content_truncated": True, "original_byte_length": len(encoded)}
+        return content, metadata or {}
+
+    @staticmethod
+    async def _lock_and_max_seq(session: AsyncSession, thread_id: str) -> int:
+        """Serialize seq assignment for ``thread_id``; return its current max seq (0 if none).
+
+        Must be called inside the transaction that inserts the new rows.
+        """
+        stmt = select(func.max(RunEventRow.seq)).where(RunEventRow.thread_id == thread_id)
+        if session.get_bind().dialect.name == "postgresql":
+            # PostgreSQL rejects ``SELECT max(...) ... FOR UPDATE`` ("FOR UPDATE is
+            # not allowed with aggregate functions"), so take a transaction-scoped
+            # advisory lock keyed on the thread instead. It is released
+            # automatically when the transaction commits or rolls back.
+            await session.execute(select(func.pg_advisory_xact_lock(func.hashtext(thread_id))))
+        else:
+            # NOTE: with_for_update() on aggregates is a no-op on SQLite;
+            # the UNIQUE(thread_id, seq) constraint catches races there.
+            stmt = stmt.with_for_update()
+        return await session.scalar(stmt) or 0
+
+    async def put(self, *, thread_id, run_id, event_type, category, content="", metadata=None, created_at=None):  # noqa: D401
+        """Write a single event — low-frequency path only.
+
+        This opens a dedicated transaction and takes a per-thread lock to
+        assign a monotonic *seq*. For high-throughput writes use
+        :meth:`put_batch`, which acquires the lock once for the whole
+        batch. Currently the only caller is ``worker.run_agent`` for
+        the initial ``human_message`` event (once per run).
+        """
+        content, metadata = self._truncate_trace(category, content, metadata)
+        if isinstance(content, dict):
+            db_content = json.dumps(content, default=str, ensure_ascii=False)
+            metadata = {**(metadata or {}), "content_is_dict": True}
+        else:
+            db_content = content
+        async with self._sf() as session:
+            async with session.begin():
+                seq = await self._lock_and_max_seq(session, thread_id) + 1
+                row = RunEventRow(
+                    thread_id=thread_id,
+                    run_id=run_id,
+                    event_type=event_type,
+                    category=category,
+                    content=db_content,
+                    event_metadata=metadata,
+                    seq=seq,
+                    created_at=datetime.fromisoformat(created_at) if created_at else datetime.now(UTC),
+                )
+                session.add(row)
+        return self._row_to_dict(row)
+
+    async def put_batch(self, events):
+        """Write ``events`` in one transaction and return the stored records in input order.
+
+        Each event dict uses the keyword names of :meth:`put`. ``seq`` is
+        assigned per ``thread_id``, so a batch may mix threads.
+        """
+        if not events:
+            return []
+        async with self._sf() as session:
+            async with session.begin():
+                # Lock threads in a stable (sorted) order so two concurrent
+                # batches touching the same threads cannot deadlock each other.
+                last_seq: dict[str, int] = {}
+                for thread_id in sorted({e["thread_id"] for e in events}):
+                    last_seq[thread_id] = await self._lock_and_max_seq(session, thread_id)
+                rows = []
+                for e in events:
+                    thread_id = e["thread_id"]
+                    last_seq[thread_id] += 1
+                    content = e.get("content", "")
+                    category = e.get("category", "trace")
+                    metadata = e.get("metadata")
+                    content, metadata = self._truncate_trace(category, content, metadata)
+                    if isinstance(content, dict):
+                        db_content = json.dumps(content, default=str, ensure_ascii=False)
+                        metadata = {**(metadata or {}), "content_is_dict": True}
+                    else:
+                        db_content = content
+                    row = RunEventRow(
+                        thread_id=thread_id,
+                        run_id=e["run_id"],
+                        event_type=e["event_type"],
+                        category=category,
+                        content=db_content,
+                        event_metadata=metadata,
+                        seq=last_seq[thread_id],
+                        created_at=datetime.fromisoformat(e["created_at"]) if e.get("created_at") else datetime.now(UTC),
+                    )
+                    session.add(row)
+                    rows.append(row)
+        return [self._row_to_dict(r) for r in rows]
+
+    async def list_messages(self, thread_id, *, limit=50, before_seq=None, after_seq=None):
+        """Return ``category == "message"`` events of a thread, ascending by seq.
+
+        With ``after_seq`` the first ``limit`` matches are returned;
+        otherwise the last ``limit`` matches (optionally below ``before_seq``).
+        """
+        stmt = select(RunEventRow).where(RunEventRow.thread_id == thread_id, RunEventRow.category == "message")
+        if before_seq is not None:
+            stmt = stmt.where(RunEventRow.seq < before_seq)
+        if after_seq is not None:
+            stmt = stmt.where(RunEventRow.seq > after_seq)
+
+        if after_seq is not None:
+            # Forward pagination: first `limit` records after cursor
+            stmt = stmt.order_by(RunEventRow.seq.asc()).limit(limit)
+            async with self._sf() as session:
+                result = await session.execute(stmt)
+                return [self._row_to_dict(r) for r in result.scalars()]
+        else:
+            # before_seq or default (latest): take last `limit` records, return ascending
+            stmt = stmt.order_by(RunEventRow.seq.desc()).limit(limit)
+            async with self._sf() as session:
+                result = await session.execute(stmt)
+                rows = list(result.scalars())
+                return [self._row_to_dict(r) for r in reversed(rows)]
+
+    async def list_events(self, thread_id, run_id, *, event_types=None, limit=500):
+        """Return up to ``limit`` events of a run, ascending by seq, optionally filtered by type."""
+        stmt = select(RunEventRow).where(RunEventRow.thread_id == thread_id, RunEventRow.run_id == run_id)
+        if event_types:
+            stmt = stmt.where(RunEventRow.event_type.in_(event_types))
+        stmt = stmt.order_by(RunEventRow.seq.asc()).limit(limit)
+        async with self._sf() as session:
+            result = await session.execute(stmt)
+            return [self._row_to_dict(r) for r in result.scalars()]
+
+    async def list_messages_by_run(self, thread_id, run_id):
+        """Return all ``category == "message"`` events of a run, ascending by seq."""
+        stmt = select(RunEventRow).where(RunEventRow.thread_id == thread_id, RunEventRow.run_id == run_id, RunEventRow.category == "message").order_by(RunEventRow.seq.asc())
+        async with self._sf() as session:
+            result = await session.execute(stmt)
+            return [self._row_to_dict(r) for r in result.scalars()]
+
+    async def count_messages(self, thread_id):
+        """Count ``category == "message"`` events in a thread."""
+        stmt = select(func.count()).select_from(RunEventRow).where(RunEventRow.thread_id == thread_id, RunEventRow.category == "message")
+        async with self._sf() as session:
+            return await session.scalar(stmt) or 0
+
+    async def delete_by_thread(self, thread_id):
+        """Delete every event of a thread; return how many rows were counted before the delete."""
+        async with self._sf() as session:
+            count_stmt = select(func.count()).select_from(RunEventRow).where(RunEventRow.thread_id == thread_id)
+            count = await session.scalar(count_stmt) or 0
+            if count > 0:
+                await session.execute(delete(RunEventRow).where(RunEventRow.thread_id == thread_id))
+                await session.commit()
+            return count
+
+    async def delete_by_run(self, thread_id, run_id):
+        """Delete every event of a run; return how many rows were counted before the delete."""
+        async with self._sf() as session:
+            count_stmt = select(func.count()).select_from(RunEventRow).where(RunEventRow.thread_id == thread_id, RunEventRow.run_id == run_id)
+            count = await session.scalar(count_stmt) or 0
+            if count > 0:
+                await session.execute(delete(RunEventRow).where(RunEventRow.thread_id == thread_id, RunEventRow.run_id == run_id))
+                await session.commit()
+            return count
diff --git a/backend/packages/harness/deerflow/runtime/events/store/jsonl.py b/backend/packages/harness/deerflow/runtime/events/store/jsonl.py
new file mode 100644
index 000000000..1a4aac38c
--- /dev/null
+++ b/backend/packages/harness/deerflow/runtime/events/store/jsonl.py
@@ -0,0 +1,179 @@
+"""JSONL file-backed RunEventStore implementation.
+
+Each run's events are stored in a single file:
+``.deer-flow/threads/{thread_id}/runs/{run_id}.jsonl``
+
+All categories (message, trace, lifecycle) are in the same file.
+This backend is suitable for lightweight single-node deployments.
+
+Known trade-off: ``list_messages()`` must scan all run files for a
+thread since messages from multiple runs need unified seq ordering.
+``list_events()`` reads only one file -- the fast path.
+"""
+
+from __future__ import annotations
+
+import json
+import logging
+import re
+from datetime import UTC, datetime
+from pathlib import Path
+
+from deerflow.runtime.events.store.base import RunEventStore
+
+logger = logging.getLogger(__name__)
+
+_SAFE_ID_PATTERN = re.compile(r"^[A-Za-z0-9_\-]+$")
+
+
+class JsonlRunEventStore(RunEventStore):
+    """RunEventStore that appends one JSON object per line to per-run files.
+
+    Files live at ``<base_dir>/threads/<thread_id>/runs/<run_id>.jsonl``.
+    The per-thread seq counter is cached in memory and rebuilt from the
+    files on first use.
+    """
+
+    def __init__(self, base_dir: str | Path | None = None):
+        self._base_dir = Path(base_dir) if base_dir else Path(".deer-flow")
+        self._seq_counters: dict[str, int] = {}  # thread_id -> current max seq
+
+    @staticmethod
+    def _validate_id(value: str, label: str) -> str:
+        """Validate that an ID is safe for use in filesystem paths."""
+        # fullmatch, not match: with match() the trailing ``$`` also accepts an
+        # id that ends in a newline, which would end up in the file name.
+        if not value or not _SAFE_ID_PATTERN.fullmatch(value):
+            raise ValueError(f"Invalid {label}: must be alphanumeric/dash/underscore, got {value!r}")
+        return value
+
+    def _thread_dir(self, thread_id: str) -> Path:
+        self._validate_id(thread_id, "thread_id")
+        return self._base_dir / "threads" / thread_id / "runs"
+
+    def _run_file(self, thread_id: str, run_id: str) -> Path:
+        self._validate_id(run_id, "run_id")
+        return self._thread_dir(thread_id) / f"{run_id}.jsonl"
+
+    def _next_seq(self, thread_id: str) -> int:
+        self._seq_counters[thread_id] = self._seq_counters.get(thread_id, 0) + 1
+        return self._seq_counters[thread_id]
+
+    @staticmethod
+    def _read_jsonl(path: Path) -> list[dict]:
+        """Parse one JSONL file, skipping blank and malformed lines."""
+        records = []
+        for line in path.read_text(encoding="utf-8").splitlines():
+            if not line.strip():
+                continue
+            try:
+                records.append(json.loads(line))
+            except json.JSONDecodeError:
+                logger.debug("Skipping malformed JSONL line in %s", path)
+        return records
+
+    def _ensure_seq_loaded(self, thread_id: str) -> None:
+        """Load max seq from existing files if not yet cached."""
+        if thread_id in self._seq_counters:
+            return
+        max_seq = 0
+        thread_dir = self._thread_dir(thread_id)
+        if thread_dir.exists():
+            for f in thread_dir.glob("*.jsonl"):
+                for record in self._read_jsonl(f):
+                    max_seq = max(max_seq, record.get("seq", 0))
+        self._seq_counters[thread_id] = max_seq
+
+    def _write_record(self, record: dict) -> None:
+        """Append ``record`` as one JSON line to its run file, creating directories as needed."""
+        path = self._run_file(record["thread_id"], record["run_id"])
+        path.parent.mkdir(parents=True, exist_ok=True)
+        with open(path, "a", encoding="utf-8") as f:
+            f.write(json.dumps(record, default=str, ensure_ascii=False) + "\n")
+
+    def _read_thread_events(self, thread_id: str) -> list[dict]:
+        """Read all events for a thread, sorted by seq."""
+        events = []
+        thread_dir = self._thread_dir(thread_id)
+        if not thread_dir.exists():
+            return events
+        for f in sorted(thread_dir.glob("*.jsonl")):
+            events.extend(self._read_jsonl(f))
+        events.sort(key=lambda e: e.get("seq", 0))
+        return events
+
+    def _read_run_events(self, thread_id: str, run_id: str) -> list[dict]:
+        """Read events for a specific run file."""
+        path = self._run_file(thread_id, run_id)
+        if not path.exists():
+            return []
+        events = self._read_jsonl(path)
+        events.sort(key=lambda e: e.get("seq", 0))
+        return events
+
+    async def put(self, *, thread_id, run_id, event_type, category, content="", metadata=None, created_at=None):
+        """Append one event, assigning the next seq of its thread; return the stored record."""
+        self._ensure_seq_loaded(thread_id)
+        seq = self._next_seq(thread_id)
+        record = {
+            "thread_id": thread_id,
+            "run_id": run_id,
+            "event_type": event_type,
+            "category": category,
+            "content": content,
+            "metadata": metadata or {},
+            "seq": seq,
+            "created_at": created_at or datetime.now(UTC).isoformat(),
+        }
+        self._write_record(record)
+        return record
+
+    async def put_batch(self, events):
+        """Append each event via :meth:`put`; return the stored records in input order."""
+        if not events:
+            return []
+        results = []
+        for ev in events:
+            record = await self.put(**ev)
+            results.append(record)
+        return results
+
+    async def list_messages(self, thread_id, *, limit=50, before_seq=None, after_seq=None):
+        """Return ``category == "message"`` events of a thread, ascending by seq.
+
+        ``before_seq`` takes the last ``limit`` messages below the cursor,
+        ``after_seq`` the first ``limit`` above it, neither the latest
+        ``limit``. A non-positive ``limit`` yields an empty list.
+        """
+        if limit <= 0:
+            # ``messages[-0:]`` is the whole list, so a zero limit must be
+            # handled before slicing from the end.
+            return []
+        all_events = self._read_thread_events(thread_id)
+        messages = [e for e in all_events if e.get("category") == "message"]
+
+        if before_seq is not None:
+            messages = [e for e in messages if e["seq"] < before_seq]
+            return messages[-limit:]
+        elif after_seq is not None:
+            messages = [e for e in messages if e["seq"] > after_seq]
+            return messages[:limit]
+        else:
+            return messages[-limit:]
+
+    async def list_events(self, thread_id, run_id, *, event_types=None, limit=500):
+        """Return up to ``limit`` events of a run, ascending by seq, optionally filtered by type."""
+        events = self._read_run_events(thread_id, run_id)
+        if event_types is not None:
+            events = [e for e in events if e.get("event_type") in event_types]
+        # Clamp so a negative limit returns nothing instead of dropping the tail.
+        return events[: max(limit, 0)]
+
+    async def list_messages_by_run(self, thread_id, run_id):
+        """Return all ``category == "message"`` events of a run, ascending by seq."""
+        events = self._read_run_events(thread_id, run_id)
+        return [e for e in events if e.get("category") == "message"]
+
+    async def count_messages(self, thread_id):
+        """Count ``category == "message"`` events in a thread."""
+        all_events = self._read_thread_events(thread_id)
+        return sum(1 for e in all_events if e.get("category") == "message")
+
+    async def delete_by_thread(self, thread_id):
+        """Remove every run file of a thread and reset its seq counter; return the event count."""
+        all_events = self._read_thread_events(thread_id)
+        count = len(all_events)
+        thread_dir = self._thread_dir(thread_id)
+        if thread_dir.exists():
+            for f in thread_dir.glob("*.jsonl"):
+                f.unlink()
+        self._seq_counters.pop(thread_id, None)
+        return count
+
+    async def delete_by_run(self, thread_id, run_id):
+        """Remove one run file; return how many events it held. The thread's seq counter is kept."""
+        events = self._read_run_events(thread_id, run_id)
+        count = len(events)
+        path = self._run_file(thread_id, run_id)
+        if path.exists():
+            path.unlink()
+        return count
diff --git a/backend/packages/harness/deerflow/runtime/events/store/memory.py b/backend/packages/harness/deerflow/runtime/events/store/memory.py
new file mode 100644
index 000000000..889159086
--- /dev/null
+++ b/backend/packages/harness/deerflow/runtime/events/store/memory.py
@@ -0,0 +1,120 @@
+"""In-memory RunEventStore. Used when run_events.backend=memory (default) and in tests.
+
+Thread-safe for single-process async usage (no threading locks needed
+since all mutations happen within the same event loop).
+"""
+
+from __future__ import annotations
+
+from datetime import UTC, datetime
+
+from deerflow.runtime.events.store.base import RunEventStore
+
+
+class MemoryRunEventStore(RunEventStore):
+    """RunEventStore that keeps every event in per-thread Python lists."""
+
+    def __init__(self) -> None:
+        self._events: dict[str, list[dict]] = {}  # thread_id -> sorted event list
+        self._seq_counters: dict[str, int] = {}  # thread_id -> last assigned seq
+
+    def _next_seq(self, thread_id: str) -> int:
+        current = self._seq_counters.get(thread_id, 0)
+        next_val = current + 1
+        self._seq_counters[thread_id] = next_val
+        return next_val
+
+    def _put_one(
+        self,
+        *,
+        thread_id: str,
+        run_id: str,
+        event_type: str,
+        category: str,
+        content: str | dict = "",
+        metadata: dict | None = None,
+        created_at: str | None = None,
+    ) -> dict:
+        """Build the record, assign the thread's next seq, append it and return it."""
+        seq = self._next_seq(thread_id)
+        record = {
+            "thread_id": thread_id,
+            "run_id": run_id,
+            "event_type": event_type,
+            "category": category,
+            "content": content,
+            "metadata": metadata or {},
+            "seq": seq,
+            "created_at": created_at or datetime.now(UTC).isoformat(),
+        }
+        self._events.setdefault(thread_id, []).append(record)
+        return record
+
+    async def put(
+        self,
+        *,
+        thread_id,
+        run_id,
+        event_type,
+        category,
+        content="",
+        metadata=None,
+        created_at=None,
+    ):
+        """Store one event and return the complete record (with ``seq``)."""
+        return self._put_one(
+            thread_id=thread_id,
+            run_id=run_id,
+            event_type=event_type,
+            category=category,
+            content=content,
+            metadata=metadata,
+            created_at=created_at,
+        )
+
+    async def put_batch(self, events):
+        """Store each event dict (keys match :meth:`put`); return the records in input order."""
+        results = []
+        for ev in events:
+            record = self._put_one(**ev)
+            results.append(record)
+        return results
+
+    async def list_messages(self, thread_id, *, limit=50, before_seq=None, after_seq=None):
+        """Return ``category == "message"`` events of a thread, ascending by seq.
+
+        ``before_seq`` takes the last ``limit`` messages below the cursor,
+        ``after_seq`` the first ``limit`` above it, neither the latest
+        ``limit``. A non-positive ``limit`` yields an empty list.
+        """
+        if limit <= 0:
+            # ``messages[-0:]`` is the whole list, so a zero limit must be
+            # handled before slicing from the end.
+            return []
+        all_events = self._events.get(thread_id, [])
+        messages = [e for e in all_events if e["category"] == "message"]
+
+        if before_seq is not None:
+            messages = [e for e in messages if e["seq"] < before_seq]
+            # Take the last `limit` records
+            return messages[-limit:]
+        elif after_seq is not None:
+            messages = [e for e in messages if e["seq"] > after_seq]
+            return messages[:limit]
+        else:
+            # Return the latest `limit` records, ascending
+            return messages[-limit:]
+
+    async def list_events(self, thread_id, run_id, *, event_types=None, limit=500):
+        """Return up to ``limit`` events of a run in insertion order, optionally filtered by type."""
+        all_events = self._events.get(thread_id, [])
+        filtered = [e for e in all_events if e["run_id"] == run_id]
+        if event_types is not None:
+            filtered = [e for e in filtered if e["event_type"] in event_types]
+        # Clamp so a negative limit returns nothing instead of dropping the tail.
+        return filtered[: max(limit, 0)]
+
+    async def list_messages_by_run(self, thread_id, run_id):
+        """Return all ``category == "message"`` events of a run in insertion order."""
+        all_events = self._events.get(thread_id, [])
+        return [e for e in all_events if e["run_id"] == run_id and e["category"] == "message"]
+
+    async def count_messages(self, thread_id):
+        """Count ``category == "message"`` events in a thread."""
+        all_events = self._events.get(thread_id, [])
+        return sum(1 for e in all_events if e["category"] == "message")
+
+    async def delete_by_thread(self, thread_id):
+        """Drop all events and the seq counter of a thread; return the number of removed events."""
+        events = self._events.pop(thread_id, [])
+        self._seq_counters.pop(thread_id, None)
+        return len(events)
+
+    async def delete_by_run(self, thread_id, run_id):
+        """Drop the events of one run; return how many were removed. The seq counter is kept."""
+        all_events = self._events.get(thread_id, [])
+        if not all_events:
+            return 0
+        remaining = [e for e in all_events if e["run_id"] != run_id]
+        removed = len(all_events) - len(remaining)
+        self._events[thread_id] = remaining
+        return removed
diff --git a/backend/packages/harness/deerflow/runtime/journal.py b/backend/packages/harness/deerflow/runtime/journal.py
new file mode 100644
index 000000000..b9aa019ad
--- /dev/null
+++ b/backend/packages/harness/deerflow/runtime/journal.py
@@ -0,0 +1,471 @@
+"""Run event capture via LangChain callbacks.
+
+RunJournal sits between LangChain's callback mechanism and the pluggable
+RunEventStore. It standardizes callback data into RunEvent records and
+handles token usage accumulation.
class RunJournal(BaseCallbackHandler):
    """LangChain callback handler that captures events to RunEventStore.

    Callback methods are synchronous, so events are appended to an
    in-memory buffer and written to the store in batches: opportunistically
    from ``_flush_sync`` (when an event loop is running in the calling
    thread) and definitively from the async ``flush()``.  Token usage found
    on LLM responses is accumulated and exposed by ``get_completion_data()``.
    """

    def __init__(
        self,
        run_id: str,
        thread_id: str,
        event_store: RunEventStore,
        *,
        track_token_usage: bool = True,
        flush_threshold: int = 20,
    ):
        """Create a journal bound to one run.

        Args:
            run_id: Run identifier stamped on every event.
            thread_id: Thread identifier stamped on every event.
            event_store: Store receiving the buffered events via ``put_batch``.
            track_token_usage: When False, token accumulators stay at zero.
            flush_threshold: Buffer length that triggers a background flush.
        """
        super().__init__()
        self.run_id = run_id
        self.thread_id = thread_id
        self._store = event_store
        self._track_tokens = track_token_usage
        self._flush_threshold = flush_threshold

        # Write buffer
        self._buffer: list[dict] = []
        # Background flush tasks still in flight.  Holding a strong reference
        # keeps them from being garbage-collected mid-execution and lets
        # ``flush()`` wait for them.
        self._pending_flushes: set[asyncio.Task] = set()

        # Token accumulators
        self._total_input_tokens = 0
        self._total_output_tokens = 0
        self._total_tokens = 0
        self._llm_call_count = 0
        self._lead_agent_tokens = 0
        self._subagent_tokens = 0
        self._middleware_tokens = 0

        # Convenience fields
        self._last_ai_msg: str | None = None
        self._first_human_msg: str | None = None
        self._msg_count = 0

        # Latency tracking
        self._llm_start_times: dict[str, float] = {}  # langchain run_id -> start time

        # LLM request/response tracking
        self._llm_call_index = 0
        self._llm_call_indexes: dict[str, int] = {}  # langchain run_id -> index assigned at request time
        self._cached_prompts: dict[str, list[dict]] = {}  # langchain run_id -> OpenAI messages
        self._cached_models: dict[str, str] = {}  # langchain run_id -> model name

        # Tool call ID cache
        self._tool_call_ids: dict[str, str] = {}  # langchain run_id -> tool_call_id

    # -- Lifecycle callbacks --

    def on_chain_start(self, serialized: dict, inputs: Any, *, run_id: UUID, **kwargs: Any) -> None:
        """Record ``run_start`` for the root chain only (no ``parent_run_id``)."""
        if kwargs.get("parent_run_id") is not None:
            return
        self._put(
            event_type="run_start",
            category="lifecycle",
            metadata={"input_preview": str(inputs)[:500]},
        )

    def on_chain_end(self, outputs: Any, *, run_id: UUID, **kwargs: Any) -> None:
        """Record ``run_end`` for the root chain and trigger a flush."""
        if kwargs.get("parent_run_id") is not None:
            return
        self._put(event_type="run_end", category="lifecycle", metadata={"status": "success"})
        self._flush_sync()

    def on_chain_error(self, error: BaseException, *, run_id: UUID, **kwargs: Any) -> None:
        """Record ``run_error`` for the root chain and trigger a flush."""
        if kwargs.get("parent_run_id") is not None:
            return
        self._put(
            event_type="run_error",
            category="lifecycle",
            content=str(error),
            metadata={"error_type": type(error).__name__},
        )
        self._flush_sync()

    # -- LLM callbacks --

    def on_chat_model_start(self, serialized: dict, messages: list[list], *, run_id: UUID, **kwargs: Any) -> None:
        """Capture structured prompt messages for llm_request event."""
        from deerflow.runtime.converters import langchain_messages_to_openai

        rid = str(run_id)
        self._llm_start_times[rid] = time.monotonic()
        self._llm_call_index += 1
        # Remember the index per call so the matching llm_response reports
        # the same value even when several LLM calls overlap.
        call_index = self._llm_call_index
        self._llm_call_indexes[rid] = call_index

        model_name = (serialized or {}).get("name", "")
        self._cached_models[rid] = model_name

        # Convert the first message list (LangChain passes list-of-lists)
        prompt_msgs = messages[0] if messages else []
        openai_msgs = langchain_messages_to_openai(prompt_msgs)
        self._cached_prompts[rid] = openai_msgs

        caller = self._identify_caller(kwargs)
        self._put(
            event_type="llm_request",
            category="trace",
            content={"model": model_name, "messages": openai_msgs},
            metadata={"caller": caller, "llm_call_index": call_index},
        )

    def on_llm_start(self, serialized: dict, prompts: list[str], *, run_id: UUID, **kwargs: Any) -> None:
        """Track latency only; ``on_chat_model_start`` is the preferred hook."""
        self._llm_start_times[str(run_id)] = time.monotonic()

    def on_llm_end(self, response: Any, *, run_id: UUID, **kwargs: Any) -> None:
        """Emit ``llm_response`` (+ message events for the lead agent) and add up tokens."""
        from deerflow.runtime.converters import langchain_to_openai_completion

        # Release all per-call state first so nothing leaks when the
        # response turns out to be unusable.
        rid = str(run_id)
        start = self._llm_start_times.pop(rid, None)
        call_index = self._llm_call_indexes.pop(rid, None)
        self._cached_prompts.pop(rid, None)
        self._cached_models.pop(rid, None)

        try:
            message = response.generations[0][0].message
        except (IndexError, AttributeError):
            logger.debug("on_llm_end: could not extract message from response")
            return

        caller = self._identify_caller(kwargs)

        # Latency (None when no start callback was seen for this call)
        latency_ms = int((time.monotonic() - start) * 1000) if start is not None else None

        # Token usage from message
        usage = getattr(message, "usage_metadata", None)
        usage_dict = dict(usage) if usage else {}

        if call_index is None:
            # Fallback: on_chat_model_start was not called
            self._llm_call_index += 1
            call_index = self._llm_call_index

        # Trace event: llm_response (OpenAI completion format)
        content = getattr(message, "content", "")
        self._put(
            event_type="llm_response",
            category="trace",
            content=langchain_to_openai_completion(message),
            metadata={
                "caller": caller,
                "usage": usage_dict,
                "latency_ms": latency_ms,
                "llm_call_index": call_index,
            },
        )

        # Message events: only lead_agent gets message-category events.
        # Content uses message.model_dump() to align with checkpoint format.
        tool_calls = getattr(message, "tool_calls", None) or []
        if caller == "lead_agent":
            resp_meta = getattr(message, "response_metadata", None) or {}
            model_name = resp_meta.get("model_name") if isinstance(resp_meta, dict) else None
            if tool_calls:
                # ai_tool_call: agent decided to use tools
                self._put(
                    event_type="ai_tool_call",
                    category="message",
                    content=message.model_dump(),
                    metadata={"model_name": model_name, "finish_reason": "tool_calls"},
                )
            elif isinstance(content, str) and content:
                # ai_message: final text reply
                self._put(
                    event_type="ai_message",
                    category="message",
                    content=message.model_dump(),
                    metadata={"model_name": model_name, "finish_reason": "stop"},
                )
                self._last_ai_msg = content
                self._msg_count += 1

        # Token accumulation
        if self._track_tokens:
            input_tk = usage_dict.get("input_tokens", 0) or 0
            output_tk = usage_dict.get("output_tokens", 0) or 0
            total_tk = usage_dict.get("total_tokens", 0) or 0
            if total_tk == 0:
                total_tk = input_tk + output_tk
            if total_tk > 0:
                self._total_input_tokens += input_tk
                self._total_output_tokens += output_tk
                self._total_tokens += total_tk
                self._llm_call_count += 1
                if caller.startswith("subagent:"):
                    self._subagent_tokens += total_tk
                elif caller.startswith("middleware:"):
                    self._middleware_tokens += total_tk
                else:
                    self._lead_agent_tokens += total_tk

    def on_llm_error(self, error: BaseException, *, run_id: UUID, **kwargs: Any) -> None:
        """Record ``llm_error`` and drop the per-call state of the failed call."""
        rid = str(run_id)
        self._llm_start_times.pop(rid, None)
        self._llm_call_indexes.pop(rid, None)
        self._cached_prompts.pop(rid, None)
        self._cached_models.pop(rid, None)
        self._put(event_type="llm_error", category="trace", content=str(error))

    # -- Tool callbacks --

    def on_tool_start(self, serialized: dict, input_str: str, *, run_id: UUID, **kwargs: Any) -> None:
        """Record ``tool_start`` and cache the tool_call_id for the end/error hook."""
        tool_call_id = kwargs.get("tool_call_id")
        if tool_call_id:
            self._tool_call_ids[str(run_id)] = tool_call_id
        self._put(
            event_type="tool_start",
            category="trace",
            metadata={
                "tool_name": (serialized or {}).get("name", ""),
                "tool_call_id": tool_call_id,
                "args": str(input_str)[:2000],
            },
        )

    def on_tool_end(self, output: Any, *, run_id: UUID, **kwargs: Any) -> None:
        """Record ``tool_end`` (trace) and ``tool_result`` (message) events."""
        from langchain_core.messages import ToolMessage

        # Always evict the cached id: this tool invocation is finished, so
        # the entry must not outlive it even when a better source exists.
        cached_id = self._tool_call_ids.pop(str(run_id), None)

        # Extract fields from ToolMessage object when LangChain provides one.
        # LangChain's _format_output wraps tool results into a ToolMessage
        # with tool_call_id, name, status, and artifact — more complete than
        # what kwargs alone provides.
        if isinstance(output, ToolMessage):
            tool_call_id = output.tool_call_id or kwargs.get("tool_call_id") or cached_id
            tool_name = output.name or kwargs.get("name", "")
            status = getattr(output, "status", "success") or "success"
            content_str = output.content if isinstance(output.content, str) else str(output.content)
            # Use model_dump() for checkpoint-aligned message content.
            # Override tool_call_id if it was resolved from cache.
            msg_content = output.model_dump()
            if msg_content.get("tool_call_id") != tool_call_id:
                msg_content["tool_call_id"] = tool_call_id
        else:
            tool_call_id = kwargs.get("tool_call_id") or cached_id
            tool_name = kwargs.get("name", "")
            status = "success"
            content_str = str(output)
            # Construct checkpoint-aligned dict when output is a plain string.
            msg_content = ToolMessage(
                content=content_str,
                tool_call_id=tool_call_id or "",
                name=tool_name,
                status=status,
            ).model_dump()

        # Trace event (always)
        self._put(
            event_type="tool_end",
            category="trace",
            content=content_str,
            metadata={
                "tool_name": tool_name,
                "tool_call_id": tool_call_id,
                "status": status,
            },
        )

        # Message event: tool_result (checkpoint-aligned model_dump format)
        self._put(
            event_type="tool_result",
            category="message",
            content=msg_content,
            metadata={"tool_name": tool_name, "status": status},
        )

    def on_tool_error(self, error: BaseException, *, run_id: UUID, **kwargs: Any) -> None:
        """Record ``tool_error`` (trace) and an error-status ``tool_result`` (message)."""
        from langchain_core.messages import ToolMessage

        cached_id = self._tool_call_ids.pop(str(run_id), None)
        tool_call_id = kwargs.get("tool_call_id") or cached_id
        tool_name = kwargs.get("name", "")

        # Trace event
        self._put(
            event_type="tool_error",
            category="trace",
            content=str(error),
            metadata={
                "tool_name": tool_name,
                "tool_call_id": tool_call_id,
            },
        )

        # Message event: tool_result with error status (checkpoint-aligned)
        msg_content = ToolMessage(
            content=str(error),
            tool_call_id=tool_call_id or "",
            name=tool_name,
            status="error",
        ).model_dump()
        self._put(
            event_type="tool_result",
            category="message",
            content=msg_content,
            metadata={"tool_name": tool_name, "status": "error"},
        )

    # -- Custom event callback --

    def on_custom_event(self, name: str, data: Any, *, run_id: UUID, **kwargs: Any) -> None:
        """Record a custom event; ``summarization`` also emits a middleware event."""
        from deerflow.runtime.serialization import serialize_lc_object

        if name == "summarization":
            data_dict = data if isinstance(data, dict) else {}
            self._put(
                event_type="summarization",
                category="trace",
                content=data_dict.get("summary", ""),
                metadata={
                    "replaced_message_ids": data_dict.get("replaced_message_ids", []),
                    "replaced_count": data_dict.get("replaced_count", 0),
                },
            )
            self._put(
                event_type="middleware:summarize",
                category="middleware",
                content={"role": "system", "content": data_dict.get("summary", "")},
                metadata={"replaced_count": data_dict.get("replaced_count", 0)},
            )
        else:
            event_data = serialize_lc_object(data) if not isinstance(data, dict) else data
            self._put(
                event_type=name,
                category="trace",
                metadata=event_data if isinstance(event_data, dict) else {"data": event_data},
            )

    # -- Internal methods --

    def _put(self, *, event_type: str, category: str, content: str | dict = "", metadata: dict | None = None) -> None:
        """Append one event to the buffer; flush once the threshold is reached."""
        self._buffer.append(
            {
                "thread_id": self.thread_id,
                "run_id": self.run_id,
                "event_type": event_type,
                "category": category,
                "content": content,
                "metadata": metadata or {},
                "created_at": datetime.now(UTC).isoformat(),
            }
        )
        if len(self._buffer) >= self._flush_threshold:
            self._flush_sync()

    def _flush_sync(self) -> None:
        """Best-effort flush of buffer to RunEventStore.

        BaseCallbackHandler methods are synchronous. If an event loop is
        running we schedule an async ``put_batch``; otherwise the events
        stay in the buffer and are flushed later by the async ``flush()``
        call in the worker's ``finally`` block.
        """
        if not self._buffer:
            return
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No event loop — keep events in buffer for later async flush.
            return
        batch = self._buffer.copy()
        self._buffer.clear()
        task = loop.create_task(self._flush_async(batch))
        # Keep a strong reference until the task completes so it cannot be
        # garbage-collected early and so ``flush()`` can await it.
        self._pending_flushes.add(task)
        task.add_done_callback(self._pending_flushes.discard)
        task.add_done_callback(self._on_flush_done)

    async def _flush_async(self, batch: list[dict]) -> None:
        """Write *batch* to the store; on failure put it back at the buffer front."""
        try:
            await self._store.put_batch(batch)
        except Exception:
            logger.warning(
                "Failed to flush %d events for run %s — returning to buffer",
                len(batch),
                self.run_id,
                exc_info=True,
            )
            # Return failed events to buffer for retry on next flush
            self._buffer = batch + self._buffer

    @staticmethod
    def _on_flush_done(task: asyncio.Task) -> None:
        """Log an unexpected failure of a background flush task."""
        if task.cancelled():
            return
        exc = task.exception()
        if exc:
            logger.warning("Journal flush task failed: %s", exc)

    def _identify_caller(self, kwargs: dict) -> str:
        """Return the first caller tag found in ``kwargs['tags']``, else ``lead_agent``."""
        for tag in kwargs.get("tags") or []:
            if isinstance(tag, str) and (tag.startswith("subagent:") or tag.startswith("middleware:") or tag == "lead_agent"):
                return tag
        # Default to lead_agent: the main agent graph does not inject
        # callback tags, while subagents and middleware explicitly tag
        # themselves.
        return "lead_agent"

    # -- Public methods (called by worker) --

    def set_first_human_message(self, content: str) -> None:
        """Record the first human message for convenience fields."""
        self._first_human_msg = content[:2000] if content else None

    def record_middleware(self, tag: str, *, name: str, hook: str, action: str, changes: dict) -> None:
        """Record a middleware state-change event.

        Called by middleware implementations when they perform a meaningful
        state change (e.g., title generation, summarization, HITL approval).
        Pure-observation middleware should not call this.

        Args:
            tag: Short identifier for the middleware (e.g., "title", "summarize",
                "guardrail"). Used to form event_type="middleware:{tag}".
            name: Full middleware class name.
            hook: Lifecycle hook that triggered the action (e.g., "after_model").
            action: Specific action performed (e.g., "generate_title").
            changes: Dict describing the state changes made.
        """
        self._put(
            event_type=f"middleware:{tag}",
            category="middleware",
            content={"name": name, "hook": hook, "action": action, "changes": changes},
        )

    async def flush(self) -> None:
        """Force flush remaining buffer. Called in worker's finally block.

        Waits for background flush tasks started by ``_flush_sync`` first, so
        that batches they failed to write are back in the buffer and earlier
        events reach the store before the final batch.

        Raises:
            Exception: Whatever ``put_batch`` raises for the final batch; the
                unwritten events are put back into the buffer beforehand.
        """
        loop = asyncio.get_running_loop()
        # Only tasks bound to the current loop can be awaited from here.
        pending = [t for t in self._pending_flushes if t.get_loop() is loop]
        if pending:
            await asyncio.gather(*pending, return_exceptions=True)

        if not self._buffer:
            return
        batch = self._buffer.copy()
        self._buffer.clear()
        try:
            await self._store.put_batch(batch)
        except Exception:
            # Keep the events so the caller (or a later flush) can retry.
            self._buffer = batch + self._buffer
            raise

    def get_completion_data(self) -> dict:
        """Return accumulated token and message data for run completion."""
        return {
            "total_input_tokens": self._total_input_tokens,
            "total_output_tokens": self._total_output_tokens,
            "total_tokens": self._total_tokens,
            "llm_call_count": self._llm_call_count,
            "lead_agent_tokens": self._lead_agent_tokens,
            "subagent_tokens": self._subagent_tokens,
            "middleware_tokens": self._middleware_tokens,
            "message_count": self._msg_count,
            "last_ai_message": self._last_ai_msg,
            "first_human_message": self._first_human_msg,
        }
optional persistent RunStore backing.""" from __future__ import annotations @@ -7,9 +7,13 @@ import logging import uuid from dataclasses import dataclass, field from datetime import UTC, datetime +from typing import TYPE_CHECKING from .schemas import DisconnectMode, RunStatus +if TYPE_CHECKING: + from deerflow.runtime.runs.store.base import RunStore + logger = logging.getLogger(__name__) @@ -38,11 +42,44 @@ class RunRecord: class RunManager: - """In-memory run registry. All mutations are protected by an asyncio lock.""" + """In-memory run registry with optional persistent RunStore backing. - def __init__(self) -> None: + All mutations are protected by an asyncio lock. When a ``store`` is + provided, serializable metadata is also persisted to the store so + that run history survives process restarts. + """ + + def __init__(self, store: RunStore | None = None) -> None: self._runs: dict[str, RunRecord] = {} self._lock = asyncio.Lock() + self._store = store + + async def _persist_to_store(self, record: RunRecord, *, follow_up_to_run_id: str | None = None) -> None: + """Best-effort persist run record to backing store.""" + if self._store is None: + return + try: + await self._store.put( + record.run_id, + thread_id=record.thread_id, + assistant_id=record.assistant_id, + status=record.status.value, + multitask_strategy=record.multitask_strategy, + metadata=record.metadata or {}, + kwargs=record.kwargs or {}, + created_at=record.created_at, + follow_up_to_run_id=follow_up_to_run_id, + ) + except Exception: + logger.warning("Failed to persist run %s to store", record.run_id, exc_info=True) + + async def update_run_completion(self, run_id: str, **kwargs) -> None: + """Persist token usage and completion data to the backing store.""" + if self._store is not None: + try: + await self._store.update_run_completion(run_id, **kwargs) + except Exception: + logger.warning("Failed to persist run completion for %s", run_id, exc_info=True) async def create( self, @@ -53,6 +90,7 @@ 
class RunManager: metadata: dict | None = None, kwargs: dict | None = None, multitask_strategy: str = "reject", + follow_up_to_run_id: str | None = None, ) -> RunRecord: """Create a new pending run and register it.""" run_id = str(uuid.uuid4()) @@ -71,6 +109,7 @@ class RunManager: ) async with self._lock: self._runs[run_id] = record + await self._persist_to_store(record, follow_up_to_run_id=follow_up_to_run_id) logger.info("Run created: run_id=%s thread_id=%s", run_id, thread_id) return record @@ -96,6 +135,11 @@ class RunManager: record.updated_at = _now_iso() if error is not None: record.error = error + if self._store is not None: + try: + await self._store.update_status(run_id, status.value, error=error) + except Exception: + logger.warning("Failed to persist status update for run %s", run_id, exc_info=True) logger.info("Run %s -> %s", run_id, status.value) async def cancel(self, run_id: str, *, action: str = "interrupt") -> bool: @@ -132,6 +176,7 @@ class RunManager: metadata: dict | None = None, kwargs: dict | None = None, multitask_strategy: str = "reject", + follow_up_to_run_id: str | None = None, ) -> RunRecord: """Atomically check for inflight runs and create a new one. 
@@ -185,6 +230,7 @@ class RunManager: ) self._runs[run_id] = record + await self._persist_to_store(record, follow_up_to_run_id=follow_up_to_run_id) logger.info("Run created: run_id=%s thread_id=%s", run_id, thread_id) return record diff --git a/backend/packages/harness/deerflow/runtime/runs/store/__init__.py b/backend/packages/harness/deerflow/runtime/runs/store/__init__.py new file mode 100644 index 000000000..265a6fffb --- /dev/null +++ b/backend/packages/harness/deerflow/runtime/runs/store/__init__.py @@ -0,0 +1,4 @@ +from deerflow.runtime.runs.store.base import RunStore +from deerflow.runtime.runs.store.memory import MemoryRunStore + +__all__ = ["MemoryRunStore", "RunStore"] diff --git a/backend/packages/harness/deerflow/runtime/runs/store/base.py b/backend/packages/harness/deerflow/runtime/runs/store/base.py new file mode 100644 index 000000000..9ba1caca3 --- /dev/null +++ b/backend/packages/harness/deerflow/runtime/runs/store/base.py @@ -0,0 +1,96 @@ +"""Abstract interface for run metadata storage. + +RunManager depends on this interface. Implementations: +- MemoryRunStore: in-memory dict (development, tests) +- Future: RunRepository backed by SQLAlchemy ORM + +All methods accept an optional owner_id for user isolation. +When owner_id is None, no user filtering is applied (single-user mode). 
+""" + +from __future__ import annotations + +import abc +from typing import Any + + +class RunStore(abc.ABC): + @abc.abstractmethod + async def put( + self, + run_id: str, + *, + thread_id: str, + assistant_id: str | None = None, + owner_id: str | None = None, + status: str = "pending", + multitask_strategy: str = "reject", + metadata: dict[str, Any] | None = None, + kwargs: dict[str, Any] | None = None, + error: str | None = None, + created_at: str | None = None, + follow_up_to_run_id: str | None = None, + ) -> None: + pass + + @abc.abstractmethod + async def get(self, run_id: str) -> dict[str, Any] | None: + pass + + @abc.abstractmethod + async def list_by_thread( + self, + thread_id: str, + *, + owner_id: str | None = None, + limit: int = 100, + ) -> list[dict[str, Any]]: + pass + + @abc.abstractmethod + async def update_status( + self, + run_id: str, + status: str, + *, + error: str | None = None, + ) -> None: + pass + + @abc.abstractmethod + async def delete(self, run_id: str) -> None: + pass + + @abc.abstractmethod + async def update_run_completion( + self, + run_id: str, + *, + status: str, + total_input_tokens: int = 0, + total_output_tokens: int = 0, + total_tokens: int = 0, + llm_call_count: int = 0, + lead_agent_tokens: int = 0, + subagent_tokens: int = 0, + middleware_tokens: int = 0, + message_count: int = 0, + last_ai_message: str | None = None, + first_human_message: str | None = None, + error: str | None = None, + ) -> None: + pass + + @abc.abstractmethod + async def list_pending(self, *, before: str | None = None) -> list[dict[str, Any]]: + pass + + @abc.abstractmethod + async def aggregate_tokens_by_thread(self, thread_id: str) -> dict[str, Any]: + """Aggregate token usage for completed runs in a thread. + + Returns a dict with keys: total_tokens, total_input_tokens, + total_output_tokens, total_runs, by_model (model_name → {tokens, runs}), + by_caller ({lead_agent, subagent, middleware}). 
class MemoryRunStore(RunStore):
    """RunStore implementation that keeps every run row in a process-local dict."""

    def __init__(self) -> None:
        self._runs: dict[str, dict[str, Any]] = {}

    async def put(
        self,
        run_id,
        *,
        thread_id,
        assistant_id=None,
        owner_id=None,
        status="pending",
        multitask_strategy="reject",
        metadata=None,
        kwargs=None,
        error=None,
        created_at=None,
        follow_up_to_run_id=None,
    ):
        """Insert or overwrite the row for *run_id*."""
        timestamp = datetime.now(UTC).isoformat()
        row: dict[str, Any] = {
            "run_id": run_id,
            "thread_id": thread_id,
            "assistant_id": assistant_id,
            "owner_id": owner_id,
            "status": status,
            "multitask_strategy": multitask_strategy,
            "metadata": metadata or {},
            "kwargs": kwargs or {},
            "error": error,
            "follow_up_to_run_id": follow_up_to_run_id,
            # A caller-supplied creation time wins; otherwise use "now".
            "created_at": created_at or timestamp,
            "updated_at": timestamp,
        }
        self._runs[run_id] = row

    async def get(self, run_id):
        """Return the stored row for *run_id*, or None when unknown."""
        return self._runs.get(run_id)

    async def list_by_thread(self, thread_id, *, owner_id=None, limit=100):
        """Return up to *limit* rows of *thread_id*, newest ``created_at`` first.

        When *owner_id* is not None only rows with that owner are returned.
        """

        def _matches(row: dict[str, Any]) -> bool:
            if row["thread_id"] != thread_id:
                return False
            return owner_id is None or row.get("owner_id") == owner_id

        newest_first = sorted(
            (row for row in self._runs.values() if _matches(row)),
            key=lambda row: row["created_at"],
            reverse=True,
        )
        return newest_first[:limit]

    async def update_status(self, run_id, status, *, error=None):
        """Set the status (and error, when given) of a known run; no-op otherwise."""
        row = self._runs.get(run_id)
        if row is None:
            return
        row["status"] = status
        if error is not None:
            row["error"] = error
        row["updated_at"] = datetime.now(UTC).isoformat()

    async def delete(self, run_id):
        """Remove the row for *run_id* if it exists."""
        self._runs.pop(run_id, None)

    async def update_run_completion(self, run_id, *, status, **kwargs):
        """Set the final status and merge every non-None completion field."""
        row = self._runs.get(run_id)
        if row is None:
            return
        row["status"] = status
        row.update({key: value for key, value in kwargs.items() if value is not None})
        row["updated_at"] = datetime.now(UTC).isoformat()

    async def list_pending(self, *, before=None):
        """Return pending rows created at or before *before* (default: now), oldest first."""
        cutoff = before or datetime.now(UTC).isoformat()
        return sorted(
            (row for row in self._runs.values() if row["status"] == "pending" and row["created_at"] <= cutoff),
            key=lambda row: row["created_at"],
        )

    async def aggregate_tokens_by_thread(self, thread_id: str) -> dict[str, Any]:
        """Sum token usage over the ``success`` / ``error`` runs of *thread_id*."""
        total = total_in = total_out = 0
        lead = sub = mid = 0
        run_count = 0
        by_model: dict[str, dict] = {}

        for row in self._runs.values():
            if row["thread_id"] != thread_id or row.get("status") not in ("success", "error"):
                continue
            run_count += 1

            run_tokens = row.get("total_tokens", 0)
            total += run_tokens
            total_in += row.get("total_input_tokens", 0)
            total_out += row.get("total_output_tokens", 0)
            lead += row.get("lead_agent_tokens", 0)
            sub += row.get("subagent_tokens", 0)
            mid += row.get("middleware_tokens", 0)

            bucket = by_model.setdefault(row.get("model_name") or "unknown", {"tokens": 0, "runs": 0})
            bucket["tokens"] += run_tokens
            bucket["runs"] += 1

        return {
            "total_tokens": total,
            "total_input_tokens": total_in,
            "total_output_tokens": total_out,
            "total_runs": run_count,
            "by_model": by_model,
            "by_caller": {
                "lead_agent": lead,
                "subagent": sub,
                "middleware": mid,
            },
        }
@dataclass(frozen=True)
class RunContext:
    """Bundle of infrastructure dependencies handed to a single agent run.

    Carrying the checkpointer, store and persistence helpers in one
    immutable object keeps ``run_agent`` from growing a new keyword
    argument for every dependency.  Only ``checkpointer`` is required;
    every other field defaults to ``None``.
    """

    checkpointer: Any
    store: Any | None = None
    event_store: Any | None = None
    run_events_config: Any | None = None
    thread_meta_repo: Any | None = None
    follow_up_to_run_id: str | None = None
+ checkpointer = ctx.checkpointer + store = ctx.store + event_store = ctx.event_store + run_events_config = ctx.run_events_config + thread_meta_repo = ctx.thread_meta_repo + follow_up_to_run_id = ctx.follow_up_to_run_id + run_id = record.run_id thread_id = record.thread_id requested_modes: set[str] = set(stream_modes or ["values"]) @@ -57,6 +85,35 @@ async def run_agent( pre_run_snapshot: dict[str, Any] | None = None snapshot_capture_failed = False + # Initialize RunJournal for event capture + journal = None + if event_store is not None: + from deerflow.runtime.journal import RunJournal + + journal = RunJournal( + run_id=run_id, + thread_id=thread_id, + event_store=event_store, + track_token_usage=getattr(run_events_config, "track_token_usage", True), + ) + + # Write human_message event (model_dump format, aligned with checkpoint) + human_msg = _extract_human_message(graph_input) + if human_msg is not None: + msg_metadata = {} + if follow_up_to_run_id: + msg_metadata["follow_up_to_run_id"] = follow_up_to_run_id + await event_store.put( + thread_id=thread_id, + run_id=run_id, + event_type="human_message", + category="message", + content=human_msg.model_dump(), + metadata=msg_metadata or None, + ) + content = human_msg.content + journal.set_first_human_message(content if isinstance(content, str) else str(content)) + # Track whether "events" was requested but skipped if "events" in requested_modes: logger.info( @@ -110,6 +167,11 @@ async def run_agent( config["context"].setdefault("thread_id", thread_id) config.setdefault("configurable", {})["__pregel_runtime"] = runtime + # Inject RunJournal as a LangChain callback handler. + # on_llm_end captures token usage; on_chain_start/end captures lifecycle. 
def _extract_human_message(graph_input: dict) -> HumanMessage | None:
    """Build the HumanMessage to record for a run from ``graph_input["messages"]``.

    The last entry of a message list (or the value itself when it is not a
    list) is inspected.  A ``HumanMessage`` is returned as-is; a string, an
    object exposing ``.content`` or a dict with a ``content`` key is wrapped
    in a new ``HumanMessage``.  ``None`` is returned when there are no
    messages, when a string / dict content is empty, or when the entry has
    an unsupported shape.
    """
    from langchain_core.messages import HumanMessage

    payload = graph_input.get("messages")
    if not payload:
        return None

    candidate = payload[-1] if isinstance(payload, list) else payload
    if isinstance(candidate, HumanMessage):
        return candidate

    if isinstance(candidate, str):
        text = candidate
    elif hasattr(candidate, "content"):
        # Message-like objects are wrapped unconditionally, even when
        # their content is empty.
        return HumanMessage(content=candidate.content)
    elif isinstance(candidate, dict):
        text = candidate.get("content", "")
    else:
        return None

    # Strings and dict payloads with empty content produce no message.
    return HumanMessage(content=text) if text else None
InMemorySaver when config.checkpointer is None.""" from deerflow.agents.checkpointer.async_provider import make_checkpointer - # Mock get_app_config to return a config with checkpointer=None + # Mock get_app_config to return a config with checkpointer=None and database=None mock_config = MagicMock() mock_config.checkpointer = None + mock_config.database = None with patch("deerflow.agents.checkpointer.async_provider.get_app_config", return_value=mock_config): async with make_checkpointer() as checkpointer: diff --git a/backend/tests/test_converters.py b/backend/tests/test_converters.py new file mode 100644 index 000000000..2c2167e01 --- /dev/null +++ b/backend/tests/test_converters.py @@ -0,0 +1,188 @@ +"""Tests for LangChain-to-OpenAI message format converters.""" + +from __future__ import annotations + +import json +from unittest.mock import MagicMock + +from deerflow.runtime.converters import ( + langchain_messages_to_openai, + langchain_to_openai_completion, + langchain_to_openai_message, +) + + +def _make_ai_message(content="", tool_calls=None, id="msg-123", usage_metadata=None, response_metadata=None): + msg = MagicMock() + msg.type = "ai" + msg.content = content + msg.tool_calls = tool_calls or [] + msg.id = id + msg.usage_metadata = usage_metadata + msg.response_metadata = response_metadata or {} + return msg + + +def _make_human_message(content="Hello"): + msg = MagicMock() + msg.type = "human" + msg.content = content + return msg + + +def _make_system_message(content="You are an assistant."): + msg = MagicMock() + msg.type = "system" + msg.content = content + return msg + + +def _make_tool_message(content="result", tool_call_id="call-abc"): + msg = MagicMock() + msg.type = "tool" + msg.content = content + msg.tool_call_id = tool_call_id + return msg + + +class TestLangchainToOpenaiMessage: + def test_ai_message_text_only(self): + msg = _make_ai_message(content="Hello world") + result = langchain_to_openai_message(msg) + assert result["role"] == "assistant" 
+ assert result["content"] == "Hello world" + assert "tool_calls" not in result + + def test_ai_message_with_tool_calls(self): + tool_calls = [ + {"id": "call-1", "name": "bash", "args": {"command": "ls"}}, + ] + msg = _make_ai_message(content="", tool_calls=tool_calls) + result = langchain_to_openai_message(msg) + assert result["role"] == "assistant" + assert result["content"] is None + assert len(result["tool_calls"]) == 1 + tc = result["tool_calls"][0] + assert tc["id"] == "call-1" + assert tc["type"] == "function" + assert tc["function"]["name"] == "bash" + # arguments must be a JSON string + args = json.loads(tc["function"]["arguments"]) + assert args == {"command": "ls"} + + def test_ai_message_text_and_tool_calls(self): + tool_calls = [ + {"id": "call-2", "name": "read_file", "args": {"path": "/tmp/x"}}, + ] + msg = _make_ai_message(content="Reading the file", tool_calls=tool_calls) + result = langchain_to_openai_message(msg) + assert result["role"] == "assistant" + assert result["content"] == "Reading the file" + assert len(result["tool_calls"]) == 1 + + def test_ai_message_empty_content_no_tools(self): + msg = _make_ai_message(content="") + result = langchain_to_openai_message(msg) + assert result["role"] == "assistant" + assert result["content"] == "" + assert "tool_calls" not in result + + def test_ai_message_list_content(self): + # Multimodal content is preserved as-is + list_content = [ + {"type": "text", "text": "Here is an image"}, + {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}}, + ] + msg = _make_ai_message(content=list_content) + result = langchain_to_openai_message(msg) + assert result["role"] == "assistant" + assert result["content"] == list_content + + def test_human_message(self): + msg = _make_human_message("Tell me a joke") + result = langchain_to_openai_message(msg) + assert result["role"] == "user" + assert result["content"] == "Tell me a joke" + + def test_tool_message(self): + msg = 
_make_tool_message(content="file contents here", tool_call_id="call-xyz") + result = langchain_to_openai_message(msg) + assert result["role"] == "tool" + assert result["tool_call_id"] == "call-xyz" + assert result["content"] == "file contents here" + + def test_system_message(self): + msg = _make_system_message("You are a helpful assistant.") + result = langchain_to_openai_message(msg) + assert result["role"] == "system" + assert result["content"] == "You are a helpful assistant." + + +class TestLangchainToOpenaiCompletion: + def test_basic_completion(self): + usage_metadata = {"input_tokens": 10, "output_tokens": 20} + msg = _make_ai_message( + content="Hello", + id="msg-abc", + usage_metadata=usage_metadata, + response_metadata={"model_name": "gpt-4o", "finish_reason": "stop"}, + ) + result = langchain_to_openai_completion(msg) + assert result["id"] == "msg-abc" + assert result["model"] == "gpt-4o" + assert len(result["choices"]) == 1 + choice = result["choices"][0] + assert choice["index"] == 0 + assert choice["finish_reason"] == "stop" + assert choice["message"]["role"] == "assistant" + assert choice["message"]["content"] == "Hello" + assert result["usage"] is not None + assert result["usage"]["prompt_tokens"] == 10 + assert result["usage"]["completion_tokens"] == 20 + assert result["usage"]["total_tokens"] == 30 + + def test_completion_with_tool_calls(self): + tool_calls = [{"id": "call-1", "name": "bash", "args": {}}] + msg = _make_ai_message( + content="", + tool_calls=tool_calls, + id="msg-tc", + response_metadata={"model_name": "gpt-4o"}, + ) + result = langchain_to_openai_completion(msg) + assert result["choices"][0]["finish_reason"] == "tool_calls" + + def test_completion_no_usage(self): + msg = _make_ai_message(content="Hi", id="msg-nousage", usage_metadata=None) + result = langchain_to_openai_completion(msg) + assert result["usage"] is None + + def test_finish_reason_from_response_metadata(self): + msg = _make_ai_message( + content="Done", + 
id="msg-fr", + response_metadata={"model_name": "claude-3", "finish_reason": "end_turn"}, + ) + result = langchain_to_openai_completion(msg) + assert result["choices"][0]["finish_reason"] == "end_turn" + + def test_finish_reason_default_stop(self): + msg = _make_ai_message(content="Done", id="msg-defstop", response_metadata={}) + result = langchain_to_openai_completion(msg) + assert result["choices"][0]["finish_reason"] == "stop" + + +class TestMessagesToOpenai: + def test_convert_message_list(self): + human = _make_human_message("Hi") + ai = _make_ai_message(content="Hello!") + tool_msg = _make_tool_message("result", "call-1") + messages = [human, ai, tool_msg] + result = langchain_messages_to_openai(messages) + assert len(result) == 3 + assert result[0]["role"] == "user" + assert result[1]["role"] == "assistant" + assert result[2]["role"] == "tool" + + def test_empty_list(self): + assert langchain_messages_to_openai([]) == [] diff --git a/backend/tests/test_feedback.py b/backend/tests/test_feedback.py new file mode 100644 index 000000000..ed6c09f44 --- /dev/null +++ b/backend/tests/test_feedback.py @@ -0,0 +1,215 @@ +"""Tests for FeedbackRepository and follow-up association. + +Uses temp SQLite DB for ORM tests. 
+""" + +import pytest + +from deerflow.persistence.feedback import FeedbackRepository + + +async def _make_feedback_repo(tmp_path): + from deerflow.persistence.engine import get_session_factory, init_engine + + url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}" + await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)) + return FeedbackRepository(get_session_factory()) + + +async def _cleanup(): + from deerflow.persistence.engine import close_engine + + await close_engine() + + +# -- FeedbackRepository -- + + +class TestFeedbackRepository: + @pytest.mark.anyio + async def test_create_positive(self, tmp_path): + repo = await _make_feedback_repo(tmp_path) + record = await repo.create(run_id="r1", thread_id="t1", rating=1) + assert record["feedback_id"] + assert record["rating"] == 1 + assert record["run_id"] == "r1" + assert record["thread_id"] == "t1" + assert "created_at" in record + await _cleanup() + + @pytest.mark.anyio + async def test_create_negative_with_comment(self, tmp_path): + repo = await _make_feedback_repo(tmp_path) + record = await repo.create( + run_id="r1", + thread_id="t1", + rating=-1, + comment="Response was inaccurate", + ) + assert record["rating"] == -1 + assert record["comment"] == "Response was inaccurate" + await _cleanup() + + @pytest.mark.anyio + async def test_create_with_message_id(self, tmp_path): + repo = await _make_feedback_repo(tmp_path) + record = await repo.create(run_id="r1", thread_id="t1", rating=1, message_id="msg-42") + assert record["message_id"] == "msg-42" + await _cleanup() + + @pytest.mark.anyio + async def test_create_with_owner(self, tmp_path): + repo = await _make_feedback_repo(tmp_path) + record = await repo.create(run_id="r1", thread_id="t1", rating=1, owner_id="user-1") + assert record["owner_id"] == "user-1" + await _cleanup() + + @pytest.mark.anyio + async def test_create_invalid_rating_zero(self, tmp_path): + repo = await _make_feedback_repo(tmp_path) + with pytest.raises(ValueError): + await 
repo.create(run_id="r1", thread_id="t1", rating=0) + await _cleanup() + + @pytest.mark.anyio + async def test_create_invalid_rating_five(self, tmp_path): + repo = await _make_feedback_repo(tmp_path) + with pytest.raises(ValueError): + await repo.create(run_id="r1", thread_id="t1", rating=5) + await _cleanup() + + @pytest.mark.anyio + async def test_get(self, tmp_path): + repo = await _make_feedback_repo(tmp_path) + created = await repo.create(run_id="r1", thread_id="t1", rating=1) + fetched = await repo.get(created["feedback_id"]) + assert fetched is not None + assert fetched["feedback_id"] == created["feedback_id"] + assert fetched["rating"] == 1 + await _cleanup() + + @pytest.mark.anyio + async def test_get_nonexistent(self, tmp_path): + repo = await _make_feedback_repo(tmp_path) + assert await repo.get("nonexistent") is None + await _cleanup() + + @pytest.mark.anyio + async def test_list_by_run(self, tmp_path): + repo = await _make_feedback_repo(tmp_path) + await repo.create(run_id="r1", thread_id="t1", rating=1) + await repo.create(run_id="r1", thread_id="t1", rating=-1) + await repo.create(run_id="r2", thread_id="t1", rating=1) + results = await repo.list_by_run("t1", "r1") + assert len(results) == 2 + assert all(r["run_id"] == "r1" for r in results) + await _cleanup() + + @pytest.mark.anyio + async def test_list_by_thread(self, tmp_path): + repo = await _make_feedback_repo(tmp_path) + await repo.create(run_id="r1", thread_id="t1", rating=1) + await repo.create(run_id="r2", thread_id="t1", rating=-1) + await repo.create(run_id="r3", thread_id="t2", rating=1) + results = await repo.list_by_thread("t1") + assert len(results) == 2 + assert all(r["thread_id"] == "t1" for r in results) + await _cleanup() + + @pytest.mark.anyio + async def test_delete(self, tmp_path): + repo = await _make_feedback_repo(tmp_path) + created = await repo.create(run_id="r1", thread_id="t1", rating=1) + deleted = await repo.delete(created["feedback_id"]) + assert deleted is True + assert 
await repo.get(created["feedback_id"]) is None + await _cleanup() + + @pytest.mark.anyio + async def test_delete_nonexistent(self, tmp_path): + repo = await _make_feedback_repo(tmp_path) + deleted = await repo.delete("nonexistent") + assert deleted is False + await _cleanup() + + @pytest.mark.anyio + async def test_aggregate_by_run(self, tmp_path): + repo = await _make_feedback_repo(tmp_path) + await repo.create(run_id="r1", thread_id="t1", rating=1) + await repo.create(run_id="r1", thread_id="t1", rating=1) + await repo.create(run_id="r1", thread_id="t1", rating=-1) + stats = await repo.aggregate_by_run("t1", "r1") + assert stats["total"] == 3 + assert stats["positive"] == 2 + assert stats["negative"] == 1 + assert stats["run_id"] == "r1" + await _cleanup() + + @pytest.mark.anyio + async def test_aggregate_empty(self, tmp_path): + repo = await _make_feedback_repo(tmp_path) + stats = await repo.aggregate_by_run("t1", "r1") + assert stats["total"] == 0 + assert stats["positive"] == 0 + assert stats["negative"] == 0 + await _cleanup() + + +# -- Follow-up association -- + + +class TestFollowUpAssociation: + @pytest.mark.anyio + async def test_run_records_follow_up_via_memory_store(self): + """MemoryRunStore keeps follow_up_to_run_id when it is passed inside the run's metadata.""" + from deerflow.runtime.runs.store.memory import MemoryRunStore + + store = MemoryRunStore() + await store.put("r1", thread_id="t1", status="success") + # MemoryRunStore doesn't have follow_up_to_run_id as a top-level param, + # but it can be passed via metadata + await store.put("r2", thread_id="t1", metadata={"follow_up_to_run_id": "r1"}) + run = await store.get("r2") + assert run["metadata"]["follow_up_to_run_id"] == "r1" + + @pytest.mark.anyio + async def test_human_message_has_follow_up_metadata(self): + """human_message event metadata includes follow_up_to_run_id.""" + from deerflow.runtime.events.store.memory import MemoryRunEventStore + + event_store = MemoryRunEventStore() + await event_store.put( + thread_id="t1", 
+ run_id="r2", + event_type="human_message", + category="message", + content="Tell me more about that", + metadata={"follow_up_to_run_id": "r1"}, + ) + messages = await event_store.list_messages("t1") + assert messages[0]["metadata"]["follow_up_to_run_id"] == "r1" + + @pytest.mark.anyio + async def test_follow_up_auto_detection_logic(self): + """Simulate the auto-detection: latest successful run becomes follow_up_to.""" + from deerflow.runtime.runs.store.memory import MemoryRunStore + + store = MemoryRunStore() + await store.put("r1", thread_id="t1", status="success") + await store.put("r2", thread_id="t1", status="error") + + # Auto-detect: list_by_thread returns newest first + recent = await store.list_by_thread("t1", limit=1) + follow_up = None + if recent and recent[0].get("status") == "success": + follow_up = recent[0]["run_id"] + # r2 (error) is newest, so no follow_up detected + assert follow_up is None + + # Now add a successful run + await store.put("r3", thread_id="t1", status="success") + recent = await store.list_by_thread("t1", limit=1) + follow_up = None + if recent and recent[0].get("status") == "success": + follow_up = recent[0]["run_id"] + assert follow_up == "r3" diff --git a/backend/tests/test_lead_agent_model_resolution.py b/backend/tests/test_lead_agent_model_resolution.py index 9373c2895..093baaa92 100644 --- a/backend/tests/test_lead_agent_model_resolution.py +++ b/backend/tests/test_lead_agent_model_resolution.py @@ -146,8 +146,11 @@ def test_create_summarization_middleware_uses_configured_model_alias(monkeypatch lambda: SummarizationConfig(enabled=True, model_name="model-masswork"), ) + from unittest.mock import MagicMock + captured: dict[str, object] = {} - fake_model = object() + fake_model = MagicMock() + fake_model.with_config.return_value = fake_model def _fake_create_chat_model(*, name=None, thinking_enabled, reasoning_effort=None): captured["name"] = name @@ -163,3 +166,4 @@ def 
test_create_summarization_middleware_uses_configured_model_alias(monkeypatch assert captured["name"] == "model-masswork" assert captured["thinking_enabled"] is False assert middleware["model"] is fake_model + fake_model.with_config.assert_called_once_with(tags=["middleware:summarize"]) diff --git a/backend/tests/test_model_factory.py b/backend/tests/test_model_factory.py index 9bb6915b0..5d29db47d 100644 --- a/backend/tests/test_model_factory.py +++ b/backend/tests/test_model_factory.py @@ -793,6 +793,84 @@ def test_thinking_disabled_vllm_enable_thinking_format(monkeypatch): assert captured.get("reasoning_effort") is None +# --------------------------------------------------------------------------- +# stream_usage injection +# --------------------------------------------------------------------------- + + +class _FakeWithStreamUsage(FakeChatModel): + """Fake model that declares stream_usage in model_fields (like BaseChatOpenAI).""" + + stream_usage: bool | None = None + + +def test_stream_usage_injected_for_openai_compatible_model(monkeypatch): + """Factory should set stream_usage=True for models with stream_usage field.""" + cfg = _make_app_config([_make_model("deepseek", use="langchain_deepseek:ChatDeepSeek")]) + _patch_factory(monkeypatch, cfg, model_class=_FakeWithStreamUsage) + + captured: dict = {} + + class CapturingModel(_FakeWithStreamUsage): + def __init__(self, **kwargs): + captured.update(kwargs) + BaseChatModel.__init__(self, **kwargs) + + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel) + + factory_module.create_chat_model(name="deepseek") + + assert captured.get("stream_usage") is True + + +def test_stream_usage_not_injected_for_non_openai_model(monkeypatch): + """Factory should NOT inject stream_usage for models without the field.""" + cfg = _make_app_config([_make_model("claude", use="langchain_anthropic:ChatAnthropic")]) + _patch_factory(monkeypatch, cfg) + + captured: dict = {} + + class 
CapturingModel(FakeChatModel): + def __init__(self, **kwargs): + captured.update(kwargs) + BaseChatModel.__init__(self, **kwargs) + + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel) + + factory_module.create_chat_model(name="claude") + + assert "stream_usage" not in captured + + +def test_stream_usage_not_overridden_when_explicitly_set_in_config(monkeypatch): + """If config dumps stream_usage=False, factory should respect it.""" + cfg = _make_app_config([_make_model("deepseek", use="langchain_deepseek:ChatDeepSeek")]) + _patch_factory(monkeypatch, cfg, model_class=_FakeWithStreamUsage) + + captured: dict = {} + + class CapturingModel(_FakeWithStreamUsage): + def __init__(self, **kwargs): + captured.update(kwargs) + BaseChatModel.__init__(self, **kwargs) + + monkeypatch.setattr(factory_module, "resolve_class", lambda path, base: CapturingModel) + + # Simulate a config with stream_usage explicitly set to False by wrapping cfg.get_model_config + original_get_model_config = cfg.get_model_config + + def patched_get_model_config(name): + mc = original_get_model_config(name) + mc.stream_usage = False # type: ignore[attr-defined] + return mc + + monkeypatch.setattr(cfg, "get_model_config", patched_get_model_config) + + factory_module.create_chat_model(name="deepseek") + + assert captured.get("stream_usage") is False + + def test_openai_responses_api_settings_are_passed_to_chatopenai(monkeypatch): model = ModelConfig( name="gpt-5-responses", diff --git a/backend/tests/test_persistence_scaffold.py b/backend/tests/test_persistence_scaffold.py new file mode 100644 index 000000000..bd098c707 --- /dev/null +++ b/backend/tests/test_persistence_scaffold.py @@ -0,0 +1,232 @@ +"""Tests for the persistence layer scaffolding. + +Tests: +1. DatabaseConfig property derivation (paths, URLs) +2. MemoryRunStore CRUD + owner_id filtering +3. Base.to_dict() via inspect mixin +4. Engine init/close lifecycle (memory + SQLite) +5. 
Postgres missing-dep error message +""" + +from datetime import UTC, datetime + +import pytest + +from deerflow.config.database_config import DatabaseConfig +from deerflow.runtime.runs.store.memory import MemoryRunStore + +# -- DatabaseConfig -- + + +class TestDatabaseConfig: + def test_defaults(self): + c = DatabaseConfig() + assert c.backend == "memory" + assert c.pool_size == 5 + + def test_sqlite_paths_are_different(self): + c = DatabaseConfig(backend="sqlite", sqlite_dir="./mydata") + assert c.checkpointer_sqlite_path.endswith("checkpoints.db") + assert c.app_sqlite_path.endswith("app.db") + assert "mydata" in c.checkpointer_sqlite_path + assert c.checkpointer_sqlite_path != c.app_sqlite_path + + def test_app_sqlalchemy_url_sqlite(self): + c = DatabaseConfig(backend="sqlite", sqlite_dir="./data") + url = c.app_sqlalchemy_url + assert url.startswith("sqlite+aiosqlite:///") + assert "app.db" in url + + def test_app_sqlalchemy_url_postgres(self): + c = DatabaseConfig( + backend="postgres", + postgres_url="postgresql://u:p@h:5432/db", + ) + url = c.app_sqlalchemy_url + assert url.startswith("postgresql+asyncpg://") + assert "u:p@h:5432/db" in url + + def test_app_sqlalchemy_url_postgres_already_asyncpg(self): + c = DatabaseConfig( + backend="postgres", + postgres_url="postgresql+asyncpg://u:p@h:5432/db", + ) + url = c.app_sqlalchemy_url + assert url.count("asyncpg") == 1 + + def test_memory_has_no_url(self): + c = DatabaseConfig(backend="memory") + with pytest.raises(ValueError, match="No SQLAlchemy URL"): + _ = c.app_sqlalchemy_url + + +# -- MemoryRunStore -- + + +class TestMemoryRunStore: + @pytest.fixture + def store(self): + return MemoryRunStore() + + @pytest.mark.anyio + async def test_put_and_get(self, store): + await store.put("r1", thread_id="t1", status="pending") + row = await store.get("r1") + assert row is not None + assert row["run_id"] == "r1" + assert row["status"] == "pending" + + @pytest.mark.anyio + async def test_get_missing_returns_none(self, 
store): + assert await store.get("nope") is None + + @pytest.mark.anyio + async def test_update_status(self, store): + await store.put("r1", thread_id="t1") + await store.update_status("r1", "running") + assert (await store.get("r1"))["status"] == "running" + + @pytest.mark.anyio + async def test_update_status_with_error(self, store): + await store.put("r1", thread_id="t1") + await store.update_status("r1", "error", error="boom") + row = await store.get("r1") + assert row["status"] == "error" + assert row["error"] == "boom" + + @pytest.mark.anyio + async def test_list_by_thread(self, store): + await store.put("r1", thread_id="t1") + await store.put("r2", thread_id="t1") + await store.put("r3", thread_id="t2") + rows = await store.list_by_thread("t1") + assert len(rows) == 2 + assert all(r["thread_id"] == "t1" for r in rows) + + @pytest.mark.anyio + async def test_list_by_thread_owner_filter(self, store): + await store.put("r1", thread_id="t1", owner_id="alice") + await store.put("r2", thread_id="t1", owner_id="bob") + rows = await store.list_by_thread("t1", owner_id="alice") + assert len(rows) == 1 + assert rows[0]["owner_id"] == "alice" + + @pytest.mark.anyio + async def test_owner_none_returns_all(self, store): + await store.put("r1", thread_id="t1", owner_id="alice") + await store.put("r2", thread_id="t1", owner_id="bob") + rows = await store.list_by_thread("t1", owner_id=None) + assert len(rows) == 2 + + @pytest.mark.anyio + async def test_delete(self, store): + await store.put("r1", thread_id="t1") + await store.delete("r1") + assert await store.get("r1") is None + + @pytest.mark.anyio + async def test_delete_nonexistent_is_noop(self, store): + await store.delete("nope") # should not raise + + @pytest.mark.anyio + async def test_list_pending(self, store): + await store.put("r1", thread_id="t1", status="pending") + await store.put("r2", thread_id="t1", status="running") + await store.put("r3", thread_id="t2", status="pending") + pending = await 
store.list_pending() + assert len(pending) == 2 + assert all(r["status"] == "pending" for r in pending) + + @pytest.mark.anyio + async def test_list_pending_respects_before(self, store): + past = "2020-01-01T00:00:00+00:00" + future = "2099-01-01T00:00:00+00:00" + await store.put("r1", thread_id="t1", status="pending", created_at=past) + await store.put("r2", thread_id="t1", status="pending", created_at=future) + pending = await store.list_pending(before=datetime.now(UTC).isoformat()) + assert len(pending) == 1 + assert pending[0]["run_id"] == "r1" + + @pytest.mark.anyio + async def test_list_pending_fifo_order(self, store): + await store.put("r2", thread_id="t1", status="pending", created_at="2024-01-02T00:00:00+00:00") + await store.put("r1", thread_id="t1", status="pending", created_at="2024-01-01T00:00:00+00:00") + pending = await store.list_pending() + assert pending[0]["run_id"] == "r1" + + +# -- Base.to_dict mixin -- + + +class TestBaseToDictMixin: + @pytest.mark.anyio + async def test_to_dict_and_exclude(self, tmp_path): + """Create a temp SQLite DB with a minimal model, verify to_dict.""" + from sqlalchemy import String + from sqlalchemy.ext.asyncio import async_sessionmaker, create_async_engine + from sqlalchemy.orm import Mapped, mapped_column + + from deerflow.persistence.base import Base + + class _Tmp(Base): + __tablename__ = "_tmp_test" + id: Mapped[str] = mapped_column(String(64), primary_key=True) + name: Mapped[str] = mapped_column(String(128)) + + engine = create_async_engine(f"sqlite+aiosqlite:///{tmp_path / 'test.db'}") + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + + sf = async_sessionmaker(engine, expire_on_commit=False) + async with sf() as session: + session.add(_Tmp(id="1", name="hello")) + await session.commit() + obj = await session.get(_Tmp, "1") + + assert obj.to_dict() == {"id": "1", "name": "hello"} + assert obj.to_dict(exclude={"name"}) == {"id": "1"} + assert "_Tmp" in repr(obj) + + await 
engine.dispose() + + +# -- Engine lifecycle -- + + +class TestEngineLifecycle: + @pytest.mark.anyio + async def test_memory_is_noop(self): + from deerflow.persistence.engine import close_engine, get_session_factory, init_engine + + await init_engine("memory") + assert get_session_factory() is None + await close_engine() + + @pytest.mark.anyio + async def test_sqlite_creates_engine(self, tmp_path): + from deerflow.persistence.engine import close_engine, get_session_factory, init_engine + + url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}" + await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)) + sf = get_session_factory() + assert sf is not None + async with sf() as session: + assert session is not None + await close_engine() + assert get_session_factory() is None + + @pytest.mark.anyio + async def test_postgres_without_asyncpg_gives_actionable_error(self): + """If asyncpg is not installed, error message tells user what to do.""" + from deerflow.persistence.engine import init_engine + + try: + import asyncpg # noqa: F401 + + pytest.skip("asyncpg is installed -- cannot test missing-dep path") + except ImportError: + # asyncpg is not installed — this is the expected state for this test. + # We proceed to verify that init_engine raises an actionable ImportError. + pass # noqa: S110 — intentionally ignored + with pytest.raises(ImportError, match="uv sync --extra postgres"): + await init_engine("postgres", url="postgresql+asyncpg://x:x@localhost/x") diff --git a/backend/tests/test_run_event_store.py b/backend/tests/test_run_event_store.py new file mode 100644 index 000000000..2b22b2c6f --- /dev/null +++ b/backend/tests/test_run_event_store.py @@ -0,0 +1,500 @@ +"""Tests for RunEventStore contract across all backends. + +Uses a helper to create the store for each backend type. +Memory tests run directly; DB and JSONL tests create stores inside each test. 
+""" + +import pytest + +from deerflow.runtime.events.store.memory import MemoryRunEventStore + + +@pytest.fixture +def store(): + return MemoryRunEventStore() + + +# -- Basic write and query -- + + +class TestPutAndSeq: + @pytest.mark.anyio + async def test_put_returns_dict_with_seq(self, store): + record = await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content="hello") + assert "seq" in record + assert record["seq"] == 1 + assert record["thread_id"] == "t1" + assert record["run_id"] == "r1" + assert record["event_type"] == "human_message" + assert record["category"] == "message" + assert record["content"] == "hello" + assert "created_at" in record + + @pytest.mark.anyio + async def test_seq_strictly_increasing_same_thread(self, store): + r1 = await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message") + r2 = await store.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message") + r3 = await store.put(thread_id="t1", run_id="r1", event_type="llm_end", category="trace") + assert r1["seq"] == 1 + assert r2["seq"] == 2 + assert r3["seq"] == 3 + + @pytest.mark.anyio + async def test_seq_independent_across_threads(self, store): + r1 = await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message") + r2 = await store.put(thread_id="t2", run_id="r2", event_type="human_message", category="message") + assert r1["seq"] == 1 + assert r2["seq"] == 1 + + @pytest.mark.anyio + async def test_put_respects_provided_created_at(self, store): + ts = "2024-06-01T12:00:00+00:00" + record = await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", created_at=ts) + assert record["created_at"] == ts + + @pytest.mark.anyio + async def test_put_metadata_preserved(self, store): + meta = {"model": "gpt-4", "tokens": 100} + record = await store.put(thread_id="t1", run_id="r1", event_type="llm_end", category="trace", metadata=meta) + 
assert record["metadata"] == meta + + +# -- list_messages -- + + +class TestListMessages: + @pytest.mark.anyio + async def test_only_returns_message_category(self, store): + await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message") + await store.put(thread_id="t1", run_id="r1", event_type="llm_end", category="trace") + await store.put(thread_id="t1", run_id="r1", event_type="run_start", category="lifecycle") + messages = await store.list_messages("t1") + assert len(messages) == 1 + assert messages[0]["category"] == "message" + + @pytest.mark.anyio + async def test_ascending_seq_order(self, store): + await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content="first") + await store.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message", content="second") + await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content="third") + messages = await store.list_messages("t1") + seqs = [m["seq"] for m in messages] + assert seqs == sorted(seqs) + + @pytest.mark.anyio + async def test_before_seq_pagination(self, store): + for i in range(10): + await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content=str(i)) + messages = await store.list_messages("t1", before_seq=6, limit=3) + assert len(messages) == 3 + assert [m["seq"] for m in messages] == [3, 4, 5] + + @pytest.mark.anyio + async def test_after_seq_pagination(self, store): + for i in range(10): + await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content=str(i)) + messages = await store.list_messages("t1", after_seq=7, limit=3) + assert len(messages) == 3 + assert [m["seq"] for m in messages] == [8, 9, 10] + + @pytest.mark.anyio + async def test_limit_restricts_count(self, store): + for _ in range(20): + await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message") + 
messages = await store.list_messages("t1", limit=5) + assert len(messages) == 5 + + @pytest.mark.anyio + async def test_cross_run_unified_ordering(self, store): + await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message") + await store.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message") + await store.put(thread_id="t1", run_id="r2", event_type="human_message", category="message") + await store.put(thread_id="t1", run_id="r2", event_type="ai_message", category="message") + messages = await store.list_messages("t1") + assert [m["seq"] for m in messages] == [1, 2, 3, 4] + assert messages[0]["run_id"] == "r1" + assert messages[2]["run_id"] == "r2" + + @pytest.mark.anyio + async def test_default_returns_latest(self, store): + for _ in range(10): + await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message") + messages = await store.list_messages("t1", limit=3) + assert [m["seq"] for m in messages] == [8, 9, 10] + + +# -- list_events -- + + +class TestListEvents: + @pytest.mark.anyio + async def test_returns_all_categories_for_run(self, store): + await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message") + await store.put(thread_id="t1", run_id="r1", event_type="llm_end", category="trace") + await store.put(thread_id="t1", run_id="r1", event_type="run_start", category="lifecycle") + events = await store.list_events("t1", "r1") + assert len(events) == 3 + + @pytest.mark.anyio + async def test_event_types_filter(self, store): + await store.put(thread_id="t1", run_id="r1", event_type="llm_start", category="trace") + await store.put(thread_id="t1", run_id="r1", event_type="llm_end", category="trace") + await store.put(thread_id="t1", run_id="r1", event_type="tool_start", category="trace") + events = await store.list_events("t1", "r1", event_types=["llm_end"]) + assert len(events) == 1 + assert events[0]["event_type"] == "llm_end" + + 
@pytest.mark.anyio
+    async def test_only_returns_specified_run(self, store):
+        """list_events for run r1 leaves out the event stored under run r2."""
+        for run in ("r1", "r2"):
+            await store.put(thread_id="t1", run_id=run, event_type="llm_end", category="trace")
+        found = await store.list_events("t1", "r1")
+        assert len(found) == 1
+        assert found[0]["run_id"] == "r1"
+
+
+# -- list_messages_by_run --
+
+
+class TestListMessagesByRun:
+    """list_messages_by_run: message-category events of a single run."""
+
+    @pytest.mark.anyio
+    async def test_only_messages_for_specified_run(self, store):
+        """A trace event in r1 and a message in r2 are both excluded."""
+        seeded = (
+            ("r1", "human_message", "message"),
+            ("r1", "llm_end", "trace"),
+            ("r2", "human_message", "message"),
+        )
+        for run, kind, cat in seeded:
+            await store.put(thread_id="t1", run_id=run, event_type=kind, category=cat)
+        result = await store.list_messages_by_run("t1", "r1")
+        assert len(result) == 1
+        only = result[0]
+        assert only["run_id"] == "r1"
+        assert only["category"] == "message"
+
+
+# -- count_messages --
+
+
+class TestCountMessages:
+    """count_messages: number of message-category events in a thread."""
+
+    @pytest.mark.anyio
+    async def test_counts_only_message_category(self, store):
+        """Two message events plus one trace event count as two."""
+        seeded = (
+            ("human_message", "message"),
+            ("ai_message", "message"),
+            ("llm_end", "trace"),
+        )
+        for kind, cat in seeded:
+            await store.put(thread_id="t1", run_id="r1", event_type=kind, category=cat)
+        total = await store.count_messages("t1")
+        assert total == 2
+
+
+# -- put_batch --
+
+
+class TestPutBatch:
+    """put_batch: every record written in one call receives a seq."""
+
+    @pytest.mark.anyio
+    async def test_batch_assigns_seq(self, store):
+        """Each of the three returned records carries a "seq" key."""
+        common = {"thread_id": "t1", "run_id": "r1"}
+        batch = [
+            {**common, "event_type": "human_message", "category": "message", "content": "a"},
+            {**common, "event_type": "ai_message", "category": "message", "content": "b"},
+            {**common, "event_type": "llm_end", "category": "trace"},
+        ]
+        written = await store.put_batch(batch)
+        assert len(written) == 3
+        for rec in written:
+            assert "seq" in rec
+
+    @pytest.mark.anyio
+    async def test_batch_seq_strictly_increasing(self, store):
+        """On an empty thread the first two batch records get seq 1 and 2."""
+        common = {"thread_id": "t1", "run_id": "r1", "category": "message"}
+        batch = [
+            {"thread_id": common["thread_id"], "run_id": common["run_id"], "event_type": kind, "category": common["category"]}
+            for kind in ("human_message", "ai_message")
+        ]
+        written = await store.put_batch(batch)
+        assert written[0]["seq"] == 1
+        assert written[1]["seq"] == 2
+
+
+# -- delete --
+
+
+class TestDelete:
+    """delete_by_thread / delete_by_run: returned counts and what remains."""
+
+    @pytest.mark.anyio
+    async def test_delete_by_thread(self, store):
+        """Deleting the thread reports all three events and empties it."""
+        seeded = (
+            ("r1", "human_message", "message"),
+            ("r1", "ai_message", "message"),
+            ("r2", "llm_end", "trace"),
+        )
+        for run, kind, cat in seeded:
+            await store.put(thread_id="t1", run_id=run, event_type=kind, category=cat)
+        removed = await store.delete_by_thread("t1")
+        assert removed == 3
+        remaining = await store.list_messages("t1")
+        assert remaining == []
+        remaining_count = await store.count_messages("t1")
+        assert remaining_count == 0
+
+    @pytest.mark.anyio
+    async def test_delete_by_run(self, store):
+        """Deleting r2 reports its two events and keeps r1's message."""
+        seeded = (
+            ("r1", "human_message", "message"),
+            ("r2", "human_message", "message"),
+            ("r2", "llm_end", "trace"),
+        )
+        for run, kind, cat in seeded:
+            await store.put(thread_id="t1", run_id=run, event_type=kind, category=cat)
+        removed = await store.delete_by_run("t1", "r2")
+        assert removed == 2
+        survivors = await store.list_messages("t1")
+        assert len(survivors) == 1
+        assert survivors[0]["run_id"] == "r1"
+
+    @pytest.mark.anyio
+    async def test_delete_nonexistent_thread_returns_zero(self, store):
+        """Deleting a thread that was never written reports zero."""
+        removed = await store.delete_by_thread("nope")
+        assert removed == 0
+
+    @pytest.mark.anyio
+    async def test_delete_nonexistent_run_returns_zero(self, store):
+        """Deleting an unknown run inside an existing thread reports zero."""
+        await store.put(thread_id="t1", run_id="r1", event_type="human_message", category="message")
+        removed = await store.delete_by_run("t1", "nope")
+        assert removed == 0
+
+    @pytest.mark.anyio
+    async def test_delete_nonexistent_thread_for_run_returns_zero(self, store):
+        """Deleting a run under a thread that was never written reports zero."""
+        removed = await store.delete_by_run("nope", "r1")
+        assert removed == 0
+
+
+# -- Edge cases --
+
+
+class TestEdgeCases: + @pytest.mark.anyio + async def test_empty_thread_list_messages(self, store): + assert await store.list_messages("empty") == [] + + @pytest.mark.anyio + async def test_empty_run_list_events(self, store): + assert await store.list_events("empty", "r1") == [] + + @pytest.mark.anyio + async def test_empty_thread_count_messages(self, store): + assert await store.count_messages("empty") == 0 + + +# -- DB-specific tests -- + + +class TestDbRunEventStore: + """Tests for DbRunEventStore with temp SQLite.""" + + @pytest.mark.anyio + async def test_basic_crud(self, tmp_path): + from deerflow.persistence.engine import close_engine, get_session_factory, init_engine + from deerflow.runtime.events.store.db import DbRunEventStore + + url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}" + await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)) + s = DbRunEventStore(get_session_factory()) + + r = await s.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content="hi") + assert r["seq"] == 1 + r2 = await s.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message", content="hello") + assert r2["seq"] == 2 + + messages = await s.list_messages("t1") + assert len(messages) == 2 + + count = await s.count_messages("t1") + assert count == 2 + + await close_engine() + + @pytest.mark.anyio + async def test_trace_content_truncation(self, tmp_path): + from deerflow.persistence.engine import close_engine, get_session_factory, init_engine + from deerflow.runtime.events.store.db import DbRunEventStore + + url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}" + await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)) + s = DbRunEventStore(get_session_factory(), max_trace_content=100) + + long = "x" * 200 + r = await s.put(thread_id="t1", run_id="r1", event_type="llm_end", category="trace", content=long) + assert len(r["content"]) == 100 + assert r["metadata"].get("content_truncated") is True + + # message content NOT 
truncated + m = await s.put(thread_id="t1", run_id="r1", event_type="ai_message", category="message", content=long) + assert len(m["content"]) == 200 + + await close_engine() + + @pytest.mark.anyio + async def test_pagination(self, tmp_path): + from deerflow.persistence.engine import close_engine, get_session_factory, init_engine + from deerflow.runtime.events.store.db import DbRunEventStore + + url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}" + await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)) + s = DbRunEventStore(get_session_factory()) + + for i in range(10): + await s.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content=str(i)) + + # before_seq + msgs = await s.list_messages("t1", before_seq=6, limit=3) + assert [m["seq"] for m in msgs] == [3, 4, 5] + + # after_seq + msgs = await s.list_messages("t1", after_seq=7, limit=3) + assert [m["seq"] for m in msgs] == [8, 9, 10] + + # default (latest) + msgs = await s.list_messages("t1", limit=3) + assert [m["seq"] for m in msgs] == [8, 9, 10] + + await close_engine() + + @pytest.mark.anyio + async def test_delete(self, tmp_path): + from deerflow.persistence.engine import close_engine, get_session_factory, init_engine + from deerflow.runtime.events.store.db import DbRunEventStore + + url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}" + await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)) + s = DbRunEventStore(get_session_factory()) + + await s.put(thread_id="t1", run_id="r1", event_type="human_message", category="message") + await s.put(thread_id="t1", run_id="r2", event_type="ai_message", category="message") + c = await s.delete_by_run("t1", "r2") + assert c == 1 + assert await s.count_messages("t1") == 1 + + c = await s.delete_by_thread("t1") + assert c == 1 + assert await s.count_messages("t1") == 0 + + await close_engine() + + @pytest.mark.anyio + async def test_put_batch_seq_continuity(self, tmp_path): + """Batch write produces continuous seq values 
with no gaps.""" + from deerflow.persistence.engine import close_engine, get_session_factory, init_engine + from deerflow.runtime.events.store.db import DbRunEventStore + + url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}" + await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)) + s = DbRunEventStore(get_session_factory()) + + events = [{"thread_id": "t1", "run_id": "r1", "event_type": "trace", "category": "trace"} for _ in range(50)] + results = await s.put_batch(events) + seqs = [r["seq"] for r in results] + assert seqs == list(range(1, 51)) + await close_engine() + + +# -- Factory tests -- + + +class TestMakeRunEventStore: + """Tests for the make_run_event_store factory function.""" + + @pytest.mark.anyio + async def test_memory_backend_default(self): + from deerflow.runtime.events.store import make_run_event_store + + store = make_run_event_store(None) + assert type(store).__name__ == "MemoryRunEventStore" + + @pytest.mark.anyio + async def test_memory_backend_explicit(self): + from unittest.mock import MagicMock + + from deerflow.runtime.events.store import make_run_event_store + + config = MagicMock() + config.backend = "memory" + store = make_run_event_store(config) + assert type(store).__name__ == "MemoryRunEventStore" + + @pytest.mark.anyio + async def test_db_backend_with_engine(self, tmp_path): + from unittest.mock import MagicMock + + from deerflow.persistence.engine import close_engine, init_engine + from deerflow.runtime.events.store import make_run_event_store + + url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}" + await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)) + + config = MagicMock() + config.backend = "db" + config.max_trace_content = 10240 + store = make_run_event_store(config) + assert type(store).__name__ == "DbRunEventStore" + await close_engine() + + @pytest.mark.anyio + async def test_db_backend_no_engine_falls_back(self): + """db backend without engine falls back to memory.""" + from unittest.mock import 
MagicMock + + from deerflow.persistence.engine import close_engine, init_engine + from deerflow.runtime.events.store import make_run_event_store + + await init_engine("memory") # no engine created + + config = MagicMock() + config.backend = "db" + store = make_run_event_store(config) + assert type(store).__name__ == "MemoryRunEventStore" + await close_engine() + + @pytest.mark.anyio + async def test_jsonl_backend(self): + from unittest.mock import MagicMock + + from deerflow.runtime.events.store import make_run_event_store + + config = MagicMock() + config.backend = "jsonl" + store = make_run_event_store(config) + assert type(store).__name__ == "JsonlRunEventStore" + + @pytest.mark.anyio + async def test_unknown_backend_raises(self): + from unittest.mock import MagicMock + + from deerflow.runtime.events.store import make_run_event_store + + config = MagicMock() + config.backend = "redis" + with pytest.raises(ValueError, match="Unknown"): + make_run_event_store(config) + + +# -- JSONL-specific tests -- + + +class TestJsonlRunEventStore: + @pytest.mark.anyio + async def test_basic_crud(self, tmp_path): + from deerflow.runtime.events.store.jsonl import JsonlRunEventStore + + s = JsonlRunEventStore(base_dir=tmp_path / "jsonl") + r = await s.put(thread_id="t1", run_id="r1", event_type="human_message", category="message", content="hi") + assert r["seq"] == 1 + messages = await s.list_messages("t1") + assert len(messages) == 1 + + @pytest.mark.anyio + async def test_file_at_correct_path(self, tmp_path): + from deerflow.runtime.events.store.jsonl import JsonlRunEventStore + + s = JsonlRunEventStore(base_dir=tmp_path / "jsonl") + await s.put(thread_id="t1", run_id="r1", event_type="human_message", category="message") + assert (tmp_path / "jsonl" / "threads" / "t1" / "runs" / "r1.jsonl").exists() + + @pytest.mark.anyio + async def test_cross_run_messages(self, tmp_path): + from deerflow.runtime.events.store.jsonl import JsonlRunEventStore + + s = 
JsonlRunEventStore(base_dir=tmp_path / "jsonl") + await s.put(thread_id="t1", run_id="r1", event_type="human_message", category="message") + await s.put(thread_id="t1", run_id="r2", event_type="human_message", category="message") + messages = await s.list_messages("t1") + assert len(messages) == 2 + assert [m["seq"] for m in messages] == [1, 2] + + @pytest.mark.anyio + async def test_delete_by_run(self, tmp_path): + from deerflow.runtime.events.store.jsonl import JsonlRunEventStore + + s = JsonlRunEventStore(base_dir=tmp_path / "jsonl") + await s.put(thread_id="t1", run_id="r1", event_type="human_message", category="message") + await s.put(thread_id="t1", run_id="r2", event_type="human_message", category="message") + c = await s.delete_by_run("t1", "r2") + assert c == 1 + assert not (tmp_path / "jsonl" / "threads" / "t1" / "runs" / "r2.jsonl").exists() + assert await s.count_messages("t1") == 1 diff --git a/backend/tests/test_run_journal.py b/backend/tests/test_run_journal.py new file mode 100644 index 000000000..dbb307a55 --- /dev/null +++ b/backend/tests/test_run_journal.py @@ -0,0 +1,1042 @@ +"""Tests for RunJournal callback handler. + +Uses MemoryRunEventStore as the backend for direct event inspection. +""" + +import asyncio +from unittest.mock import MagicMock +from uuid import uuid4 + +import pytest + +from deerflow.runtime.events.store.memory import MemoryRunEventStore +from deerflow.runtime.journal import RunJournal + + +@pytest.fixture +def journal_setup(): + store = MemoryRunEventStore() + j = RunJournal("r1", "t1", store, flush_threshold=100) + return j, store + + +def _make_llm_response(content="Hello", usage=None, tool_calls=None, additional_kwargs=None): + """Create a mock LLM response with a message. + + model_dump() returns checkpoint-aligned format matching real AIMessage. 
+ """ + msg = MagicMock() + msg.type = "ai" + msg.content = content + msg.id = f"msg-{id(msg)}" + msg.tool_calls = tool_calls or [] + msg.invalid_tool_calls = [] + msg.response_metadata = {"model_name": "test-model"} + msg.usage_metadata = usage + msg.additional_kwargs = additional_kwargs or {} + msg.name = None + # model_dump returns checkpoint-aligned format + msg.model_dump.return_value = { + "content": content, + "additional_kwargs": additional_kwargs or {}, + "response_metadata": {"model_name": "test-model"}, + "type": "ai", + "name": None, + "id": msg.id, + "tool_calls": tool_calls or [], + "invalid_tool_calls": [], + "usage_metadata": usage, + } + + gen = MagicMock() + gen.message = msg + + response = MagicMock() + response.generations = [[gen]] + return response + + +class TestLlmCallbacks: + @pytest.mark.anyio + async def test_on_llm_end_produces_trace_event(self, journal_setup): + j, store = journal_setup + run_id = uuid4() + j.on_llm_start({}, [], run_id=run_id, tags=["lead_agent"]) + j.on_llm_end(_make_llm_response("Hi"), run_id=run_id, tags=["lead_agent"]) + await j.flush() + events = await store.list_events("t1", "r1") + trace_events = [e for e in events if e["event_type"] == "llm_response"] + assert len(trace_events) == 1 + assert trace_events[0]["category"] == "trace" + + @pytest.mark.anyio + async def test_on_llm_end_lead_agent_produces_ai_message(self, journal_setup): + j, store = journal_setup + run_id = uuid4() + j.on_llm_start({}, [], run_id=run_id, tags=["lead_agent"]) + j.on_llm_end(_make_llm_response("Answer"), run_id=run_id, tags=["lead_agent"]) + await j.flush() + messages = await store.list_messages("t1") + assert len(messages) == 1 + assert messages[0]["event_type"] == "ai_message" + # Content is checkpoint-aligned model_dump format + assert messages[0]["content"]["type"] == "ai" + assert messages[0]["content"]["content"] == "Answer" + + @pytest.mark.anyio + async def test_on_llm_end_with_tool_calls_produces_ai_tool_call(self, 
journal_setup): + """LLM response with pending tool_calls should produce ai_tool_call event.""" + j, store = journal_setup + run_id = uuid4() + j.on_llm_end( + _make_llm_response("Let me search", tool_calls=[{"id": "call_1", "name": "search", "args": {}}]), + run_id=run_id, + tags=["lead_agent"], + ) + await j.flush() + messages = await store.list_messages("t1") + assert len(messages) == 1 + assert messages[0]["event_type"] == "ai_tool_call" + + @pytest.mark.anyio + async def test_on_llm_end_subagent_no_ai_message(self, journal_setup): + j, store = journal_setup + run_id = uuid4() + j.on_llm_start({}, [], run_id=run_id, tags=["subagent:research"]) + j.on_llm_end(_make_llm_response("Sub answer"), run_id=run_id, tags=["subagent:research"]) + await j.flush() + messages = await store.list_messages("t1") + assert len(messages) == 0 + + @pytest.mark.anyio + async def test_token_accumulation(self, journal_setup): + j, store = journal_setup + usage1 = {"input_tokens": 10, "output_tokens": 5, "total_tokens": 15} + usage2 = {"input_tokens": 20, "output_tokens": 10, "total_tokens": 30} + j.on_llm_end(_make_llm_response("A", usage=usage1), run_id=uuid4(), tags=["lead_agent"]) + j.on_llm_end(_make_llm_response("B", usage=usage2), run_id=uuid4(), tags=["lead_agent"]) + assert j._total_input_tokens == 30 + assert j._total_output_tokens == 15 + assert j._total_tokens == 45 + assert j._llm_call_count == 2 + + @pytest.mark.anyio + async def test_total_tokens_computed_from_input_output(self, journal_setup): + """If total_tokens is 0, it should be computed from input + output.""" + j, store = journal_setup + j.on_llm_end( + _make_llm_response("Hi", usage={"input_tokens": 100, "output_tokens": 50, "total_tokens": 0}), + run_id=uuid4(), + tags=["lead_agent"], + ) + assert j._total_tokens == 150 + assert j._lead_agent_tokens == 150 + + @pytest.mark.anyio + async def test_caller_token_classification(self, journal_setup): + j, store = journal_setup + usage = {"input_tokens": 10, 
"output_tokens": 5, "total_tokens": 15} + j.on_llm_end(_make_llm_response("A", usage=usage), run_id=uuid4(), tags=["lead_agent"]) + j.on_llm_end(_make_llm_response("B", usage=usage), run_id=uuid4(), tags=["subagent:research"]) + j.on_llm_end(_make_llm_response("C", usage=usage), run_id=uuid4(), tags=["middleware:summarization"]) + assert j._lead_agent_tokens == 15 + assert j._subagent_tokens == 15 + assert j._middleware_tokens == 15 + + @pytest.mark.anyio + async def test_usage_metadata_none_no_crash(self, journal_setup): + j, store = journal_setup + j.on_llm_end(_make_llm_response("No usage", usage=None), run_id=uuid4(), tags=["lead_agent"]) + await j.flush() + + @pytest.mark.anyio + async def test_latency_tracking(self, journal_setup): + j, store = journal_setup + run_id = uuid4() + j.on_llm_start({}, [], run_id=run_id, tags=["lead_agent"]) + j.on_llm_end(_make_llm_response("Fast"), run_id=run_id, tags=["lead_agent"]) + await j.flush() + events = await store.list_events("t1", "r1") + llm_resp = [e for e in events if e["event_type"] == "llm_response"][0] + assert "latency_ms" in llm_resp["metadata"] + assert llm_resp["metadata"]["latency_ms"] is not None + + +class TestLifecycleCallbacks: + @pytest.mark.anyio + async def test_chain_start_end_produce_lifecycle_events(self, journal_setup): + j, store = journal_setup + j.on_chain_start({}, {}, run_id=uuid4(), parent_run_id=None) + j.on_chain_end({}, run_id=uuid4(), parent_run_id=None) + await asyncio.sleep(0.05) + await j.flush() + events = await store.list_events("t1", "r1") + types = [e["event_type"] for e in events if e["category"] == "lifecycle"] + assert "run_start" in types + assert "run_end" in types + + @pytest.mark.anyio + async def test_nested_chain_ignored(self, journal_setup): + j, store = journal_setup + parent_id = uuid4() + j.on_chain_start({}, {}, run_id=uuid4(), parent_run_id=parent_id) + j.on_chain_end({}, run_id=uuid4(), parent_run_id=parent_id) + await j.flush() + events = await 
store.list_events("t1", "r1") + lifecycle = [e for e in events if e["category"] == "lifecycle"] + assert len(lifecycle) == 0 + + +class TestToolCallbacks: + @pytest.mark.anyio + async def test_tool_start_end_produce_trace(self, journal_setup): + j, store = journal_setup + j.on_tool_start({"name": "web_search"}, "query", run_id=uuid4()) + j.on_tool_end("results", run_id=uuid4(), name="web_search") + await j.flush() + events = await store.list_events("t1", "r1") + trace_types = {e["event_type"] for e in events if e["category"] == "trace"} + assert "tool_start" in trace_types + assert "tool_end" in trace_types + + @pytest.mark.anyio + async def test_on_tool_error(self, journal_setup): + j, store = journal_setup + j.on_tool_error(TimeoutError("timeout"), run_id=uuid4(), name="web_fetch") + await j.flush() + events = await store.list_events("t1", "r1") + assert any(e["event_type"] == "tool_error" for e in events) + + +class TestCustomEvents: + @pytest.mark.anyio + async def test_summarization_event(self, journal_setup): + j, store = journal_setup + j.on_custom_event( + "summarization", + {"summary": "Context was summarized.", "replaced_count": 5, "replaced_message_ids": ["a", "b"]}, + run_id=uuid4(), + ) + await j.flush() + events = await store.list_events("t1", "r1") + trace = [e for e in events if e["event_type"] == "summarization"] + assert len(trace) == 1 + # Summarization goes to middleware category, not message + mw_events = [e for e in events if e["event_type"] == "middleware:summarize"] + assert len(mw_events) == 1 + assert mw_events[0]["category"] == "middleware" + assert mw_events[0]["content"] == {"role": "system", "content": "Context was summarized."} + # No message events from summarization + messages = await store.list_messages("t1") + assert len(messages) == 0 + + @pytest.mark.anyio + async def test_non_summarization_custom_event(self, journal_setup): + j, store = journal_setup + j.on_custom_event("task_running", {"task_id": "t1", "status": "running"}, 
run_id=uuid4()) + await j.flush() + events = await store.list_events("t1", "r1") + assert any(e["event_type"] == "task_running" for e in events) + + +class TestBufferFlush: + @pytest.mark.anyio + async def test_flush_threshold(self, journal_setup): + j, store = journal_setup + j._flush_threshold = 3 + j.on_tool_start({"name": "a"}, "x", run_id=uuid4()) + j.on_tool_start({"name": "b"}, "x", run_id=uuid4()) + assert len(j._buffer) == 2 + j.on_tool_start({"name": "c"}, "x", run_id=uuid4()) + await asyncio.sleep(0.1) + events = await store.list_events("t1", "r1") + assert len(events) >= 3 + + @pytest.mark.anyio + async def test_events_retained_when_no_loop(self, journal_setup): + """Events buffered in a sync (no-loop) context should survive + until the async flush() in the finally block.""" + j, store = journal_setup + j._flush_threshold = 1 + + original = asyncio.get_running_loop + + def no_loop(): + raise RuntimeError("no running event loop") + + asyncio.get_running_loop = no_loop + try: + j._put(event_type="llm_response", category="trace", content="test") + finally: + asyncio.get_running_loop = original + + assert len(j._buffer) == 1 + await j.flush() + events = await store.list_events("t1", "r1") + assert any(e["event_type"] == "llm_response" for e in events) + + +class TestIdentifyCaller: + def test_lead_agent_tag(self, journal_setup): + j, _ = journal_setup + assert j._identify_caller({"tags": ["lead_agent"]}) == "lead_agent" + + def test_subagent_tag(self, journal_setup): + j, _ = journal_setup + assert j._identify_caller({"tags": ["subagent:research"]}) == "subagent:research" + + def test_middleware_tag(self, journal_setup): + j, _ = journal_setup + assert j._identify_caller({"tags": ["middleware:summarization"]}) == "middleware:summarization" + + def test_no_tags_returns_lead_agent(self, journal_setup): + j, _ = journal_setup + assert j._identify_caller({"tags": []}) == "lead_agent" + assert j._identify_caller({}) == "lead_agent" + + +class 
TestChainErrorCallback: + @pytest.mark.anyio + async def test_on_chain_error_writes_run_error(self, journal_setup): + j, store = journal_setup + j.on_chain_error(ValueError("boom"), run_id=uuid4(), parent_run_id=None) + await asyncio.sleep(0.05) + await j.flush() + events = await store.list_events("t1", "r1") + error_events = [e for e in events if e["event_type"] == "run_error"] + assert len(error_events) == 1 + assert "boom" in error_events[0]["content"] + assert error_events[0]["metadata"]["error_type"] == "ValueError" + + +class TestTokenTrackingDisabled: + @pytest.mark.anyio + async def test_track_token_usage_false(self): + store = MemoryRunEventStore() + j = RunJournal("r1", "t1", store, track_token_usage=False, flush_threshold=100) + j.on_llm_end( + _make_llm_response("X", usage={"input_tokens": 50, "output_tokens": 50, "total_tokens": 100}), + run_id=uuid4(), + tags=["lead_agent"], + ) + data = j.get_completion_data() + assert data["total_tokens"] == 0 + assert data["llm_call_count"] == 0 + + +class TestConvenienceFields: + @pytest.mark.anyio + async def test_last_ai_message_tracks_latest(self, journal_setup): + j, store = journal_setup + j.on_llm_end(_make_llm_response("First"), run_id=uuid4(), tags=["lead_agent"]) + j.on_llm_end(_make_llm_response("Second"), run_id=uuid4(), tags=["lead_agent"]) + data = j.get_completion_data() + assert data["last_ai_message"] == "Second" + assert data["message_count"] == 2 + + @pytest.mark.anyio + async def test_first_human_message_via_set(self, journal_setup): + j, _ = journal_setup + j.set_first_human_message("What is AI?") + data = j.get_completion_data() + assert data["first_human_message"] == "What is AI?" 
+ + @pytest.mark.anyio + async def test_get_completion_data(self, journal_setup): + j, _ = journal_setup + j._total_tokens = 100 + j._msg_count = 5 + data = j.get_completion_data() + assert data["total_tokens"] == 100 + assert data["message_count"] == 5 + + +class TestUnknownCallerTokens: + @pytest.mark.anyio + async def test_unknown_caller_tokens_go_to_lead(self, journal_setup): + j, store = journal_setup + j.on_llm_end( + _make_llm_response("X", usage={"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}), + run_id=uuid4(), + tags=[], + ) + assert j._lead_agent_tokens == 15 + + +# --------------------------------------------------------------------------- +# SQLite-backed end-to-end test +# --------------------------------------------------------------------------- + + +class TestDbBackedLifecycle: + @pytest.mark.anyio + async def test_full_lifecycle_with_sqlite(self, tmp_path): + """Full lifecycle with SQLite-backed RunRepository + DbRunEventStore.""" + from deerflow.persistence.engine import close_engine, get_session_factory, init_engine + from deerflow.persistence.run import RunRepository + from deerflow.runtime.events.store.db import DbRunEventStore + from deerflow.runtime.runs.manager import RunManager + + url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}" + await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)) + sf = get_session_factory() + + run_store = RunRepository(sf) + event_store = DbRunEventStore(sf) + mgr = RunManager(store=run_store) + + # Create run + record = await mgr.create("t1", "lead_agent") + run_id = record.run_id + + # Write human_message (checkpoint-aligned format) + from langchain_core.messages import HumanMessage + + human_msg = HumanMessage(content="Hello DB") + await event_store.put(thread_id="t1", run_id=run_id, event_type="human_message", category="message", content=human_msg.model_dump()) + + # Simulate journal + journal = RunJournal(run_id, "t1", event_store, flush_threshold=100) + 
journal.set_first_human_message("Hello DB") + + journal.on_chain_start({}, {}, run_id=uuid4(), parent_run_id=None) + llm_rid = uuid4() + journal.on_llm_start({"name": "test"}, [], run_id=llm_rid, tags=["lead_agent"]) + journal.on_llm_end( + _make_llm_response("DB response", usage={"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}), + run_id=llm_rid, + tags=["lead_agent"], + ) + journal.on_chain_end({}, run_id=uuid4(), parent_run_id=None) + await asyncio.sleep(0.05) + await journal.flush() + + # Verify run persisted + row = await run_store.get(run_id) + assert row is not None + assert row["status"] == "pending" + + # Update completion + completion = journal.get_completion_data() + await run_store.update_run_completion(run_id, status="success", **completion) + row = await run_store.get(run_id) + assert row["status"] == "success" + assert row["total_tokens"] == 15 + + # Verify messages from DB (checkpoint-aligned format) + messages = await event_store.list_messages("t1") + assert len(messages) == 2 + assert messages[0]["event_type"] == "human_message" + assert messages[0]["content"]["type"] == "human" + assert messages[1]["event_type"] == "ai_message" + assert messages[1]["content"]["type"] == "ai" + assert messages[1]["content"]["content"] == "DB response" + + # Verify events from DB + events = await event_store.list_events("t1", run_id) + event_types = {e["event_type"] for e in events} + assert "run_start" in event_types + assert "llm_response" in event_types + assert "run_end" in event_types + + await close_engine() + + +class TestDictContentFlag: + """Verify that content_is_dict metadata flag controls deserialization.""" + + @pytest.mark.anyio + async def test_db_store_str_starting_with_brace_not_deserialized(self, tmp_path): + """Plain string content starting with { should NOT be deserialized.""" + from deerflow.persistence.engine import close_engine, get_session_factory, init_engine + from deerflow.runtime.events.store.db import DbRunEventStore + + url 
= f"sqlite+aiosqlite:///{tmp_path / 'test.db'}" + await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)) + sf = get_session_factory() + store = DbRunEventStore(sf) + + await store.put( + thread_id="t1", + run_id="r1", + event_type="tool_end", + category="trace", + content="{not json, just a string}", + ) + events = await store.list_events("t1", "r1") + assert events[0]["content"] == "{not json, just a string}" + assert isinstance(events[0]["content"], str) + + await close_engine() + + @pytest.mark.anyio + async def test_db_store_str_starting_with_bracket_not_deserialized(self, tmp_path): + """Plain string content like '[1, 2, 3]' should NOT be deserialized.""" + from deerflow.persistence.engine import close_engine, get_session_factory, init_engine + from deerflow.runtime.events.store.db import DbRunEventStore + + url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}" + await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)) + sf = get_session_factory() + store = DbRunEventStore(sf) + + await store.put( + thread_id="t1", + run_id="r1", + event_type="tool_end", + category="trace", + content="[1, 2, 3]", + ) + events = await store.list_events("t1", "r1") + assert events[0]["content"] == "[1, 2, 3]" + assert isinstance(events[0]["content"], str) + + await close_engine() + + +class TestDictContent: + """Verify that store backends accept str | dict content.""" + + @pytest.mark.anyio + async def test_memory_store_dict_content(self): + store = MemoryRunEventStore() + record = await store.put( + thread_id="t1", + run_id="r1", + event_type="ai_message", + category="message", + content={"role": "assistant", "content": "Hello"}, + ) + assert record["content"] == {"role": "assistant", "content": "Hello"} + messages = await store.list_messages("t1") + assert len(messages) == 1 + assert messages[0]["content"] == {"role": "assistant", "content": "Hello"} + + @pytest.mark.anyio + async def test_memory_store_str_content_unchanged(self): + store = MemoryRunEventStore() + 
record = await store.put( + thread_id="t1", + run_id="r1", + event_type="ai_message", + category="message", + content="plain string", + ) + assert record["content"] == "plain string" + assert isinstance(record["content"], str) + + @pytest.mark.anyio + async def test_db_store_dict_content_roundtrip(self, tmp_path): + """Dict content survives DB roundtrip (JSON serialize on write, deserialize on read).""" + from deerflow.persistence.engine import close_engine, get_session_factory, init_engine + from deerflow.runtime.events.store.db import DbRunEventStore + + url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}" + await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)) + sf = get_session_factory() + store = DbRunEventStore(sf) + + nested = {"role": "assistant", "content": "Hi", "metadata": {"model": "gpt-4", "tokens": [1, 2, 3]}} + record = await store.put( + thread_id="t1", + run_id="r1", + event_type="ai_message", + category="message", + content=nested, + ) + assert record["content"] == nested + + messages = await store.list_messages("t1") + assert len(messages) == 1 + assert messages[0]["content"] == nested + + await close_engine() + + @pytest.mark.anyio + async def test_db_store_trace_dict_truncation(self, tmp_path): + """Large dict trace content is truncated with metadata flag.""" + from deerflow.persistence.engine import close_engine, get_session_factory, init_engine + from deerflow.runtime.events.store.db import DbRunEventStore + + url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}" + await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)) + sf = get_session_factory() + store = DbRunEventStore(sf, max_trace_content=100) + + large_dict = {"role": "assistant", "content": "x" * 200} + record = await store.put( + thread_id="t1", + run_id="r1", + event_type="llm_end", + category="trace", + content=large_dict, + ) + assert record["metadata"].get("content_truncated") is True + # Content should be a truncated string (serialized JSON was too long) + assert 
isinstance(record["content"], str) + assert len(record["content"]) <= 100 + + await close_engine() + + +class TestCheckpointAlignedHumanMessage: + @pytest.mark.anyio + async def test_human_message_checkpoint_format(self): + """human_message content uses model_dump() checkpoint format.""" + from langchain_core.messages import HumanMessage + + store = MemoryRunEventStore() + human_msg = HumanMessage(content="What is AI?") + await store.put( + thread_id="t1", + run_id="r1", + event_type="human_message", + category="message", + content=human_msg.model_dump(), + metadata={"message_id": "msg_001"}, + ) + messages = await store.list_messages("t1") + assert len(messages) == 1 + assert messages[0]["content"]["type"] == "human" + assert messages[0]["content"]["content"] == "What is AI?" + + +class TestCheckpointAlignedMessageFormat: + @pytest.mark.anyio + async def test_ai_message_checkpoint_format(self, journal_setup): + """ai_message content should be checkpoint-aligned model_dump dict.""" + j, store = journal_setup + j.on_llm_end(_make_llm_response("Answer"), run_id=uuid4(), tags=["lead_agent"]) + await j.flush() + messages = await store.list_messages("t1") + assert len(messages) == 1 + assert messages[0]["content"]["type"] == "ai" + assert messages[0]["content"]["content"] == "Answer" + assert "response_metadata" in messages[0]["content"] + assert "additional_kwargs" in messages[0]["content"] + + @pytest.mark.anyio + async def test_ai_tool_call_event(self, journal_setup): + """LLM response with tool_calls should produce ai_tool_call with model_dump content.""" + j, store = journal_setup + tool_calls = [{"id": "call_1", "name": "search", "args": {"query": "test"}}] + j.on_llm_end( + _make_llm_response("Let me search", tool_calls=tool_calls), + run_id=uuid4(), + tags=["lead_agent"], + ) + await j.flush() + messages = await store.list_messages("t1") + assert len(messages) == 1 + assert messages[0]["event_type"] == "ai_tool_call" + assert messages[0]["content"]["type"] == 
"ai" + assert messages[0]["content"]["content"] == "Let me search" + assert len(messages[0]["content"]["tool_calls"]) == 1 + tc = messages[0]["content"]["tool_calls"][0] + assert tc["id"] == "call_1" + assert tc["name"] == "search" + + @pytest.mark.anyio + async def test_ai_tool_call_only_from_lead_agent(self, journal_setup): + """ai_tool_call should only be emitted for lead_agent, not subagents.""" + j, store = journal_setup + tool_calls = [{"id": "call_1", "name": "search", "args": {}}] + j.on_llm_end( + _make_llm_response("searching", tool_calls=tool_calls), + run_id=uuid4(), + tags=["subagent:research"], + ) + await j.flush() + messages = await store.list_messages("t1") + assert len(messages) == 0 + + +class TestToolResultMessage: + @pytest.mark.anyio + async def test_tool_end_produces_tool_result_message(self, journal_setup): + j, store = journal_setup + run_id = uuid4() + j.on_tool_start({"name": "web_search"}, '{"query": "test"}', run_id=run_id, tool_call_id="call_abc") + j.on_tool_end("search results here", run_id=run_id, name="web_search", tool_call_id="call_abc") + await j.flush() + messages = await store.list_messages("t1") + assert len(messages) == 1 + assert messages[0]["event_type"] == "tool_result" + # Content is checkpoint-aligned model_dump format + assert messages[0]["content"]["type"] == "tool" + assert messages[0]["content"]["tool_call_id"] == "call_abc" + assert messages[0]["content"]["content"] == "search results here" + assert messages[0]["content"]["name"] == "web_search" + + @pytest.mark.anyio + async def test_tool_result_missing_tool_call_id(self, journal_setup): + j, store = journal_setup + run_id = uuid4() + j.on_tool_start({"name": "bash"}, "ls", run_id=run_id) + j.on_tool_end("file1.txt", run_id=run_id, name="bash") + await j.flush() + messages = await store.list_messages("t1") + assert len(messages) == 1 + assert messages[0]["content"]["type"] == "tool" + + @pytest.mark.anyio + async def 
test_tool_end_extracts_from_tool_message_object(self, journal_setup): + """When LangChain passes a ToolMessage object as output, extract fields from it.""" + from langchain_core.messages import ToolMessage + + j, store = journal_setup + run_id = uuid4() + tool_msg = ToolMessage( + content="search results", + tool_call_id="call_from_obj", + name="web_search", + status="success", + ) + j.on_tool_end(tool_msg, run_id=run_id) + await j.flush() + + messages = await store.list_messages("t1") + assert len(messages) == 1 + assert messages[0]["content"]["type"] == "tool" + assert messages[0]["content"]["tool_call_id"] == "call_from_obj" + assert messages[0]["content"]["content"] == "search results" + assert messages[0]["content"]["name"] == "web_search" + assert messages[0]["metadata"]["tool_name"] == "web_search" + assert messages[0]["metadata"]["status"] == "success" + + events = await store.list_events("t1", "r1") + tool_end = [e for e in events if e["event_type"] == "tool_end"][0] + assert tool_end["metadata"]["tool_call_id"] == "call_from_obj" + assert tool_end["metadata"]["tool_name"] == "web_search" + + @pytest.mark.anyio + async def test_tool_message_object_overrides_kwargs(self, journal_setup): + """ToolMessage object fields take priority over kwargs.""" + from langchain_core.messages import ToolMessage + + j, store = journal_setup + run_id = uuid4() + tool_msg = ToolMessage( + content="result", + tool_call_id="call_obj", + name="tool_a", + status="success", + ) + # Pass different values in kwargs — ToolMessage should win + j.on_tool_end(tool_msg, run_id=run_id, name="tool_b", tool_call_id="call_kwarg") + await j.flush() + + messages = await store.list_messages("t1") + assert messages[0]["content"]["tool_call_id"] == "call_obj" + assert messages[0]["content"]["name"] == "tool_a" + assert messages[0]["metadata"]["tool_name"] == "tool_a" + + @pytest.mark.anyio + async def test_tool_message_error_status(self, journal_setup): + """ToolMessage with status='error' 
propagates status to metadata.""" + from langchain_core.messages import ToolMessage + + j, store = journal_setup + run_id = uuid4() + tool_msg = ToolMessage( + content="something went wrong", + tool_call_id="call_err", + name="web_fetch", + status="error", + ) + j.on_tool_end(tool_msg, run_id=run_id) + await j.flush() + + events = await store.list_events("t1", "r1") + tool_end = [e for e in events if e["event_type"] == "tool_end"][0] + assert tool_end["metadata"]["status"] == "error" + + messages = await store.list_messages("t1") + assert messages[0]["content"]["status"] == "error" + assert messages[0]["metadata"]["status"] == "error" + + @pytest.mark.anyio + async def test_tool_message_fallback_to_cache(self, journal_setup): + """If ToolMessage has empty tool_call_id, fall back to cache from on_tool_start.""" + from langchain_core.messages import ToolMessage + + j, store = journal_setup + run_id = uuid4() + j.on_tool_start({"name": "bash"}, "ls", run_id=run_id, tool_call_id="call_cached") + tool_msg = ToolMessage( + content="file list", + tool_call_id="", + name="bash", + ) + j.on_tool_end(tool_msg, run_id=run_id) + await j.flush() + + messages = await store.list_messages("t1") + assert messages[0]["content"]["tool_call_id"] == "call_cached" + + @pytest.mark.anyio + async def test_tool_error_produces_tool_result_message(self, journal_setup): + j, store = journal_setup + j.on_tool_error(TimeoutError("timeout"), run_id=uuid4(), name="web_fetch", tool_call_id="call_1") + await j.flush() + messages = await store.list_messages("t1") + assert len(messages) == 1 + assert messages[0]["event_type"] == "tool_result" + assert messages[0]["content"]["type"] == "tool" + assert messages[0]["content"]["tool_call_id"] == "call_1" + assert "timeout" in messages[0]["content"]["content"] + assert messages[0]["content"]["status"] == "error" + assert messages[0]["metadata"]["status"] == "error" + + @pytest.mark.anyio + async def test_tool_error_uses_cached_tool_call_id(self, 
journal_setup): + """on_tool_error should fall back to cached tool_call_id from on_tool_start.""" + j, store = journal_setup + run_id = uuid4() + j.on_tool_start({"name": "web_fetch"}, "url", run_id=run_id, tool_call_id="call_cached") + j.on_tool_error(TimeoutError("timeout"), run_id=run_id, name="web_fetch") + await j.flush() + messages = await store.list_messages("t1") + assert len(messages) == 1 + assert messages[0]["content"]["tool_call_id"] == "call_cached" + + +def _make_base_messages(): + """Create mock LangChain BaseMessages for on_chat_model_start.""" + sys_msg = MagicMock() + sys_msg.content = "You are helpful." + sys_msg.type = "system" + sys_msg.tool_calls = [] + sys_msg.tool_call_id = None + + user_msg = MagicMock() + user_msg.content = "Hello" + user_msg.type = "human" + user_msg.tool_calls = [] + user_msg.tool_call_id = None + + return [sys_msg, user_msg] + + +class TestLlmRequestResponse: + @pytest.mark.anyio + async def test_llm_request_event(self, journal_setup): + j, store = journal_setup + run_id = uuid4() + messages = _make_base_messages() + j.on_chat_model_start({"name": "gpt-4o"}, [messages], run_id=run_id, tags=["lead_agent"]) + await j.flush() + events = await store.list_events("t1", "r1") + req_events = [e for e in events if e["event_type"] == "llm_request"] + assert len(req_events) == 1 + content = req_events[0]["content"] + assert content["model"] == "gpt-4o" + assert len(content["messages"]) == 2 + assert content["messages"][0]["role"] == "system" + assert content["messages"][1]["role"] == "user" + + @pytest.mark.anyio + async def test_llm_response_event(self, journal_setup): + j, store = journal_setup + run_id = uuid4() + j.on_llm_start({}, [], run_id=run_id, tags=["lead_agent"]) + j.on_llm_end( + _make_llm_response("Answer", usage={"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}), + run_id=run_id, + tags=["lead_agent"], + ) + await j.flush() + events = await store.list_events("t1", "r1") + assert not any(e["event_type"] == 
"llm_end" for e in events) + resp_events = [e for e in events if e["event_type"] == "llm_response"] + assert len(resp_events) == 1 + content = resp_events[0]["content"] + assert "choices" in content + assert content["choices"][0]["message"]["role"] == "assistant" + assert content["choices"][0]["message"]["content"] == "Answer" + assert content["usage"]["prompt_tokens"] == 10 + + @pytest.mark.anyio + async def test_llm_request_response_paired(self, journal_setup): + j, store = journal_setup + run_id = uuid4() + messages = _make_base_messages() + j.on_chat_model_start({"name": "gpt-4o"}, [messages], run_id=run_id, tags=["lead_agent"]) + j.on_llm_end( + _make_llm_response("Hi", usage={"input_tokens": 10, "output_tokens": 5, "total_tokens": 15}), + run_id=run_id, + tags=["lead_agent"], + ) + await j.flush() + events = await store.list_events("t1", "r1") + req = [e for e in events if e["event_type"] == "llm_request"][0] + resp = [e for e in events if e["event_type"] == "llm_response"][0] + assert req["metadata"]["llm_call_index"] == resp["metadata"]["llm_call_index"] + + @pytest.mark.anyio + async def test_no_llm_start_event(self, journal_setup): + j, store = journal_setup + run_id = uuid4() + j.on_llm_start({"name": "test"}, [], run_id=run_id, tags=["lead_agent"]) + await j.flush() + events = await store.list_events("t1", "r1") + assert not any(e["event_type"] == "llm_start" for e in events) + + +class TestMiddlewareEvents: + @pytest.mark.anyio + async def test_record_middleware_uses_middleware_category(self, journal_setup): + j, store = journal_setup + j.record_middleware( + "title", + name="TitleMiddleware", + hook="after_model", + action="generate_title", + changes={"title": "Test Title", "thread_id": "t1"}, + ) + await j.flush() + events = await store.list_events("t1", "r1") + mw_events = [e for e in events if e["event_type"] == "middleware:title"] + assert len(mw_events) == 1 + assert mw_events[0]["category"] == "middleware" + assert 
mw_events[0]["content"]["name"] == "TitleMiddleware" + assert mw_events[0]["content"]["hook"] == "after_model" + assert mw_events[0]["content"]["action"] == "generate_title" + assert mw_events[0]["content"]["changes"]["title"] == "Test Title" + + @pytest.mark.anyio + async def test_middleware_events_not_in_messages(self, journal_setup): + """Middleware events should not appear in list_messages().""" + j, store = journal_setup + j.record_middleware( + "title", + name="TitleMiddleware", + hook="after_model", + action="generate_title", + changes={"title": "Test"}, + ) + await j.flush() + messages = await store.list_messages("t1") + assert len(messages) == 0 + + @pytest.mark.anyio + async def test_middleware_tag_variants(self, journal_setup): + """Different middleware tags produce distinct event_types.""" + j, store = journal_setup + j.record_middleware("title", name="TitleMiddleware", hook="after_model", action="generate_title", changes={}) + j.record_middleware("guardrail", name="GuardrailMiddleware", hook="before_tool", action="deny", changes={}) + await j.flush() + events = await store.list_events("t1", "r1") + event_types = {e["event_type"] for e in events} + assert "middleware:title" in event_types + assert "middleware:guardrail" in event_types + + +class TestFullRunSequence: + @pytest.mark.anyio + async def test_complete_run_event_sequence(self): + """Simulate a full run: user -> LLM -> tool_call -> tool_result -> LLM -> final reply. + + All message events use checkpoint-aligned model_dump format. + """ + from langchain_core.messages import HumanMessage + + store = MemoryRunEventStore() + j = RunJournal("r1", "t1", store, flush_threshold=100) + + # 1. 
Human message (written by worker, using model_dump format) + human_msg = HumanMessage(content="Search for quantum computing") + await store.put( + thread_id="t1", + run_id="r1", + event_type="human_message", + category="message", + content=human_msg.model_dump(), + ) + j.set_first_human_message("Search for quantum computing") + + # 2. Run start + j.on_chain_start({}, {}, run_id=uuid4(), parent_run_id=None) + + # 3. First LLM call -> tool_calls + llm1_id = uuid4() + sys_msg = MagicMock(content="You are helpful.", type="system", tool_calls=[], tool_call_id=None) + user_msg = MagicMock(content="Search for quantum computing", type="human", tool_calls=[], tool_call_id=None) + j.on_chat_model_start({"name": "gpt-4o"}, [[sys_msg, user_msg]], run_id=llm1_id, tags=["lead_agent"]) + j.on_llm_end( + _make_llm_response( + "Let me search", + tool_calls=[{"id": "call_1", "name": "web_search", "args": {"query": "quantum computing"}}], + usage={"input_tokens": 100, "output_tokens": 20, "total_tokens": 120}, + ), + run_id=llm1_id, + tags=["lead_agent"], + ) + + # 4. Tool execution + tool_id = uuid4() + j.on_tool_start({"name": "web_search"}, '{"query": "quantum computing"}', run_id=tool_id, tool_call_id="call_1") + j.on_tool_end("Quantum computing results...", run_id=tool_id, name="web_search", tool_call_id="call_1") + + # 5. Middleware: title generation + j.record_middleware("title", name="TitleMiddleware", hook="after_model", action="generate_title", changes={"title": "Quantum Computing"}) + + # 6. Second LLM call -> final reply + llm2_id = uuid4() + j.on_chat_model_start({"name": "gpt-4o"}, [[sys_msg, user_msg]], run_id=llm2_id, tags=["lead_agent"]) + j.on_llm_end( + _make_llm_response( + "Here are the results about quantum computing...", + usage={"input_tokens": 200, "output_tokens": 100, "total_tokens": 300}, + ), + run_id=llm2_id, + tags=["lead_agent"], + ) + + # 7. 
Run end + j.on_chain_end({}, run_id=uuid4(), parent_run_id=None) + await asyncio.sleep(0.05) + await j.flush() + + # Verify message sequence + messages = await store.list_messages("t1") + msg_types = [m["event_type"] for m in messages] + assert msg_types == ["human_message", "ai_tool_call", "tool_result", "ai_message"] + + # Verify checkpoint-aligned format: all messages use "type" not "role" + assert messages[0]["content"]["type"] == "human" + assert messages[0]["content"]["content"] == "Search for quantum computing" + assert messages[1]["content"]["type"] == "ai" + assert "tool_calls" in messages[1]["content"] + assert messages[2]["content"]["type"] == "tool" + assert messages[2]["content"]["tool_call_id"] == "call_1" + assert messages[3]["content"]["type"] == "ai" + assert messages[3]["content"]["content"] == "Here are the results about quantum computing..." + + # Verify trace events + events = await store.list_events("t1", "r1") + trace_types = [e["event_type"] for e in events if e["category"] == "trace"] + assert "llm_request" in trace_types + assert "llm_response" in trace_types + assert "tool_start" in trace_types + assert "tool_end" in trace_types + assert "llm_start" not in trace_types + assert "llm_end" not in trace_types + + # Verify middleware events are in their own category + mw_events = [e for e in events if e["category"] == "middleware"] + assert len(mw_events) == 1 + assert mw_events[0]["event_type"] == "middleware:title" + + # Verify token accumulation + data = j.get_completion_data() + assert data["total_tokens"] == 420 # 120 + 300 + assert data["llm_call_count"] == 2 + assert data["lead_agent_tokens"] == 420 + assert data["message_count"] == 1 # only final ai_message counts + assert data["last_ai_message"] == "Here are the results about quantum computing..." 
+ + # Verify all message contents are checkpoint-aligned dicts with "type" field + for m in messages: + assert isinstance(m["content"], dict) + assert "type" in m["content"] diff --git a/backend/tests/test_run_repository.py b/backend/tests/test_run_repository.py new file mode 100644 index 000000000..0a3ddc7dc --- /dev/null +++ b/backend/tests/test_run_repository.py @@ -0,0 +1,196 @@ +"""Tests for RunRepository (SQLAlchemy-backed RunStore). + +Uses a temp SQLite DB to test ORM-backed CRUD operations. +""" + +import pytest + +from deerflow.persistence.run import RunRepository + + +async def _make_repo(tmp_path): + from deerflow.persistence.engine import get_session_factory, init_engine + + url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}" + await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)) + return RunRepository(get_session_factory()) + + +async def _cleanup(): + from deerflow.persistence.engine import close_engine + + await close_engine() + + +class TestRunRepository: + @pytest.mark.anyio + async def test_put_and_get(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.put("r1", thread_id="t1", status="pending") + row = await repo.get("r1") + assert row is not None + assert row["run_id"] == "r1" + assert row["thread_id"] == "t1" + assert row["status"] == "pending" + await _cleanup() + + @pytest.mark.anyio + async def test_get_missing_returns_none(self, tmp_path): + repo = await _make_repo(tmp_path) + assert await repo.get("nope") is None + await _cleanup() + + @pytest.mark.anyio + async def test_update_status(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.put("r1", thread_id="t1") + await repo.update_status("r1", "running") + row = await repo.get("r1") + assert row["status"] == "running" + await _cleanup() + + @pytest.mark.anyio + async def test_update_status_with_error(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.put("r1", thread_id="t1") + await repo.update_status("r1", "error", error="boom") + 
row = await repo.get("r1") + assert row["status"] == "error" + assert row["error"] == "boom" + await _cleanup() + + @pytest.mark.anyio + async def test_list_by_thread(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.put("r1", thread_id="t1") + await repo.put("r2", thread_id="t1") + await repo.put("r3", thread_id="t2") + rows = await repo.list_by_thread("t1") + assert len(rows) == 2 + assert all(r["thread_id"] == "t1" for r in rows) + await _cleanup() + + @pytest.mark.anyio + async def test_list_by_thread_owner_filter(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.put("r1", thread_id="t1", owner_id="alice") + await repo.put("r2", thread_id="t1", owner_id="bob") + rows = await repo.list_by_thread("t1", owner_id="alice") + assert len(rows) == 1 + assert rows[0]["owner_id"] == "alice" + await _cleanup() + + @pytest.mark.anyio + async def test_delete(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.put("r1", thread_id="t1") + await repo.delete("r1") + assert await repo.get("r1") is None + await _cleanup() + + @pytest.mark.anyio + async def test_delete_nonexistent_is_noop(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.delete("nope") # should not raise + await _cleanup() + + @pytest.mark.anyio + async def test_list_pending(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.put("r1", thread_id="t1", status="pending") + await repo.put("r2", thread_id="t1", status="running") + await repo.put("r3", thread_id="t2", status="pending") + pending = await repo.list_pending() + assert len(pending) == 2 + assert all(r["status"] == "pending" for r in pending) + await _cleanup() + + @pytest.mark.anyio + async def test_update_run_completion(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.put("r1", thread_id="t1", status="running") + await repo.update_run_completion( + "r1", + status="success", + total_input_tokens=100, + total_output_tokens=50, + total_tokens=150, + llm_call_count=2, 
+ lead_agent_tokens=120, + subagent_tokens=20, + middleware_tokens=10, + message_count=3, + last_ai_message="The answer is 42", + first_human_message="What is the meaning?", + ) + row = await repo.get("r1") + assert row["status"] == "success" + assert row["total_tokens"] == 150 + assert row["llm_call_count"] == 2 + assert row["lead_agent_tokens"] == 120 + assert row["message_count"] == 3 + assert row["last_ai_message"] == "The answer is 42" + assert row["first_human_message"] == "What is the meaning?" + await _cleanup() + + @pytest.mark.anyio + async def test_metadata_preserved(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.put("r1", thread_id="t1", metadata={"key": "value"}) + row = await repo.get("r1") + assert row["metadata"] == {"key": "value"} + await _cleanup() + + @pytest.mark.anyio + async def test_kwargs_with_non_serializable(self, tmp_path): + """kwargs containing non-JSON-serializable objects should be safely handled.""" + repo = await _make_repo(tmp_path) + + class Dummy: + pass + + await repo.put("r1", thread_id="t1", kwargs={"obj": Dummy()}) + row = await repo.get("r1") + assert "obj" in row["kwargs"] + await _cleanup() + + @pytest.mark.anyio + async def test_update_run_completion_preserves_existing_fields(self, tmp_path): + """update_run_completion does not overwrite thread_id or assistant_id.""" + repo = await _make_repo(tmp_path) + await repo.put("r1", thread_id="t1", assistant_id="agent1", status="running") + await repo.update_run_completion("r1", status="success", total_tokens=100) + row = await repo.get("r1") + assert row["thread_id"] == "t1" + assert row["assistant_id"] == "agent1" + assert row["total_tokens"] == 100 + await _cleanup() + + @pytest.mark.anyio + async def test_list_by_thread_ordered_desc(self, tmp_path): + """list_by_thread returns newest first.""" + repo = await _make_repo(tmp_path) + await repo.put("r1", thread_id="t1", created_at="2024-01-01T00:00:00+00:00") + await repo.put("r2", thread_id="t1", 
created_at="2024-01-02T00:00:00+00:00") + rows = await repo.list_by_thread("t1") + assert rows[0]["run_id"] == "r2" + assert rows[1]["run_id"] == "r1" + await _cleanup() + + @pytest.mark.anyio + async def test_list_by_thread_limit(self, tmp_path): + repo = await _make_repo(tmp_path) + for i in range(5): + await repo.put(f"r{i}", thread_id="t1") + rows = await repo.list_by_thread("t1", limit=2) + assert len(rows) == 2 + await _cleanup() + + @pytest.mark.anyio + async def test_owner_none_returns_all(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.put("r1", thread_id="t1", owner_id="alice") + await repo.put("r2", thread_id="t1", owner_id="bob") + rows = await repo.list_by_thread("t1", owner_id=None) + assert len(rows) == 2 + await _cleanup() diff --git a/backend/tests/test_thread_meta_repo.py b/backend/tests/test_thread_meta_repo.py new file mode 100644 index 000000000..dbb747a26 --- /dev/null +++ b/backend/tests/test_thread_meta_repo.py @@ -0,0 +1,157 @@ +"""Tests for ThreadMetaRepository (SQLAlchemy-backed).""" + +import pytest + +from deerflow.persistence.thread_meta import ThreadMetaRepository + + +async def _make_repo(tmp_path): + from deerflow.persistence.engine import get_session_factory, init_engine + + url = f"sqlite+aiosqlite:///{tmp_path / 'test.db'}" + await init_engine("sqlite", url=url, sqlite_dir=str(tmp_path)) + return ThreadMetaRepository(get_session_factory()) + + +async def _cleanup(): + from deerflow.persistence.engine import close_engine + + await close_engine() + + +class TestThreadMetaRepository: + @pytest.mark.anyio + async def test_create_and_get(self, tmp_path): + repo = await _make_repo(tmp_path) + record = await repo.create("t1") + assert record["thread_id"] == "t1" + assert record["status"] == "idle" + assert "created_at" in record + + fetched = await repo.get("t1") + assert fetched is not None + assert fetched["thread_id"] == "t1" + await _cleanup() + + @pytest.mark.anyio + async def test_create_with_assistant_id(self, 
tmp_path): + repo = await _make_repo(tmp_path) + record = await repo.create("t1", assistant_id="agent1") + assert record["assistant_id"] == "agent1" + await _cleanup() + + @pytest.mark.anyio + async def test_create_with_owner_and_display_name(self, tmp_path): + repo = await _make_repo(tmp_path) + record = await repo.create("t1", owner_id="user1", display_name="My Thread") + assert record["owner_id"] == "user1" + assert record["display_name"] == "My Thread" + await _cleanup() + + @pytest.mark.anyio + async def test_create_with_metadata(self, tmp_path): + repo = await _make_repo(tmp_path) + record = await repo.create("t1", metadata={"key": "value"}) + assert record["metadata"] == {"key": "value"} + await _cleanup() + + @pytest.mark.anyio + async def test_get_nonexistent(self, tmp_path): + repo = await _make_repo(tmp_path) + assert await repo.get("nonexistent") is None + await _cleanup() + + @pytest.mark.anyio + async def test_list_by_owner(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.create("t1", owner_id="user1") + await repo.create("t2", owner_id="user1") + await repo.create("t3", owner_id="user2") + results = await repo.list_by_owner("user1") + assert len(results) == 2 + assert all(r["owner_id"] == "user1" for r in results) + await _cleanup() + + @pytest.mark.anyio + async def test_list_by_owner_with_limit_and_offset(self, tmp_path): + repo = await _make_repo(tmp_path) + for i in range(5): + await repo.create(f"t{i}", owner_id="user1") + results = await repo.list_by_owner("user1", limit=2, offset=1) + assert len(results) == 2 + await _cleanup() + + @pytest.mark.anyio + async def test_check_access_no_record_allows(self, tmp_path): + repo = await _make_repo(tmp_path) + assert await repo.check_access("unknown", "user1") is True + await _cleanup() + + @pytest.mark.anyio + async def test_check_access_owner_matches(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.create("t1", owner_id="user1") + assert await repo.check_access("t1", 
"user1") is True + await _cleanup() + + @pytest.mark.anyio + async def test_check_access_owner_mismatch(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.create("t1", owner_id="user1") + assert await repo.check_access("t1", "user2") is False + await _cleanup() + + @pytest.mark.anyio + async def test_check_access_no_owner_allows_all(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.create("t1") # owner_id=None + assert await repo.check_access("t1", "anyone") is True + await _cleanup() + + @pytest.mark.anyio + async def test_update_status(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.create("t1") + await repo.update_status("t1", "busy") + record = await repo.get("t1") + assert record["status"] == "busy" + await _cleanup() + + @pytest.mark.anyio + async def test_delete(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.create("t1") + await repo.delete("t1") + assert await repo.get("t1") is None + await _cleanup() + + @pytest.mark.anyio + async def test_delete_nonexistent_is_noop(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.delete("nonexistent") # should not raise + await _cleanup() + + @pytest.mark.anyio + async def test_update_metadata_merges(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.create("t1", metadata={"a": 1, "b": 2}) + await repo.update_metadata("t1", {"b": 99, "c": 3}) + record = await repo.get("t1") + # Existing key preserved, overlapping key overwritten, new key added + assert record["metadata"] == {"a": 1, "b": 99, "c": 3} + await _cleanup() + + @pytest.mark.anyio + async def test_update_metadata_on_empty(self, tmp_path): + repo = await _make_repo(tmp_path) + await repo.create("t1") + await repo.update_metadata("t1", {"k": "v"}) + record = await repo.get("t1") + assert record["metadata"] == {"k": "v"} + await _cleanup() + + @pytest.mark.anyio + async def test_update_metadata_nonexistent_is_noop(self, tmp_path): + repo = await _make_repo(tmp_path) 
+ await repo.update_metadata("nonexistent", {"k": "v"}) # should not raise + await _cleanup() diff --git a/backend/uv.lock b/backend/uv.lock index 92a20393e..d6145bef9 100644 --- a/backend/uv.lock +++ b/backend/uv.lock @@ -158,6 +158,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/00/b7/e3bf5133d697a08128598c8d0abc5e16377b51465a33756de24fa7dee953/aiosqlite-0.22.1-py3-none-any.whl", hash = "sha256:21c002eb13823fad740196c5a2e9d8e62f6243bd9e7e4a1f87fb5e44ecb4fceb", size = 17405, upload-time = "2025-12-23T19:25:42.139Z" }, ] +[[package]] +name = "alembic" +version = "1.18.4" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mako" }, + { name = "sqlalchemy" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/94/13/8b084e0f2efb0275a1d534838844926f798bd766566b1375174e2448cd31/alembic-1.18.4.tar.gz", hash = "sha256:cb6e1fd84b6174ab8dbb2329f86d631ba9559dd78df550b57804d607672cedbc", size = 2056725, upload-time = "2026-02-10T16:00:47.195Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/29/6533c317b74f707ea28f8d633734dbda2119bbadfc61b2f3640ba835d0f7/alembic-1.18.4-py3-none-any.whl", hash = "sha256:a5ed4adcf6d8a4cb575f3d759f071b03cd6e5c7618eb796cb52497be25bfe19a", size = 263893, upload-time = "2026-02-10T16:00:49.997Z" }, +] + [[package]] name = "annotated-doc" version = "0.0.4" @@ -208,6 +222,46 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, ] +[[package]] +name = "asyncpg" +version = "0.31.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fe/cc/d18065ce2380d80b1bcce927c24a2642efd38918e33fd724bc4bca904877/asyncpg-0.31.0.tar.gz", hash = 
"sha256:c989386c83940bfbd787180f2b1519415e2d3d6277a70d9d0f0145ac73500735", size = 993667, upload-time = "2025-11-24T23:27:00.812Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/a6/59d0a146e61d20e18db7396583242e32e0f120693b67a8de43f1557033e2/asyncpg-0.31.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b44c31e1efc1c15188ef183f287c728e2046abb1d26af4d20858215d50d91fad", size = 662042, upload-time = "2025-11-24T23:25:49.578Z" }, + { url = "https://files.pythonhosted.org/packages/36/01/ffaa189dcb63a2471720615e60185c3f6327716fdc0fc04334436fbb7c65/asyncpg-0.31.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:0c89ccf741c067614c9b5fc7f1fc6f3b61ab05ae4aaa966e6fd6b93097c7d20d", size = 638504, upload-time = "2025-11-24T23:25:51.501Z" }, + { url = "https://files.pythonhosted.org/packages/9f/62/3f699ba45d8bd24c5d65392190d19656d74ff0185f42e19d0bbd973bb371/asyncpg-0.31.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:12b3b2e39dc5470abd5e98c8d3373e4b1d1234d9fbdedf538798b2c13c64460a", size = 3426241, upload-time = "2025-11-24T23:25:53.278Z" }, + { url = "https://files.pythonhosted.org/packages/8c/d1/a867c2150f9c6e7af6462637f613ba67f78a314b00db220cd26ff559d532/asyncpg-0.31.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:aad7a33913fb8bcb5454313377cc330fbb19a0cd5faa7272407d8a0c4257b671", size = 3520321, upload-time = "2025-11-24T23:25:54.982Z" }, + { url = "https://files.pythonhosted.org/packages/7a/1a/cce4c3f246805ecd285a3591222a2611141f1669d002163abef999b60f98/asyncpg-0.31.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3df118d94f46d85b2e434fd62c84cb66d5834d5a890725fe625f498e72e4d5ec", size = 3316685, upload-time = "2025-11-24T23:25:57.43Z" }, + { url = "https://files.pythonhosted.org/packages/40/ae/0fc961179e78cc579e138fad6eb580448ecae64908f95b8cb8ee2f241f67/asyncpg-0.31.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bd5b6efff3c17c3202d4b37189969acf8927438a238c6257f66be3c426beba20", size = 3471858, upload-time = 
"2025-11-24T23:25:59.636Z" }, + { url = "https://files.pythonhosted.org/packages/52/b2/b20e09670be031afa4cbfabd645caece7f85ec62d69c312239de568e058e/asyncpg-0.31.0-cp312-cp312-win32.whl", hash = "sha256:027eaa61361ec735926566f995d959ade4796f6a49d3bde17e5134b9964f9ba8", size = 527852, upload-time = "2025-11-24T23:26:01.084Z" }, + { url = "https://files.pythonhosted.org/packages/b5/f0/f2ed1de154e15b107dc692262395b3c17fc34eafe2a78fc2115931561730/asyncpg-0.31.0-cp312-cp312-win_amd64.whl", hash = "sha256:72d6bdcbc93d608a1158f17932de2321f68b1a967a13e014998db87a72ed3186", size = 597175, upload-time = "2025-11-24T23:26:02.564Z" }, + { url = "https://files.pythonhosted.org/packages/95/11/97b5c2af72a5d0b9bc3fa30cd4b9ce22284a9a943a150fdc768763caf035/asyncpg-0.31.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c204fab1b91e08b0f47e90a75d1b3c62174dab21f670ad6c5d0f243a228f015b", size = 661111, upload-time = "2025-11-24T23:26:04.467Z" }, + { url = "https://files.pythonhosted.org/packages/1b/71/157d611c791a5e2d0423f09f027bd499935f0906e0c2a416ce712ba51ef3/asyncpg-0.31.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:54a64f91839ba59008eccf7aad2e93d6e3de688d796f35803235ea1c4898ae1e", size = 636928, upload-time = "2025-11-24T23:26:05.944Z" }, + { url = "https://files.pythonhosted.org/packages/2e/fc/9e3486fb2bbe69d4a867c0b76d68542650a7ff1574ca40e84c3111bb0c6e/asyncpg-0.31.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c0e0822b1038dc7253b337b0f3f676cadc4ac31b126c5d42691c39691962e403", size = 3424067, upload-time = "2025-11-24T23:26:07.957Z" }, + { url = "https://files.pythonhosted.org/packages/12/c6/8c9d076f73f07f995013c791e018a1cd5f31823c2a3187fc8581706aa00f/asyncpg-0.31.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bef056aa502ee34204c161c72ca1f3c274917596877f825968368b2c33f585f4", size = 3518156, upload-time = "2025-11-24T23:26:09.591Z" }, + { url = 
"https://files.pythonhosted.org/packages/ae/3b/60683a0baf50fbc546499cfb53132cb6835b92b529a05f6a81471ab60d0c/asyncpg-0.31.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0bfbcc5b7ffcd9b75ab1558f00db2ae07db9c80637ad1b2469c43df79d7a5ae2", size = 3319636, upload-time = "2025-11-24T23:26:11.168Z" }, + { url = "https://files.pythonhosted.org/packages/50/dc/8487df0f69bd398a61e1792b3cba0e47477f214eff085ba0efa7eac9ce87/asyncpg-0.31.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:22bc525ebbdc24d1261ecbf6f504998244d4e3be1721784b5f64664d61fbe602", size = 3472079, upload-time = "2025-11-24T23:26:13.164Z" }, + { url = "https://files.pythonhosted.org/packages/13/a1/c5bbeeb8531c05c89135cb8b28575ac2fac618bcb60119ee9696c3faf71c/asyncpg-0.31.0-cp313-cp313-win32.whl", hash = "sha256:f890de5e1e4f7e14023619399a471ce4b71f5418cd67a51853b9910fdfa73696", size = 527606, upload-time = "2025-11-24T23:26:14.78Z" }, + { url = "https://files.pythonhosted.org/packages/91/66/b25ccb84a246b470eb943b0107c07edcae51804912b824054b3413995a10/asyncpg-0.31.0-cp313-cp313-win_amd64.whl", hash = "sha256:dc5f2fa9916f292e5c5c8b2ac2813763bcd7f58e130055b4ad8a0531314201ab", size = 596569, upload-time = "2025-11-24T23:26:16.189Z" }, + { url = "https://files.pythonhosted.org/packages/3c/36/e9450d62e84a13aea6580c83a47a437f26c7ca6fa0f0fd40b6670793ea30/asyncpg-0.31.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:f6b56b91bb0ffc328c4e3ed113136cddd9deefdf5f79ab448598b9772831df44", size = 660867, upload-time = "2025-11-24T23:26:17.631Z" }, + { url = "https://files.pythonhosted.org/packages/82/4b/1d0a2b33b3102d210439338e1beea616a6122267c0df459ff0265cd5807a/asyncpg-0.31.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:334dec28cf20d7f5bb9e45b39546ddf247f8042a690bff9b9573d00086e69cb5", size = 638349, upload-time = "2025-11-24T23:26:19.689Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/aa/e7f7ac9a7974f08eff9183e392b2d62516f90412686532d27e196c0f0eeb/asyncpg-0.31.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:98cc158c53f46de7bb677fd20c417e264fc02b36d901cc2a43bd6cb0dc6dbfd2", size = 3410428, upload-time = "2025-11-24T23:26:21.275Z" }, + { url = "https://files.pythonhosted.org/packages/6f/de/bf1b60de3dede5c2731e6788617a512bc0ebd9693eac297ee74086f101d7/asyncpg-0.31.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9322b563e2661a52e3cdbc93eed3be7748b289f792e0011cb2720d278b366ce2", size = 3471678, upload-time = "2025-11-24T23:26:23.627Z" }, + { url = "https://files.pythonhosted.org/packages/46/78/fc3ade003e22d8bd53aaf8f75f4be48f0b460fa73738f0391b9c856a9147/asyncpg-0.31.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:19857a358fc811d82227449b7ca40afb46e75b33eb8897240c3839dd8b744218", size = 3313505, upload-time = "2025-11-24T23:26:25.235Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e9/73eb8a6789e927816f4705291be21f2225687bfa97321e40cd23055e903a/asyncpg-0.31.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ba5f8886e850882ff2c2ace5732300e99193823e8107e2c53ef01c1ebfa1e85d", size = 3434744, upload-time = "2025-11-24T23:26:26.944Z" }, + { url = "https://files.pythonhosted.org/packages/08/4b/f10b880534413c65c5b5862f79b8e81553a8f364e5238832ad4c0af71b7f/asyncpg-0.31.0-cp314-cp314-win32.whl", hash = "sha256:cea3a0b2a14f95834cee29432e4ddc399b95700eb1d51bbc5bfee8f31fa07b2b", size = 532251, upload-time = "2025-11-24T23:26:28.404Z" }, + { url = "https://files.pythonhosted.org/packages/d3/2d/7aa40750b7a19efa5d66e67fc06008ca0f27ba1bd082e457ad82f59aba49/asyncpg-0.31.0-cp314-cp314-win_amd64.whl", hash = "sha256:04d19392716af6b029411a0264d92093b6e5e8285ae97a39957b9a9c14ea72be", size = 604901, upload-time = "2025-11-24T23:26:30.34Z" }, + { url = 
"https://files.pythonhosted.org/packages/ce/fe/b9dfe349b83b9dee28cc42360d2c86b2cdce4cb551a2c2d27e156bcac84d/asyncpg-0.31.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:bdb957706da132e982cc6856bb2f7b740603472b54c3ebc77fe60ea3e57e1bd2", size = 702280, upload-time = "2025-11-24T23:26:32Z" }, + { url = "https://files.pythonhosted.org/packages/6a/81/e6be6e37e560bd91e6c23ea8a6138a04fd057b08cf63d3c5055c98e81c1d/asyncpg-0.31.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:6d11b198111a72f47154fa03b85799f9be63701e068b43f84ac25da0bda9cb31", size = 682931, upload-time = "2025-11-24T23:26:33.572Z" }, + { url = "https://files.pythonhosted.org/packages/a6/45/6009040da85a1648dd5bc75b3b0a062081c483e75a1a29041ae63a0bf0dc/asyncpg-0.31.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18c83b03bc0d1b23e6230f5bf8d4f217dc9bc08644ce0502a9d91dc9e634a9c7", size = 3581608, upload-time = "2025-11-24T23:26:35.638Z" }, + { url = "https://files.pythonhosted.org/packages/7e/06/2e3d4d7608b0b2b3adbee0d0bd6a2d29ca0fc4d8a78f8277df04e2d1fd7b/asyncpg-0.31.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e009abc333464ff18b8f6fd146addffd9aaf63e79aa3bb40ab7a4c332d0c5e9e", size = 3498738, upload-time = "2025-11-24T23:26:37.275Z" }, + { url = "https://files.pythonhosted.org/packages/7d/aa/7d75ede780033141c51d83577ea23236ba7d3a23593929b32b49db8ed36e/asyncpg-0.31.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:3b1fbcb0e396a5ca435a8826a87e5c2c2cc0c8c68eb6fadf82168056b0e53a8c", size = 3401026, upload-time = "2025-11-24T23:26:39.423Z" }, + { url = "https://files.pythonhosted.org/packages/ba/7a/15e37d45e7f7c94facc1e9148c0e455e8f33c08f0b8a0b1deb2c5171771b/asyncpg-0.31.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:8df714dba348efcc162d2adf02d213e5fab1bd9f557e1305633e851a61814a7a", size = 3429426, upload-time = "2025-11-24T23:26:41.032Z" }, + { url = 
"https://files.pythonhosted.org/packages/13/d5/71437c5f6ae5f307828710efbe62163974e71237d5d46ebd2869ea052d10/asyncpg-0.31.0-cp314-cp314t-win32.whl", hash = "sha256:1b41f1afb1033f2b44f3234993b15096ddc9cd71b21a42dbd87fc6a57b43d65d", size = 614495, upload-time = "2025-11-24T23:26:42.659Z" }, + { url = "https://files.pythonhosted.org/packages/3c/d7/8fb3044eaef08a310acfe23dae9a8e2e07d305edc29a53497e52bc76eca7/asyncpg-0.31.0-cp314-cp314t-win_amd64.whl", hash = "sha256:bd4107bb7cdd0e9e65fae66a62afd3a249663b844fa34d479f6d5b3bef9c04c3", size = 706062, upload-time = "2025-11-24T23:26:44.086Z" }, +] + [[package]] name = "attrs" version = "25.4.0" @@ -684,6 +738,11 @@ dependencies = [ { name = "wecom-aibot-python-sdk" }, ] +[package.optional-dependencies] +postgres = [ + { name = "deerflow-harness", extra = ["postgres"] }, +] + [package.dev-dependencies] dev = [ { name = "pytest" }, @@ -693,6 +752,7 @@ dev = [ [package.metadata] requires-dist = [ { name = "deerflow-harness", editable = "packages/harness" }, + { name = "deerflow-harness", extras = ["postgres"], marker = "extra == 'postgres'", editable = "packages/harness" }, { name = "fastapi", specifier = ">=0.115.0" }, { name = "httpx", specifier = ">=0.28.0" }, { name = "langgraph-sdk", specifier = ">=0.1.51" }, @@ -705,6 +765,7 @@ requires-dist = [ { name = "uvicorn", extras = ["standard"], specifier = ">=0.34.0" }, { name = "wecom-aibot-python-sdk", specifier = ">=0.1.6" }, ] +provides-extras = ["postgres"] [package.metadata.requires-dev] dev = [ @@ -719,6 +780,8 @@ source = { editable = "packages/harness" } dependencies = [ { name = "agent-client-protocol" }, { name = "agent-sandbox" }, + { name = "aiosqlite" }, + { name = "alembic" }, { name = "ddgs" }, { name = "dotenv" }, { name = "duckdb" }, @@ -744,11 +807,18 @@ dependencies = [ { name = "pydantic" }, { name = "pyyaml" }, { name = "readabilipy" }, + { name = "sqlalchemy", extra = ["asyncio"] }, { name = "tavily-python" }, { name = "tiktoken" }, ] 
[package.optional-dependencies] +postgres = [ + { name = "asyncpg" }, + { name = "langgraph-checkpoint-postgres" }, + { name = "psycopg", extra = ["binary"] }, + { name = "psycopg-pool" }, +] pymupdf = [ { name = "pymupdf4llm" }, ] @@ -757,6 +827,9 @@ pymupdf = [ requires-dist = [ { name = "agent-client-protocol", specifier = ">=0.4.0" }, { name = "agent-sandbox", specifier = ">=0.0.19" }, + { name = "aiosqlite", specifier = ">=0.19" }, + { name = "alembic", specifier = ">=1.13" }, + { name = "asyncpg", marker = "extra == 'postgres'", specifier = ">=0.29" }, { name = "ddgs", specifier = ">=9.10.0" }, { name = "dotenv", specifier = ">=0.9.9" }, { name = "duckdb", specifier = ">=1.4.4" }, @@ -773,20 +846,24 @@ requires-dist = [ { name = "langfuse", specifier = ">=3.4.1" }, { name = "langgraph", specifier = ">=1.0.6,<1.0.10" }, { name = "langgraph-api", specifier = ">=0.7.0,<0.8.0" }, + { name = "langgraph-checkpoint-postgres", marker = "extra == 'postgres'", specifier = ">=3.0.5" }, { name = "langgraph-checkpoint-sqlite", specifier = ">=3.0.3" }, { name = "langgraph-cli", specifier = ">=0.4.14" }, { name = "langgraph-runtime-inmem", specifier = ">=0.22.1" }, { name = "langgraph-sdk", specifier = ">=0.1.51" }, { name = "markdownify", specifier = ">=1.2.2" }, { name = "markitdown", extras = ["all", "xlsx"], specifier = ">=0.0.1a2" }, + { name = "psycopg", extras = ["binary"], marker = "extra == 'postgres'", specifier = ">=3.3.3" }, + { name = "psycopg-pool", marker = "extra == 'postgres'", specifier = ">=3.3.0" }, { name = "pydantic", specifier = ">=2.12.5" }, { name = "pymupdf4llm", marker = "extra == 'pymupdf'", specifier = ">=0.0.17" }, { name = "pyyaml", specifier = ">=6.0.3" }, { name = "readabilipy", specifier = ">=0.3.0" }, + { name = "sqlalchemy", extras = ["asyncio"], specifier = ">=2.0,<3.0" }, { name = "tavily-python", specifier = ">=0.7.17" }, { name = "tiktoken", specifier = ">=0.8.0" }, ] -provides-extras = ["pymupdf"] +provides-extras = ["postgres", 
"pymupdf"] [[package]] name = "defusedxml" @@ -1097,6 +1174,49 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c4/ab/09169d5a4612a5f92490806649ac8d41e3ec9129c636754575b3553f4ea4/googleapis_common_protos-1.72.0-py3-none-any.whl", hash = "sha256:4299c5a82d5ae1a9702ada957347726b167f9f8d1fc352477702a1e851ff4038", size = 297515, upload-time = "2025-11-06T18:29:13.14Z" }, ] +[[package]] +name = "greenlet" +version = "3.3.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a3/51/1664f6b78fc6ebbd98019a1fd730e83fa78f2db7058f72b1463d3612b8db/greenlet-3.3.2.tar.gz", hash = "sha256:2eaf067fc6d886931c7962e8c6bede15d2f01965560f3359b27c80bde2d151f2", size = 188267, upload-time = "2026-02-20T20:54:15.531Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ea/ab/1608e5a7578e62113506740b88066bf09888322a311cff602105e619bd87/greenlet-3.3.2-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:ac8d61d4343b799d1e526db579833d72f23759c71e07181c2d2944e429eb09cd", size = 280358, upload-time = "2026-02-20T20:17:43.971Z" }, + { url = "https://files.pythonhosted.org/packages/a5/23/0eae412a4ade4e6623ff7626e38998cb9b11e9ff1ebacaa021e4e108ec15/greenlet-3.3.2-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3ceec72030dae6ac0c8ed7591b96b70410a8be370b6a477b1dbc072856ad02bd", size = 601217, upload-time = "2026-02-20T20:47:31.462Z" }, + { url = "https://files.pythonhosted.org/packages/f8/16/5b1678a9c07098ecb9ab2dd159fafaf12e963293e61ee8d10ecb55273e5e/greenlet-3.3.2-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a2a5be83a45ce6188c045bcc44b0ee037d6a518978de9a5d97438548b953a1ac", size = 611792, upload-time = "2026-02-20T20:55:58.423Z" }, + { url = "https://files.pythonhosted.org/packages/5c/c5/cc09412a29e43406eba18d61c70baa936e299bc27e074e2be3806ed29098/greenlet-3.3.2-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:ae9e21c84035c490506c17002f5c8ab25f980205c3e61ddb3a2a2a2e6c411fcb", size = 626250, upload-time = "2026-02-20T21:02:46.596Z" }, + { url = "https://files.pythonhosted.org/packages/50/1f/5155f55bd71cabd03765a4aac9ac446be129895271f73872c36ebd4b04b6/greenlet-3.3.2-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:43e99d1749147ac21dde49b99c9abffcbc1e2d55c67501465ef0930d6e78e070", size = 613875, upload-time = "2026-02-20T20:21:01.102Z" }, + { url = "https://files.pythonhosted.org/packages/fc/dd/845f249c3fcd69e32df80cdab059b4be8b766ef5830a3d0aa9d6cad55beb/greenlet-3.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:4c956a19350e2c37f2c48b336a3afb4bff120b36076d9d7fb68cb44e05d95b79", size = 1571467, upload-time = "2026-02-20T20:49:33.495Z" }, + { url = "https://files.pythonhosted.org/packages/2a/50/2649fe21fcc2b56659a452868e695634722a6655ba245d9f77f5656010bf/greenlet-3.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6c6f8ba97d17a1e7d664151284cb3315fc5f8353e75221ed4324f84eb162b395", size = 1640001, upload-time = "2026-02-20T20:21:09.154Z" }, + { url = "https://files.pythonhosted.org/packages/9b/40/cc802e067d02af8b60b6771cea7d57e21ef5e6659912814babb42b864713/greenlet-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:34308836d8370bddadb41f5a7ce96879b72e2fdfb4e87729330c6ab52376409f", size = 231081, upload-time = "2026-02-20T20:17:28.121Z" }, + { url = "https://files.pythonhosted.org/packages/58/2e/fe7f36ff1982d6b10a60d5e0740c759259a7d6d2e1dc41da6d96de32fff6/greenlet-3.3.2-cp312-cp312-win_arm64.whl", hash = "sha256:d3a62fa76a32b462a97198e4c9e99afb9ab375115e74e9a83ce180e7a496f643", size = 230331, upload-time = "2026-02-20T20:17:23.34Z" }, + { url = "https://files.pythonhosted.org/packages/ac/48/f8b875fa7dea7dd9b33245e37f065af59df6a25af2f9561efa8d822fde51/greenlet-3.3.2-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:aa6ac98bdfd716a749b84d4034486863fd81c3abde9aa3cf8eff9127981a4ae4", size = 279120, upload-time = 
"2026-02-20T20:19:01.9Z" }, + { url = "https://files.pythonhosted.org/packages/49/8d/9771d03e7a8b1ee456511961e1b97a6d77ae1dea4a34a5b98eee706689d3/greenlet-3.3.2-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab0c7e7901a00bc0a7284907273dc165b32e0d109a6713babd04471327ff7986", size = 603238, upload-time = "2026-02-20T20:47:32.873Z" }, + { url = "https://files.pythonhosted.org/packages/59/0e/4223c2bbb63cd5c97f28ffb2a8aee71bdfb30b323c35d409450f51b91e3e/greenlet-3.3.2-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d248d8c23c67d2291ffd47af766e2a3aa9fa1c6703155c099feb11f526c63a92", size = 614219, upload-time = "2026-02-20T20:55:59.817Z" }, + { url = "https://files.pythonhosted.org/packages/94/2b/4d012a69759ac9d77210b8bfb128bc621125f5b20fc398bce3940d036b1c/greenlet-3.3.2-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ccd21bb86944ca9be6d967cf7691e658e43417782bce90b5d2faeda0ff78a7dd", size = 628268, upload-time = "2026-02-20T21:02:48.024Z" }, + { url = "https://files.pythonhosted.org/packages/7a/34/259b28ea7a2a0c904b11cd36c79b8cef8019b26ee5dbe24e73b469dea347/greenlet-3.3.2-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b6997d360a4e6a4e936c0f9625b1c20416b8a0ea18a8e19cabbefc712e7397ab", size = 616774, upload-time = "2026-02-20T20:21:02.454Z" }, + { url = "https://files.pythonhosted.org/packages/0a/03/996c2d1689d486a6e199cb0f1cf9e4aa940c500e01bdf201299d7d61fa69/greenlet-3.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:64970c33a50551c7c50491671265d8954046cb6e8e2999aacdd60e439b70418a", size = 1571277, upload-time = "2026-02-20T20:49:34.795Z" }, + { url = "https://files.pythonhosted.org/packages/d9/c4/2570fc07f34a39f2caf0bf9f24b0a1a0a47bc2e8e465b2c2424821389dfc/greenlet-3.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:1a9172f5bf6bd88e6ba5a84e0a68afeac9dc7b6b412b245dd64f52d83c81e55b", size = 1640455, upload-time = "2026-02-20T20:21:10.261Z" 
}, + { url = "https://files.pythonhosted.org/packages/91/39/5ef5aa23bc545aa0d31e1b9b55822b32c8da93ba657295840b6b34124009/greenlet-3.3.2-cp313-cp313-win_amd64.whl", hash = "sha256:a7945dd0eab63ded0a48e4dcade82939783c172290a7903ebde9e184333ca124", size = 230961, upload-time = "2026-02-20T20:16:58.461Z" }, + { url = "https://files.pythonhosted.org/packages/62/6b/a89f8456dcb06becff288f563618e9f20deed8dd29beea14f9a168aef64b/greenlet-3.3.2-cp313-cp313-win_arm64.whl", hash = "sha256:394ead29063ee3515b4e775216cb756b2e3b4a7e55ae8fd884f17fa579e6b327", size = 230221, upload-time = "2026-02-20T20:17:37.152Z" }, + { url = "https://files.pythonhosted.org/packages/3f/ae/8bffcbd373b57a5992cd077cbe8858fff39110480a9d50697091faea6f39/greenlet-3.3.2-cp314-cp314-macosx_11_0_universal2.whl", hash = "sha256:8d1658d7291f9859beed69a776c10822a0a799bc4bfe1bd4272bb60e62507dab", size = 279650, upload-time = "2026-02-20T20:18:00.783Z" }, + { url = "https://files.pythonhosted.org/packages/d1/c0/45f93f348fa49abf32ac8439938726c480bd96b2a3c6f4d949ec0124b69f/greenlet-3.3.2-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:18cb1b7337bca281915b3c5d5ae19f4e76d35e1df80f4ad3c1a7be91fadf1082", size = 650295, upload-time = "2026-02-20T20:47:34.036Z" }, + { url = "https://files.pythonhosted.org/packages/b3/de/dd7589b3f2b8372069ab3e4763ea5329940fc7ad9dcd3e272a37516d7c9b/greenlet-3.3.2-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c2e47408e8ce1c6f1ceea0dffcdf6ebb85cc09e55c7af407c99f1112016e45e9", size = 662163, upload-time = "2026-02-20T20:56:01.295Z" }, + { url = "https://files.pythonhosted.org/packages/cd/ac/85804f74f1ccea31ba518dcc8ee6f14c79f73fe36fa1beba38930806df09/greenlet-3.3.2-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e3cb43ce200f59483eb82949bf1835a99cf43d7571e900d7c8d5c62cdf25d2f9", size = 675371, upload-time = "2026-02-20T21:02:49.664Z" }, + { url = 
"https://files.pythonhosted.org/packages/d2/d8/09bfa816572a4d83bccd6750df1926f79158b1c36c5f73786e26dbe4ee38/greenlet-3.3.2-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63d10328839d1973e5ba35e98cccbca71b232b14051fd957b6f8b6e8e80d0506", size = 664160, upload-time = "2026-02-20T20:21:04.015Z" }, + { url = "https://files.pythonhosted.org/packages/48/cf/56832f0c8255d27f6c35d41b5ec91168d74ec721d85f01a12131eec6b93c/greenlet-3.3.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:8e4ab3cfb02993c8cc248ea73d7dae6cec0253e9afa311c9b37e603ca9fad2ce", size = 1619181, upload-time = "2026-02-20T20:49:36.052Z" }, + { url = "https://files.pythonhosted.org/packages/0a/23/b90b60a4aabb4cec0796e55f25ffbfb579a907c3898cd2905c8918acaa16/greenlet-3.3.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:94ad81f0fd3c0c0681a018a976e5c2bd2ca2d9d94895f23e7bb1af4e8af4e2d5", size = 1687713, upload-time = "2026-02-20T20:21:11.684Z" }, + { url = "https://files.pythonhosted.org/packages/f3/ca/2101ca3d9223a1dc125140dbc063644dca76df6ff356531eb27bc267b446/greenlet-3.3.2-cp314-cp314-win_amd64.whl", hash = "sha256:8c4dd0f3997cf2512f7601563cc90dfb8957c0cff1e3a1b23991d4ea1776c492", size = 232034, upload-time = "2026-02-20T20:20:08.186Z" }, + { url = "https://files.pythonhosted.org/packages/f6/4a/ecf894e962a59dea60f04877eea0fd5724618da89f1867b28ee8b91e811f/greenlet-3.3.2-cp314-cp314-win_arm64.whl", hash = "sha256:cd6f9e2bbd46321ba3bbb4c8a15794d32960e3b0ae2cc4d49a1a53d314805d71", size = 231437, upload-time = "2026-02-20T20:18:59.722Z" }, + { url = "https://files.pythonhosted.org/packages/98/6d/8f2ef704e614bcf58ed43cfb8d87afa1c285e98194ab2cfad351bf04f81e/greenlet-3.3.2-cp314-cp314t-macosx_11_0_universal2.whl", hash = "sha256:e26e72bec7ab387ac80caa7496e0f908ff954f31065b0ffc1f8ecb1338b11b54", size = 286617, upload-time = "2026-02-20T20:19:29.856Z" }, + { url = 
"https://files.pythonhosted.org/packages/5e/0d/93894161d307c6ea237a43988f27eba0947b360b99ac5239ad3fe09f0b47/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8b466dff7a4ffda6ca975979bab80bdadde979e29fc947ac3be4451428d8b0e4", size = 655189, upload-time = "2026-02-20T20:47:35.742Z" }, + { url = "https://files.pythonhosted.org/packages/f5/2c/d2d506ebd8abcb57386ec4f7ba20f4030cbe56eae541bc6fd6ef399c0b41/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b8bddc5b73c9720bea487b3bffdb1840fe4e3656fba3bd40aa1489e9f37877ff", size = 658225, upload-time = "2026-02-20T20:56:02.527Z" }, + { url = "https://files.pythonhosted.org/packages/d1/67/8197b7e7e602150938049d8e7f30de1660cfb87e4c8ee349b42b67bdb2e1/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:59b3e2c40f6706b05a9cd299c836c6aa2378cabe25d021acd80f13abf81181cf", size = 666581, upload-time = "2026-02-20T21:02:51.526Z" }, + { url = "https://files.pythonhosted.org/packages/8e/30/3a09155fbf728673a1dea713572d2d31159f824a37c22da82127056c44e4/greenlet-3.3.2-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b26b0f4428b871a751968285a1ac9648944cea09807177ac639b030bddebcea4", size = 657907, upload-time = "2026-02-20T20:21:05.259Z" }, + { url = "https://files.pythonhosted.org/packages/f3/fd/d05a4b7acd0154ed758797f0a43b4c0962a843bedfe980115e842c5b2d08/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1fb39a11ee2e4d94be9a76671482be9398560955c9e568550de0224e41104727", size = 1618857, upload-time = "2026-02-20T20:49:37.309Z" }, + { url = "https://files.pythonhosted.org/packages/6f/e1/50ee92a5db521de8f35075b5eff060dd43d39ebd46c2181a2042f7070385/greenlet-3.3.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:20154044d9085151bc309e7689d6f7ba10027f8f5a8c0676ad398b951913d89e", size = 1680010, upload-time = "2026-02-20T20:21:13.427Z" }, + { url = 
"https://files.pythonhosted.org/packages/29/4b/45d90626aef8e65336bed690106d1382f7a43665e2249017e9527df8823b/greenlet-3.3.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c04c5e06ec3e022cbfe2cd4a846e1d4e50087444f875ff6d2c2ad8445495cf1a", size = 237086, upload-time = "2026-02-20T20:20:45.786Z" }, +] + [[package]] name = "grpcio" version = "1.78.0" @@ -1724,6 +1844,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4a/de/ddd53b7032e623f3c7bcdab2b44e8bf635e468f62e10e5ff1946f62c9356/langgraph_checkpoint-4.0.0-py3-none-any.whl", hash = "sha256:3fa9b2635a7c5ac28b338f631abf6a030c3b508b7b9ce17c22611513b589c784", size = 46329, upload-time = "2026-01-12T20:30:25.2Z" }, ] +[[package]] +name = "langgraph-checkpoint-postgres" +version = "3.0.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "langgraph-checkpoint" }, + { name = "orjson" }, + { name = "psycopg" }, + { name = "psycopg-pool" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/95/7a/8f439966643d32111248a225e6cb33a182d07c90de780c4dbfc1e0377832/langgraph_checkpoint_postgres-3.0.5.tar.gz", hash = "sha256:a8fd7278a63f4f849b5cbc7884a15ca8f41e7d5f7467d0a66b31e8c24492f7eb", size = 127856, upload-time = "2026-03-18T21:25:29.785Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e8/87/b0f98b33a67204bca9d5619bcd9574222f6b025cf3c125eedcec9a50ecbc/langgraph_checkpoint_postgres-3.0.5-py3-none-any.whl", hash = "sha256:86d7040a88fd70087eaafb72251d796696a0a2d856168f5c11ef620771411552", size = 42907, upload-time = "2026-03-18T21:25:28.75Z" }, +] + [[package]] name = "langgraph-checkpoint-sqlite" version = "3.0.3" @@ -1934,6 +2069,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7b/9e/f8ee7d644affa3b80efdd623a3d75865c8f058f3950cb87fb0c48e3559bc/magika-0.6.3-py3-none-win_amd64.whl", hash = "sha256:e57f75674447b20cab4db928ae58ab264d7d8582b55183a0b876711c2b2787f3", size = 12692831, upload-time = "2025-10-30T15:22:32.063Z" }, ] +[[package]] +name = 
"mako" +version = "1.3.10" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markupsafe" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9e/38/bd5b78a920a64d708fe6bc8e0a2c075e1389d53bef8413725c63ba041535/mako-1.3.10.tar.gz", hash = "sha256:99579a6f39583fa7e5630a28c3c1f440e4e97a414b80372649c0ce338da2ea28", size = 392474, upload-time = "2025-04-10T12:44:31.16Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/fb/99f81ac72ae23375f22b7afdb7642aba97c00a713c217124420147681a2f/mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59", size = 78509, upload-time = "2025-04-10T12:50:53.297Z" }, +] + [[package]] name = "mammoth" version = "1.11.0" @@ -2008,6 +2155,69 @@ xlsx = [ { name = "pandas" }, ] +[[package]] +name = "markupsafe" +version = "3.0.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/72/147da192e38635ada20e0a2e1a51cf8823d2119ce8883f7053879c2199b5/markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e", size = 11615, upload-time = "2025-09-27T18:36:30.854Z" }, + { url = "https://files.pythonhosted.org/packages/9a/81/7e4e08678a1f98521201c3079f77db69fb552acd56067661f8c2f534a718/markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce", size = 12020, upload-time = "2025-09-27T18:36:31.971Z" }, + { url = 
"https://files.pythonhosted.org/packages/1e/2c/799f4742efc39633a1b54a92eec4082e4f815314869865d876824c257c1e/markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d", size = 24332, upload-time = "2025-09-27T18:36:32.813Z" }, + { url = "https://files.pythonhosted.org/packages/3c/2e/8d0c2ab90a8c1d9a24f0399058ab8519a3279d1bd4289511d74e909f060e/markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d", size = 22947, upload-time = "2025-09-27T18:36:33.86Z" }, + { url = "https://files.pythonhosted.org/packages/2c/54/887f3092a85238093a0b2154bd629c89444f395618842e8b0c41783898ea/markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a", size = 21962, upload-time = "2025-09-27T18:36:35.099Z" }, + { url = "https://files.pythonhosted.org/packages/c9/2f/336b8c7b6f4a4d95e91119dc8521402461b74a485558d8f238a68312f11c/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b", size = 23760, upload-time = "2025-09-27T18:36:36.001Z" }, + { url = "https://files.pythonhosted.org/packages/32/43/67935f2b7e4982ffb50a4d169b724d74b62a3964bc1a9a527f5ac4f1ee2b/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f", size = 21529, upload-time = "2025-09-27T18:36:36.906Z" }, + { url = "https://files.pythonhosted.org/packages/89/e0/4486f11e51bbba8b0c041098859e869e304d1c261e59244baa3d295d47b7/markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b", size = 23015, upload-time = "2025-09-27T18:36:37.868Z" }, + { url 
= "https://files.pythonhosted.org/packages/2f/e1/78ee7a023dac597a5825441ebd17170785a9dab23de95d2c7508ade94e0e/markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d", size = 14540, upload-time = "2025-09-27T18:36:38.761Z" }, + { url = "https://files.pythonhosted.org/packages/aa/5b/bec5aa9bbbb2c946ca2733ef9c4ca91c91b6a24580193e891b5f7dbe8e1e/markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c", size = 15105, upload-time = "2025-09-27T18:36:39.701Z" }, + { url = "https://files.pythonhosted.org/packages/e5/f1/216fc1bbfd74011693a4fd837e7026152e89c4bcf3e77b6692fba9923123/markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f", size = 13906, upload-time = "2025-09-27T18:36:40.689Z" }, + { url = "https://files.pythonhosted.org/packages/38/2f/907b9c7bbba283e68f20259574b13d005c121a0fa4c175f9bed27c4597ff/markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795", size = 11622, upload-time = "2025-09-27T18:36:41.777Z" }, + { url = "https://files.pythonhosted.org/packages/9c/d9/5f7756922cdd676869eca1c4e3c0cd0df60ed30199ffd775e319089cb3ed/markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219", size = 12029, upload-time = "2025-09-27T18:36:43.257Z" }, + { url = "https://files.pythonhosted.org/packages/00/07/575a68c754943058c78f30db02ee03a64b3c638586fba6a6dd56830b30a3/markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6", size = 24374, upload-time = "2025-09-27T18:36:44.508Z" }, + { url = 
"https://files.pythonhosted.org/packages/a9/21/9b05698b46f218fc0e118e1f8168395c65c8a2c750ae2bab54fc4bd4e0e8/markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676", size = 22980, upload-time = "2025-09-27T18:36:45.385Z" }, + { url = "https://files.pythonhosted.org/packages/7f/71/544260864f893f18b6827315b988c146b559391e6e7e8f7252839b1b846a/markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9", size = 21990, upload-time = "2025-09-27T18:36:46.916Z" }, + { url = "https://files.pythonhosted.org/packages/c2/28/b50fc2f74d1ad761af2f5dcce7492648b983d00a65b8c0e0cb457c82ebbe/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1", size = 23784, upload-time = "2025-09-27T18:36:47.884Z" }, + { url = "https://files.pythonhosted.org/packages/ed/76/104b2aa106a208da8b17a2fb72e033a5a9d7073c68f7e508b94916ed47a9/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc", size = 21588, upload-time = "2025-09-27T18:36:48.82Z" }, + { url = "https://files.pythonhosted.org/packages/b5/99/16a5eb2d140087ebd97180d95249b00a03aa87e29cc224056274f2e45fd6/markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12", size = 23041, upload-time = "2025-09-27T18:36:49.797Z" }, + { url = "https://files.pythonhosted.org/packages/19/bc/e7140ed90c5d61d77cea142eed9f9c303f4c4806f60a1044c13e3f1471d0/markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed", size = 14543, upload-time = "2025-09-27T18:36:51.584Z" }, + { url = 
"https://files.pythonhosted.org/packages/05/73/c4abe620b841b6b791f2edc248f556900667a5a1cf023a6646967ae98335/markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5", size = 15113, upload-time = "2025-09-27T18:36:52.537Z" }, + { url = "https://files.pythonhosted.org/packages/f0/3a/fa34a0f7cfef23cf9500d68cb7c32dd64ffd58a12b09225fb03dd37d5b80/markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485", size = 13911, upload-time = "2025-09-27T18:36:53.513Z" }, + { url = "https://files.pythonhosted.org/packages/e4/d7/e05cd7efe43a88a17a37b3ae96e79a19e846f3f456fe79c57ca61356ef01/markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73", size = 11658, upload-time = "2025-09-27T18:36:54.819Z" }, + { url = "https://files.pythonhosted.org/packages/99/9e/e412117548182ce2148bdeacdda3bb494260c0b0184360fe0d56389b523b/markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37", size = 12066, upload-time = "2025-09-27T18:36:55.714Z" }, + { url = "https://files.pythonhosted.org/packages/bc/e6/fa0ffcda717ef64a5108eaa7b4f5ed28d56122c9a6d70ab8b72f9f715c80/markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19", size = 25639, upload-time = "2025-09-27T18:36:56.908Z" }, + { url = "https://files.pythonhosted.org/packages/96/ec/2102e881fe9d25fc16cb4b25d5f5cde50970967ffa5dddafdb771237062d/markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025", size = 23569, upload-time = "2025-09-27T18:36:57.913Z" }, + { url = 
"https://files.pythonhosted.org/packages/4b/30/6f2fce1f1f205fc9323255b216ca8a235b15860c34b6798f810f05828e32/markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6", size = 23284, upload-time = "2025-09-27T18:36:58.833Z" }, + { url = "https://files.pythonhosted.org/packages/58/47/4a0ccea4ab9f5dcb6f79c0236d954acb382202721e704223a8aafa38b5c8/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f", size = 24801, upload-time = "2025-09-27T18:36:59.739Z" }, + { url = "https://files.pythonhosted.org/packages/6a/70/3780e9b72180b6fecb83a4814d84c3bf4b4ae4bf0b19c27196104149734c/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb", size = 22769, upload-time = "2025-09-27T18:37:00.719Z" }, + { url = "https://files.pythonhosted.org/packages/98/c5/c03c7f4125180fc215220c035beac6b9cb684bc7a067c84fc69414d315f5/markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009", size = 23642, upload-time = "2025-09-27T18:37:01.673Z" }, + { url = "https://files.pythonhosted.org/packages/80/d6/2d1b89f6ca4bff1036499b1e29a1d02d282259f3681540e16563f27ebc23/markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354", size = 14612, upload-time = "2025-09-27T18:37:02.639Z" }, + { url = "https://files.pythonhosted.org/packages/2b/98/e48a4bfba0a0ffcf9925fe2d69240bfaa19c6f7507b8cd09c70684a53c1e/markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218", size = 15200, upload-time = "2025-09-27T18:37:03.582Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/72/e3cc540f351f316e9ed0f092757459afbc595824ca724cbc5a5d4263713f/markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287", size = 13973, upload-time = "2025-09-27T18:37:04.929Z" }, + { url = "https://files.pythonhosted.org/packages/33/8a/8e42d4838cd89b7dde187011e97fe6c3af66d8c044997d2183fbd6d31352/markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe", size = 11619, upload-time = "2025-09-27T18:37:06.342Z" }, + { url = "https://files.pythonhosted.org/packages/b5/64/7660f8a4a8e53c924d0fa05dc3a55c9cee10bbd82b11c5afb27d44b096ce/markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026", size = 12029, upload-time = "2025-09-27T18:37:07.213Z" }, + { url = "https://files.pythonhosted.org/packages/da/ef/e648bfd021127bef5fa12e1720ffed0c6cbb8310c8d9bea7266337ff06de/markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737", size = 24408, upload-time = "2025-09-27T18:37:09.572Z" }, + { url = "https://files.pythonhosted.org/packages/41/3c/a36c2450754618e62008bf7435ccb0f88053e07592e6028a34776213d877/markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97", size = 23005, upload-time = "2025-09-27T18:37:10.58Z" }, + { url = "https://files.pythonhosted.org/packages/bc/20/b7fdf89a8456b099837cd1dc21974632a02a999ec9bf7ca3e490aacd98e7/markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d", size = 22048, upload-time = "2025-09-27T18:37:11.547Z" }, + { url = 
"https://files.pythonhosted.org/packages/9a/a7/591f592afdc734f47db08a75793a55d7fbcc6902a723ae4cfbab61010cc5/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda", size = 23821, upload-time = "2025-09-27T18:37:12.48Z" }, + { url = "https://files.pythonhosted.org/packages/7d/33/45b24e4f44195b26521bc6f1a82197118f74df348556594bd2262bda1038/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf", size = 21606, upload-time = "2025-09-27T18:37:13.485Z" }, + { url = "https://files.pythonhosted.org/packages/ff/0e/53dfaca23a69fbfbbf17a4b64072090e70717344c52eaaaa9c5ddff1e5f0/markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe", size = 23043, upload-time = "2025-09-27T18:37:14.408Z" }, + { url = "https://files.pythonhosted.org/packages/46/11/f333a06fc16236d5238bfe74daccbca41459dcd8d1fa952e8fbd5dccfb70/markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9", size = 14747, upload-time = "2025-09-27T18:37:15.36Z" }, + { url = "https://files.pythonhosted.org/packages/28/52/182836104b33b444e400b14f797212f720cbc9ed6ba34c800639d154e821/markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581", size = 15341, upload-time = "2025-09-27T18:37:16.496Z" }, + { url = "https://files.pythonhosted.org/packages/6f/18/acf23e91bd94fd7b3031558b1f013adfa21a8e407a3fdb32745538730382/markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4", size = 14073, upload-time = "2025-09-27T18:37:17.476Z" }, + { url = 
"https://files.pythonhosted.org/packages/3c/f0/57689aa4076e1b43b15fdfa646b04653969d50cf30c32a102762be2485da/markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab", size = 11661, upload-time = "2025-09-27T18:37:18.453Z" }, + { url = "https://files.pythonhosted.org/packages/89/c3/2e67a7ca217c6912985ec766c6393b636fb0c2344443ff9d91404dc4c79f/markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175", size = 12069, upload-time = "2025-09-27T18:37:19.332Z" }, + { url = "https://files.pythonhosted.org/packages/f0/00/be561dce4e6ca66b15276e184ce4b8aec61fe83662cce2f7d72bd3249d28/markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634", size = 25670, upload-time = "2025-09-27T18:37:20.245Z" }, + { url = "https://files.pythonhosted.org/packages/50/09/c419f6f5a92e5fadde27efd190eca90f05e1261b10dbd8cbcb39cd8ea1dc/markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50", size = 23598, upload-time = "2025-09-27T18:37:21.177Z" }, + { url = "https://files.pythonhosted.org/packages/22/44/a0681611106e0b2921b3033fc19bc53323e0b50bc70cffdd19f7d679bb66/markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e", size = 23261, upload-time = "2025-09-27T18:37:22.167Z" }, + { url = "https://files.pythonhosted.org/packages/5f/57/1b0b3f100259dc9fffe780cfb60d4be71375510e435efec3d116b6436d43/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5", size = 24835, upload-time = "2025-09-27T18:37:23.296Z" }, + { 
url = "https://files.pythonhosted.org/packages/26/6a/4bf6d0c97c4920f1597cc14dd720705eca0bf7c787aebc6bb4d1bead5388/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523", size = 22733, upload-time = "2025-09-27T18:37:24.237Z" }, + { url = "https://files.pythonhosted.org/packages/14/c7/ca723101509b518797fedc2fdf79ba57f886b4aca8a7d31857ba3ee8281f/markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc", size = 23672, upload-time = "2025-09-27T18:37:25.271Z" }, + { url = "https://files.pythonhosted.org/packages/fb/df/5bd7a48c256faecd1d36edc13133e51397e41b73bb77e1a69deab746ebac/markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d", size = 14819, upload-time = "2025-09-27T18:37:26.285Z" }, + { url = "https://files.pythonhosted.org/packages/1a/8a/0402ba61a2f16038b48b39bccca271134be00c5c9f0f623208399333c448/markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9", size = 15426, upload-time = "2025-09-27T18:37:27.316Z" }, + { url = "https://files.pythonhosted.org/packages/70/bc/6f1c2f612465f5fa89b95bead1f44dcb607670fd42891d8fdcd5d039f4f4/markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa", size = 14146, upload-time = "2025-09-27T18:37:28.327Z" }, +] + [[package]] name = "mcp" version = "1.25.0" @@ -2775,6 +2985,76 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/57/bf/2086963c69bdac3d7cff1cc7ff79b8ce5ea0bec6797a017e1be338a46248/protobuf-6.33.5-py3-none-any.whl", hash = "sha256:69915a973dd0f60f31a08b8318b73eab2bd6a392c79184b3612226b0a3f8ec02", size = 170687, upload-time = "2026-01-29T21:51:32.557Z" }, ] +[[package]] +name = "psycopg" +version = "3.3.3" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, + { name = "tzdata", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d3/b6/379d0a960f8f435ec78720462fd94c4863e7a31237cf81bf76d0af5883bf/psycopg-3.3.3.tar.gz", hash = "sha256:5e9a47458b3c1583326513b2556a2a9473a1001a56c9efe9e587245b43148dd9", size = 165624, upload-time = "2026-02-18T16:52:16.546Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c8/5b/181e2e3becb7672b502f0ed7f16ed7352aca7c109cfb94cf3878a9186db9/psycopg-3.3.3-py3-none-any.whl", hash = "sha256:f96525a72bcfade6584ab17e89de415ff360748c766f0106959144dcbb38c698", size = 212768, upload-time = "2026-02-18T16:46:27.365Z" }, +] + +[package.optional-dependencies] +binary = [ + { name = "psycopg-binary", marker = "implementation_name != 'pypy'" }, +] + +[[package]] +name = "psycopg-binary" +version = "3.3.3" +source = { registry = "https://pypi.org/simple" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/90/15/021be5c0cbc5b7c1ab46e91cc3434eb42569f79a0592e67b8d25e66d844d/psycopg_binary-3.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6698dbab5bcef8fdb570fc9d35fd9ac52041771bfcfe6fd0fc5f5c4e36f1e99d", size = 4591170, upload-time = "2026-02-18T16:48:55.594Z" }, + { url = "https://files.pythonhosted.org/packages/f1/54/a60211c346c9a2f8c6b272b5f2bbe21f6e11800ce7f61e99ba75cf8b63e1/psycopg_binary-3.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:329ff393441e75f10b673ae99ab45276887993d49e65f141da20d915c05aafd8", size = 4670009, upload-time = "2026-02-18T16:49:03.608Z" }, + { url = "https://files.pythonhosted.org/packages/c1/53/ac7c18671347c553362aadbf65f92786eef9540676ca24114cc02f5be405/psycopg_binary-3.3.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:eb072949b8ebf4082ae24289a2b0fd724da9adc8f22743409d6fd718ddb379df", size = 5469735, upload-time = "2026-02-18T16:49:10.128Z" 
}, + { url = "https://files.pythonhosted.org/packages/7f/c3/4f4e040902b82a344eff1c736cde2f2720f127fe939c7e7565706f96dd44/psycopg_binary-3.3.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:263a24f39f26e19ed7fc982d7859a36f17841b05bebad3eb47bb9cd2dd785351", size = 5152919, upload-time = "2026-02-18T16:49:16.335Z" }, + { url = "https://files.pythonhosted.org/packages/0c/e7/d929679c6a5c212bcf738806c7c89f5b3d0919f2e1685a0e08d6ff877945/psycopg_binary-3.3.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5152d50798c2fa5bd9b68ec68eb68a1b71b95126c1d70adaa1a08cd5eefdc23d", size = 6738785, upload-time = "2026-02-18T16:49:22.687Z" }, + { url = "https://files.pythonhosted.org/packages/69/b0/09703aeb69a9443d232d7b5318d58742e8ca51ff79f90ffe6b88f1db45e7/psycopg_binary-3.3.3-cp312-cp312-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9d6a1e56dd267848edb824dbeb08cf5bac649e02ee0b03ba883ba3f4f0bd54f2", size = 4979008, upload-time = "2026-02-18T16:49:27.313Z" }, + { url = "https://files.pythonhosted.org/packages/cc/a6/e662558b793c6e13a7473b970fee327d635270e41eded3090ef14045a6a5/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:73eaaf4bb04709f545606c1db2f65f4000e8a04cdbf3e00d165a23004692093e", size = 4508255, upload-time = "2026-02-18T16:49:31.575Z" }, + { url = "https://files.pythonhosted.org/packages/5f/7f/0f8b2e1d5e0093921b6f324a948a5c740c1447fbb45e97acaf50241d0f39/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:162e5675efb4704192411eaf8e00d07f7960b679cd3306e7efb120bb8d9456cc", size = 4189166, upload-time = "2026-02-18T16:49:35.801Z" }, + { url = "https://files.pythonhosted.org/packages/92/ec/ce2e91c33bc8d10b00c87e2f6b0fb570641a6a60042d6a9ae35658a3a797/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:fab6b5e37715885c69f5d091f6ff229be71e235f272ebaa35158d5a46fd548a0", size = 3924544, upload-time = "2026-02-18T16:49:41.129Z" }, + 
{ url = "https://files.pythonhosted.org/packages/c5/2f/7718141485f73a924205af60041c392938852aa447a94c8cbd222ff389a1/psycopg_binary-3.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a4aab31bd6d1057f287c96c0effca3a25584eb9cc702f282ecb96ded7814e830", size = 4235297, upload-time = "2026-02-18T16:49:46.726Z" }, + { url = "https://files.pythonhosted.org/packages/57/f9/1add717e2643a003bbde31b1b220172e64fbc0cb09f06429820c9173f7fc/psycopg_binary-3.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:59aa31fe11a0e1d1bcc2ce37ed35fe2ac84cd65bb9036d049b1a1c39064d0f14", size = 3547659, upload-time = "2026-02-18T16:49:52.999Z" }, + { url = "https://files.pythonhosted.org/packages/03/0a/cac9fdf1df16a269ba0e5f0f06cac61f826c94cadb39df028cdfe19d3a33/psycopg_binary-3.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:05f32239aec25c5fb15f7948cffdc2dc0dac098e48b80a140e4ba32b572a2e7d", size = 4590414, upload-time = "2026-02-18T16:50:01.441Z" }, + { url = "https://files.pythonhosted.org/packages/9c/c0/d8f8508fbf440edbc0099b1abff33003cd80c9e66eb3a1e78834e3fb4fb9/psycopg_binary-3.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7c84f9d214f2d1de2fafebc17fa68ac3f6561a59e291553dfc45ad299f4898c1", size = 4669021, upload-time = "2026-02-18T16:50:08.803Z" }, + { url = "https://files.pythonhosted.org/packages/04/05/097016b77e343b4568feddf12c72171fc513acef9a4214d21b9478569068/psycopg_binary-3.3.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:e77957d2ba17cada11be09a5066d93026cdb61ada7c8893101d7fe1c6e1f3925", size = 5467453, upload-time = "2026-02-18T16:50:14.985Z" }, + { url = "https://files.pythonhosted.org/packages/91/23/73244e5feb55b5ca109cede6e97f32ef45189f0fdac4c80d75c99862729d/psycopg_binary-3.3.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:42961609ac07c232a427da7c87a468d3c82fee6762c220f38e37cfdacb2b178d", size = 5151135, upload-time = "2026-02-18T16:50:24.82Z" }, + { url = 
"https://files.pythonhosted.org/packages/11/49/5309473b9803b207682095201d8708bbc7842ddf3f192488a69204e36455/psycopg_binary-3.3.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae07a3114313dd91fce686cab2f4c44af094398519af0e0f854bc707e1aeedf1", size = 6737315, upload-time = "2026-02-18T16:50:35.106Z" }, + { url = "https://files.pythonhosted.org/packages/d4/5d/03abe74ef34d460b33c4d9662bf6ec1dd38888324323c1a1752133c10377/psycopg_binary-3.3.3-cp313-cp313-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:d257c58d7b36a621dcce1d01476ad8b60f12d80eb1406aee4cf796f88b2ae482", size = 4979783, upload-time = "2026-02-18T16:50:42.067Z" }, + { url = "https://files.pythonhosted.org/packages/f0/6c/3fbf8e604e15f2f3752900434046c00c90bb8764305a1b81112bff30ba24/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:07c7211f9327d522c9c47560cae00a4ecf6687f4e02d779d035dd3177b41cb12", size = 4509023, upload-time = "2026-02-18T16:50:50.116Z" }, + { url = "https://files.pythonhosted.org/packages/9c/6b/1a06b43b7c7af756c80b67eac8bfaa51d77e68635a8a8d246e4f0bb7604a/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:8e7e9eca9b363dbedeceeadd8be97149d2499081f3c52d141d7cd1f395a91f83", size = 4185874, upload-time = "2026-02-18T16:50:55.97Z" }, + { url = "https://files.pythonhosted.org/packages/2b/d3/bf49e3dcaadba510170c8d111e5e69e5ae3f981c1554c5bb71c75ce354bb/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:cb85b1d5702877c16f28d7b92ba030c1f49ebcc9b87d03d8c10bf45a2f1c7508", size = 3925668, upload-time = "2026-02-18T16:51:03.299Z" }, + { url = "https://files.pythonhosted.org/packages/f8/92/0aac830ed6a944fe334404e1687a074e4215630725753f0e3e9a9a595b62/psycopg_binary-3.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4d4606c84d04b80f9138d72f1e28c6c02dc5ae0c7b8f3f8aaf89c681ce1cd1b1", size = 4234973, upload-time = "2026-02-18T16:51:09.097Z" }, + { url = 
"https://files.pythonhosted.org/packages/2e/96/102244653ee5a143ece5afe33f00f52fe64e389dfce8dbc87580c6d70d3d/psycopg_binary-3.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:74eae563166ebf74e8d950ff359be037b85723d99ca83f57d9b244a871d6c13b", size = 3551342, upload-time = "2026-02-18T16:51:13.892Z" }, + { url = "https://files.pythonhosted.org/packages/a2/71/7a57e5b12275fe7e7d84d54113f0226080423a869118419c9106c083a21c/psycopg_binary-3.3.3-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:497852c5eaf1f0c2d88ab74a64a8097c099deac0c71de1cbcf18659a8a04a4b2", size = 4607368, upload-time = "2026-02-18T16:51:19.295Z" }, + { url = "https://files.pythonhosted.org/packages/c7/04/cb834f120f2b2c10d4003515ef9ca9d688115b9431735e3936ae48549af8/psycopg_binary-3.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:258d1ea53464d29768bf25930f43291949f4c7becc706f6e220c515a63a24edd", size = 4687047, upload-time = "2026-02-18T16:51:23.84Z" }, + { url = "https://files.pythonhosted.org/packages/40/e9/47a69692d3da9704468041aa5ed3ad6fc7f6bb1a5ae788d261a26bbca6c7/psycopg_binary-3.3.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:111c59897a452196116db12e7f608da472fbff000693a21040e35fc978b23430", size = 5487096, upload-time = "2026-02-18T16:51:29.645Z" }, + { url = "https://files.pythonhosted.org/packages/0b/b6/0e0dd6a2f802864a4ae3dbadf4ec620f05e3904c7842b326aafc43e5f464/psycopg_binary-3.3.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:17bb6600e2455993946385249a3c3d0af52cd70c1c1cdbf712e9d696d0b0bf1b", size = 5168720, upload-time = "2026-02-18T16:51:36.499Z" }, + { url = "https://files.pythonhosted.org/packages/6f/0d/977af38ac19a6b55d22dff508bd743fd7c1901e1b73657e7937c7cccb0a3/psycopg_binary-3.3.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:642050398583d61c9856210568eb09a8e4f2fe8224bf3be21b67a370e677eead", size = 6762076, upload-time = "2026-02-18T16:51:43.167Z" }, + { url = 
"https://files.pythonhosted.org/packages/34/40/912a39d48322cf86895c0eaf2d5b95cb899402443faefd4b09abbba6b6e1/psycopg_binary-3.3.3-cp314-cp314-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:533efe6dc3a7cba5e2a84e38970786bb966306863e45f3db152007e9f48638a6", size = 4997623, upload-time = "2026-02-18T16:51:47.707Z" }, + { url = "https://files.pythonhosted.org/packages/98/0c/c14d0e259c65dc7be854d926993f151077887391d5a081118907a9d89603/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:5958dbf28b77ce2033482f6cb9ef04d43f5d8f4b7636e6963d5626f000efb23e", size = 4532096, upload-time = "2026-02-18T16:51:51.421Z" }, + { url = "https://files.pythonhosted.org/packages/39/21/8b7c50a194cfca6ea0fd4d1f276158307785775426e90700ab2eba5cd623/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:a6af77b6626ce92b5817bf294b4d45ec1a6161dba80fc2d82cdffdd6814fd023", size = 4208884, upload-time = "2026-02-18T16:51:57.336Z" }, + { url = "https://files.pythonhosted.org/packages/c7/2c/a4981bf42cf30ebba0424971d7ce70a222ae9b82594c42fc3f2105d7b525/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:47f06fcbe8542b4d96d7392c476a74ada521c5aebdb41c3c0155f6595fc14c8d", size = 3944542, upload-time = "2026-02-18T16:52:04.266Z" }, + { url = "https://files.pythonhosted.org/packages/60/e9/b7c29b56aa0b85a4e0c4d89db691c1ceef08f46a356369144430c155a2f5/psycopg_binary-3.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:e7800e6c6b5dc4b0ca7cc7370f770f53ac83886b76afda0848065a674231e856", size = 4254339, upload-time = "2026-02-18T16:52:10.444Z" }, + { url = "https://files.pythonhosted.org/packages/98/5a/291d89f44d3820fffb7a04ebc8f3ef5dda4f542f44a5daea0c55a84abf45/psycopg_binary-3.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:165f22ab5a9513a3d7425ffb7fcc7955ed8ccaeef6d37e369d6cc1dff1582383", size = 3652796, upload-time = "2026-02-18T16:52:14.02Z" }, +] + +[[package]] +name = "psycopg-pool" +version = "3.3.0" +source = { 
registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/56/9a/9470d013d0d50af0da9c4251614aeb3c1823635cab3edc211e3839db0bcf/psycopg_pool-3.3.0.tar.gz", hash = "sha256:fa115eb2860bd88fce1717d75611f41490dec6135efb619611142b24da3f6db5", size = 31606, upload-time = "2025-12-01T11:34:33.11Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e7/c3/26b8a0908a9db249de3b4169692e1c7c19048a9bc41a4d3209cee7dbb758/psycopg_pool-3.3.0-py3-none-any.whl", hash = "sha256:2e44329155c410b5e8666372db44276a8b1ebd8c90f1c3026ebba40d4bc81063", size = 39995, upload-time = "2025-12-01T11:34:29.761Z" }, +] + [[package]] name = "pyasn1" version = "0.6.3" @@ -3545,6 +3825,57 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b8/a7/903429719d39ac2c42aa37086c90e816d883560f13c87d51f09a2962e021/speechrecognition-3.14.5-py3-none-any.whl", hash = "sha256:0c496d74e9f29b1daadb0d96f5660f47563e42bf09316dacdd57094c5095977e", size = 32856308, upload-time = "2025-12-31T11:25:41.161Z" }, ] +[[package]] +name = "sqlalchemy" +version = "2.0.48" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "greenlet", marker = "platform_machine == 'AMD64' or platform_machine == 'WIN32' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'ppc64le' or platform_machine == 'win32' or platform_machine == 'x86_64'" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/1f/73/b4a9737255583b5fa858e0bb8e116eb94b88c910164ed2ed719147bde3de/sqlalchemy-2.0.48.tar.gz", hash = "sha256:5ca74f37f3369b45e1f6b7b06afb182af1fd5dde009e4ffd831830d98cbe5fe7", size = 9886075, upload-time = "2026-03-02T15:28:51.474Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ef/91/a42ae716f8925e9659df2da21ba941f158686856107a61cc97a95e7647a3/sqlalchemy-2.0.48-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:348174f228b99f33ca1f773e85510e08927620caa59ffe7803b37170df30332b", size = 2155737, upload-time = "2026-03-02T15:49:13.207Z" }, + { url = "https://files.pythonhosted.org/packages/b9/52/f75f516a1f3888f027c1cfb5d22d4376f4b46236f2e8669dcb0cddc60275/sqlalchemy-2.0.48-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53667b5f668991e279d21f94ccfa6e45b4e3f4500e7591ae59a8012d0f010dcb", size = 3337020, upload-time = "2026-03-02T15:50:34.547Z" }, + { url = "https://files.pythonhosted.org/packages/37/9a/0c28b6371e0cdcb14f8f1930778cb3123acfcbd2c95bb9cf6b4a2ba0cce3/sqlalchemy-2.0.48-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:34634e196f620c7a61d18d5cf7dc841ca6daa7961aed75d532b7e58b309ac894", size = 3349983, upload-time = "2026-03-02T15:53:25.542Z" }, + { url = "https://files.pythonhosted.org/packages/1c/46/0aee8f3ff20b1dcbceb46ca2d87fcc3d48b407925a383ff668218509d132/sqlalchemy-2.0.48-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:546572a1793cc35857a2ffa1fe0e58571af1779bcc1ffa7c9fb0839885ed69a9", size = 3279690, upload-time = "2026-03-02T15:50:36.277Z" }, + { url = "https://files.pythonhosted.org/packages/ce/8c/a957bc91293b49181350bfd55e6dfc6e30b7f7d83dc6792d72043274a390/sqlalchemy-2.0.48-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:07edba08061bc277bfdc772dd2a1a43978f5a45994dd3ede26391b405c15221e", size = 3314738, upload-time = "2026-03-02T15:53:27.519Z" }, + { url = "https://files.pythonhosted.org/packages/4b/44/1d257d9f9556661e7bdc83667cc414ba210acfc110c82938cb3611eea58f/sqlalchemy-2.0.48-cp312-cp312-win32.whl", hash = "sha256:908a3fa6908716f803b86896a09a2c4dde5f5ce2bb07aacc71ffebb57986ce99", size = 2115546, upload-time = "2026-03-02T15:54:31.591Z" }, + { url = "https://files.pythonhosted.org/packages/f2/af/c3c7e1f3a2b383155a16454df62ae8c62a30dd238e42e68c24cebebbfae6/sqlalchemy-2.0.48-cp312-cp312-win_amd64.whl", hash = 
"sha256:68549c403f79a8e25984376480959975212a670405e3913830614432b5daa07a", size = 2142484, upload-time = "2026-03-02T15:54:34.072Z" }, + { url = "https://files.pythonhosted.org/packages/d1/c6/569dc8bf3cd375abc5907e82235923e986799f301cd79a903f784b996fca/sqlalchemy-2.0.48-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:e3070c03701037aa418b55d36532ecb8f8446ed0135acb71c678dbdf12f5b6e4", size = 2152599, upload-time = "2026-03-02T15:49:14.41Z" }, + { url = "https://files.pythonhosted.org/packages/6d/ff/f4e04a4bd5a24304f38cb0d4aa2ad4c0fb34999f8b884c656535e1b2b74c/sqlalchemy-2.0.48-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2645b7d8a738763b664a12a1542c89c940daa55196e8d73e55b169cc5c99f65f", size = 3278825, upload-time = "2026-03-02T15:50:38.269Z" }, + { url = "https://files.pythonhosted.org/packages/fe/88/cb59509e4668d8001818d7355d9995be90c321313078c912420603a7cb95/sqlalchemy-2.0.48-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b19151e76620a412c2ac1c6f977ab1b9fa7ad43140178345136456d5265b32ed", size = 3295200, upload-time = "2026-03-02T15:53:29.366Z" }, + { url = "https://files.pythonhosted.org/packages/87/dc/1609a4442aefd750ea2f32629559394ec92e89ac1d621a7f462b70f736ff/sqlalchemy-2.0.48-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:5b193a7e29fd9fa56e502920dca47dffe60f97c863494946bd698c6058a55658", size = 3226876, upload-time = "2026-03-02T15:50:39.802Z" }, + { url = "https://files.pythonhosted.org/packages/37/c3/6ae2ab5ea2fa989fbac4e674de01224b7a9d744becaf59bb967d62e99bed/sqlalchemy-2.0.48-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:36ac4ddc3d33e852da9cb00ffb08cea62ca05c39711dc67062ca2bb1fae35fd8", size = 3265045, upload-time = "2026-03-02T15:53:31.421Z" }, + { url = "https://files.pythonhosted.org/packages/6f/82/ea4665d1bb98c50c19666e672f21b81356bd6077c4574e3d2bbb84541f53/sqlalchemy-2.0.48-cp313-cp313-win32.whl", hash = 
"sha256:389b984139278f97757ea9b08993e7b9d1142912e046ab7d82b3fbaeb0209131", size = 2113700, upload-time = "2026-03-02T15:54:35.825Z" }, + { url = "https://files.pythonhosted.org/packages/b7/2b/b9040bec58c58225f073f5b0c1870defe1940835549dafec680cbd58c3c3/sqlalchemy-2.0.48-cp313-cp313-win_amd64.whl", hash = "sha256:d612c976cbc2d17edfcc4c006874b764e85e990c29ce9bd411f926bbfb02b9a2", size = 2139487, upload-time = "2026-03-02T15:54:37.079Z" }, + { url = "https://files.pythonhosted.org/packages/f4/f4/7b17bd50244b78a49d22cc63c969d71dc4de54567dc152a9b46f6fae40ce/sqlalchemy-2.0.48-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:69f5bc24904d3bc3640961cddd2523e361257ef68585d6e364166dfbe8c78fae", size = 3558851, upload-time = "2026-03-02T15:57:48.607Z" }, + { url = "https://files.pythonhosted.org/packages/20/0d/213668e9aca61d370f7d2a6449ea4ec699747fac67d4bda1bb3d129025be/sqlalchemy-2.0.48-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fd08b90d211c086181caed76931ecfa2bdfc83eea3cfccdb0f82abc6c4b876cb", size = 3525525, upload-time = "2026-03-02T16:04:38.058Z" }, + { url = "https://files.pythonhosted.org/packages/85/d7/a84edf412979e7d59c69b89a5871f90a49228360594680e667cb2c46a828/sqlalchemy-2.0.48-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:1ccd42229aaac2df431562117ac7e667d702e8e44afdb6cf0e50fa3f18160f0b", size = 3466611, upload-time = "2026-03-02T15:57:50.759Z" }, + { url = "https://files.pythonhosted.org/packages/86/55/42404ce5770f6be26a2b0607e7866c31b9a4176c819e9a7a5e0a055770be/sqlalchemy-2.0.48-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:f0dcbc588cd5b725162c076eb9119342f6579c7f7f55057bb7e3c6ff27e13121", size = 3475812, upload-time = "2026-03-02T16:04:40.092Z" }, + { url = "https://files.pythonhosted.org/packages/ae/ae/29b87775fadc43e627cf582fe3bda4d02e300f6b8f2747c764950d13784c/sqlalchemy-2.0.48-cp313-cp313t-win32.whl", hash = 
"sha256:9764014ef5e58aab76220c5664abb5d47d5bc858d9debf821e55cfdd0f128485", size = 2141335, upload-time = "2026-03-02T15:52:51.518Z" }, + { url = "https://files.pythonhosted.org/packages/91/44/f39d063c90f2443e5b46ec4819abd3d8de653893aae92df42a5c4f5843de/sqlalchemy-2.0.48-cp313-cp313t-win_amd64.whl", hash = "sha256:e2f35b4cccd9ed286ad62e0a3c3ac21e06c02abc60e20aa51a3e305a30f5fa79", size = 2173095, upload-time = "2026-03-02T15:52:52.79Z" }, + { url = "https://files.pythonhosted.org/packages/f7/b3/f437eaa1cf028bb3c927172c7272366393e73ccd104dcf5b6963f4ab5318/sqlalchemy-2.0.48-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:e2d0d88686e3d35a76f3e15a34e8c12d73fc94c1dea1cd55782e695cc14086dd", size = 2154401, upload-time = "2026-03-02T15:49:17.24Z" }, + { url = "https://files.pythonhosted.org/packages/6c/1c/b3abdf0f402aa3f60f0df6ea53d92a162b458fca2321d8f1f00278506402/sqlalchemy-2.0.48-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:49b7bddc1eebf011ea5ab722fdbe67a401caa34a350d278cc7733c0e88fecb1f", size = 3274528, upload-time = "2026-03-02T15:50:41.489Z" }, + { url = "https://files.pythonhosted.org/packages/f2/5e/327428a034407651a048f5e624361adf3f9fbac9d0fa98e981e9c6ff2f5e/sqlalchemy-2.0.48-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:426c5ca86415d9b8945c7073597e10de9644802e2ff502b8e1f11a7a2642856b", size = 3279523, upload-time = "2026-03-02T15:53:32.962Z" }, + { url = "https://files.pythonhosted.org/packages/2a/ca/ece73c81a918add0965b76b868b7b5359e068380b90ef1656ee995940c02/sqlalchemy-2.0.48-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:288937433bd44e3990e7da2402fabc44a3c6c25d3704da066b85b89a85474ae0", size = 3224312, upload-time = "2026-03-02T15:50:42.996Z" }, + { url = "https://files.pythonhosted.org/packages/88/11/fbaf1ae91fa4ee43f4fe79661cead6358644824419c26adb004941bdce7c/sqlalchemy-2.0.48-cp314-cp314-musllinux_1_2_x86_64.whl", hash = 
"sha256:8183dc57ae7d9edc1346e007e840a9f3d6aa7b7f165203a99e16f447150140d2", size = 3246304, upload-time = "2026-03-02T15:53:34.937Z" }, + { url = "https://files.pythonhosted.org/packages/fa/a8/5fb0deb13930b4f2f698c5541ae076c18981173e27dd00376dbaea7a9c82/sqlalchemy-2.0.48-cp314-cp314-win32.whl", hash = "sha256:1182437cb2d97988cfea04cf6cdc0b0bb9c74f4d56ec3d08b81e23d621a28cc6", size = 2116565, upload-time = "2026-03-02T15:54:38.321Z" }, + { url = "https://files.pythonhosted.org/packages/95/7e/e83615cb63f80047f18e61e31e8e32257d39458426c23006deeaf48f463b/sqlalchemy-2.0.48-cp314-cp314-win_amd64.whl", hash = "sha256:144921da96c08feb9e2b052c5c5c1d0d151a292c6135623c6b2c041f2a45f9e0", size = 2142205, upload-time = "2026-03-02T15:54:39.831Z" }, + { url = "https://files.pythonhosted.org/packages/83/e3/69d8711b3f2c5135e9cde5f063bc1605860f0b2c53086d40c04017eb1f77/sqlalchemy-2.0.48-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5aee45fd2c6c0f2b9cdddf48c48535e7471e42d6fb81adfde801da0bd5b93241", size = 3563519, upload-time = "2026-03-02T15:57:52.387Z" }, + { url = "https://files.pythonhosted.org/packages/f8/4f/a7cce98facca73c149ea4578981594aaa5fd841e956834931de503359336/sqlalchemy-2.0.48-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7cddca31edf8b0653090cbb54562ca027c421c58ddde2c0685f49ff56a1690e0", size = 3528611, upload-time = "2026-03-02T16:04:42.097Z" }, + { url = "https://files.pythonhosted.org/packages/cd/7d/5936c7a03a0b0cb0fa0cc425998821c6029756b0855a8f7ee70fba1de955/sqlalchemy-2.0.48-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:7a936f1bb23d370b7c8cc079d5fce4c7d18da87a33c6744e51a93b0f9e97e9b3", size = 3472326, upload-time = "2026-03-02T15:57:54.423Z" }, + { url = "https://files.pythonhosted.org/packages/f4/33/cea7dfc31b52904efe3dcdc169eb4514078887dff1f5ae28a7f4c5d54b3c/sqlalchemy-2.0.48-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = 
"sha256:e004aa9248e8cb0a5f9b96d003ca7c1c0a5da8decd1066e7b53f59eb8ce7c62b", size = 3478453, upload-time = "2026-03-02T16:04:44.584Z" }, + { url = "https://files.pythonhosted.org/packages/c8/95/32107c4d13be077a9cae61e9ae49966a35dc4bf442a8852dd871db31f62e/sqlalchemy-2.0.48-cp314-cp314t-win32.whl", hash = "sha256:b8438ec5594980d405251451c5b7ea9aa58dda38eb7ac35fb7e4c696712ee24f", size = 2147209, upload-time = "2026-03-02T15:52:54.274Z" }, + { url = "https://files.pythonhosted.org/packages/d2/d7/1e073da7a4bc645eb83c76067284a0374e643bc4be57f14cc6414656f92c/sqlalchemy-2.0.48-cp314-cp314t-win_amd64.whl", hash = "sha256:d854b3970067297f3a7fbd7a4683587134aa9b3877ee15aa29eea478dc68f933", size = 2182198, upload-time = "2026-03-02T15:52:55.606Z" }, + { url = "https://files.pythonhosted.org/packages/46/2c/9664130905f03db57961b8980b05cab624afd114bf2be2576628a9f22da4/sqlalchemy-2.0.48-py3-none-any.whl", hash = "sha256:a66fe406437dd65cacd96a72689a3aaaecaebbcd62d81c5ac1c0fdbeac835096", size = 1940202, upload-time = "2026-03-02T15:52:43.285Z" }, +] + +[package.optional-dependencies] +asyncio = [ + { name = "greenlet" }, +] + [[package]] name = "sqlite-vec" version = "0.1.6" diff --git a/config.example.yaml b/config.example.yaml index c22ad9b9d..aa78cc67c 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -708,42 +708,79 @@ memory: # ============================================================================ # Allow the agent to autonomously create and improve skills in skills/custom/. 
skill_evolution: - enabled: false # Set to true to allow agent-managed writes under skills/custom - moderation_model_name: null # Model for LLM-based security scanning (null = use default model) + enabled: false # Set to true to allow agent-managed writes under skills/custom + moderation_model_name: null # Model for LLM-based security scanning (null = use default model) # ============================================================================ -# Checkpointer Configuration +# Checkpointer Configuration (DEPRECATED — use `database` instead) # ============================================================================ -# Configure state persistence for the embedded DeerFlowClient. -# The LangGraph Server manages its own state persistence separately -# via the server infrastructure (this setting does not affect it). +# Legacy standalone checkpointer config. Kept for backward compatibility. +# Prefer the unified `database` section below, which drives BOTH the +# LangGraph checkpointer AND DeerFlow application data (runs, feedback, +# events) from a single backend setting. # -# When configured, DeerFlowClient will automatically use this checkpointer, -# enabling multi-turn conversations to persist across process restarts. +# If both `checkpointer` and `database` are present, `checkpointer` +# takes precedence for LangGraph state persistence only. # -# Supported types: -# memory - In-process only. State is lost when the process exits. (default) -# sqlite - File-based SQLite persistence. Survives restarts. -# Requires: uv add langgraph-checkpoint-sqlite -# postgres - PostgreSQL persistence. Suitable for multi-process deployments. 
-# Requires: uv add langgraph-checkpoint-postgres psycopg[binary] psycopg-pool -# -# Examples: -# -# In-memory (default when omitted — no persistence): # checkpointer: -# type: memory +# type: sqlite +# connection_string: checkpoints.db # -# SQLite (file-based, single-process): -checkpointer: - type: sqlite - connection_string: checkpoints.db -# -# PostgreSQL (multi-process, production): # checkpointer: # type: postgres # connection_string: postgresql://user:password@localhost:5432/deerflow +# ============================================================================ +# Database +# ============================================================================ +# Unified storage backend for the LangGraph checkpointer and DeerFlow +# application data (runs, thread metadata, feedback, etc.). +# +# backend: memory -- No persistence, data lost on restart (default) +# backend: sqlite -- Single-node deployment, files in sqlite_dir +# backend: postgres -- Production multi-node deployment +# +# SQLite mode automatically uses separate .db files for checkpointer +# and application data to avoid write-lock contention. +# +# Postgres mode: put your connection URL in .env as DATABASE_URL, +# then reference it here with $DATABASE_URL. +# Install the driver first: +# Local: uv sync --extra postgres +# Docker: UV_EXTRAS=postgres docker compose build +# +# NOTE: When both `checkpointer` and `database` are configured, +# `checkpointer` takes precedence for LangGraph state persistence. +# If you use `database`, you can remove the `checkpointer` section. 
+# database: +# backend: sqlite +# sqlite_dir: .deer-flow/data +# +# database: +# backend: postgres +# postgres_url: $DATABASE_URL +database: + backend: sqlite + sqlite_dir: .deer-flow/data + +# ============================================================================ +# Run Events Configuration +# ============================================================================ +# Storage backend for run events (messages + execution traces). +# +# backend: memory -- No persistence, data lost on restart (default) +# backend: db -- SQL database via ORM, full query capability (production) +# backend: jsonl -- Append-only JSONL files (lightweight single-node persistence) +# +# run_events: +# backend: memory +# max_trace_content: 10240 # Truncation threshold for trace content (db backend, bytes) +# track_token_usage: true # Accumulate token counts to RunRow +run_events: + backend: memory + max_trace_content: 10240 + track_token_usage: true + # ============================================================================ # IM Channels Configuration # ============================================================================ diff --git a/docker/docker-compose.yaml b/docker/docker-compose.yaml index 38337c7df..31cb673da 100644 --- a/docker/docker-compose.yaml +++ b/docker/docker-compose.yaml @@ -73,6 +73,7 @@ services: APT_MIRROR: ${APT_MIRROR:-} UV_IMAGE: ${UV_IMAGE:-ghcr.io/astral-sh/uv:0.7.20} UV_INDEX_URL: ${UV_INDEX_URL:-https://pypi.org/simple} + UV_EXTRAS: ${UV_EXTRAS:-} container_name: deer-flow-gateway command: sh -c "cd backend && PYTHONPATH=. 
uv run uvicorn app.gateway.app:app --host 0.0.0.0 --port 8001 --workers ${GATEWAY_WORKERS:-4}" volumes: @@ -126,6 +127,7 @@ services: APT_MIRROR: ${APT_MIRROR:-} UV_IMAGE: ${UV_IMAGE:-ghcr.io/astral-sh/uv:0.7.20} UV_INDEX_URL: ${UV_INDEX_URL:-https://pypi.org/simple} + UV_EXTRAS: ${UV_EXTRAS:-} container_name: deer-flow-langgraph command: sh -c 'cd /app/backend && args="--no-browser --no-reload --host 0.0.0.0 --port 2024 --n-jobs-per-worker $${LANGGRAPH_JOBS_PER_WORKER:-10}" && if [ "$${LANGGRAPH_ALLOW_BLOCKING:-0}" = "1" ]; then args="$$args --allow-blocking"; fi && uv run langgraph dev $$args' volumes: