diff --git a/frontend/src/content/en/application/agents-and-threads.mdx b/frontend/src/content/en/application/agents-and-threads.mdx index 223bb2e77..da6e7ca06 100644 --- a/frontend/src/content/en/application/agents-and-threads.mdx +++ b/frontend/src/content/en/application/agents-and-threads.mdx @@ -1,3 +1,5 @@ +import { Callout, Cards, Steps } from "nextra/components"; + # Agents and Threads DeerFlow App supports multiple named agents and maintains conversation state across sessions through threads and checkpointing. @@ -104,11 +106,10 @@ checkpointer: ``` - DeerFlow App uses the checkpointer setting in{" "} - config.yaml to persist thread state through the Gateway runtime - (via make_checkpointer() in - deerflow.agents.checkpointer.async_provider). Configure this - section if you want threads to survive process restarts. + The LangGraph Server manages its own state separately. The + checkpointer setting in config.yaml applies to the + embedded DeerFlowClient (used in direct Python integrations), not + to the LangGraph Server deployment used by DeerFlow App. ### Thread data storage diff --git a/frontend/src/content/en/application/configuration.mdx b/frontend/src/content/en/application/configuration.mdx index 79de38b76..00f387ef6 100644 --- a/frontend/src/content/en/application/configuration.mdx +++ b/frontend/src/content/en/application/configuration.mdx @@ -1,3 +1,268 @@ +import { Callout, Cards, Tabs } from "nextra/components"; + # Configuration -TBD +DeerFlow App is configured through two files and a set of environment variables. This page covers the application-level configuration that most operators need to set up before deploying. 
+ +## Configuration files + +| File | Purpose | +|---|---| +| `config.yaml` | Backend configuration: models, sandbox, tools, skills, memory, and all Harness settings | +| `extensions_config.json` | MCP servers and skill enable/disable state (managed by the App UI and Gateway API) | + +Frontend environment variables control the Next.js build and runtime behavior. + +## config.yaml + +Start by copying the example: + +```bash +cp config.example.yaml config.yaml +``` + +The most important sections for application configuration are: + +### Models + +Configure the LLM providers the agent can use. At least one model is required. + + + +```yaml +models: + - name: gpt-4o + use: langchain_openai:ChatOpenAI + model: gpt-4o + api_key: $OPENAI_API_KEY + request_timeout: 600.0 + max_retries: 2 + supports_vision: true +``` + + +```yaml +models: + - name: claude-3-5-sonnet + use: langchain_anthropic:ChatAnthropic + model: claude-3-5-sonnet-20241022 + api_key: $ANTHROPIC_API_KEY + default_request_timeout: 600.0 + max_retries: 2 + max_tokens: 8192 + supports_vision: true + supports_thinking: true + when_thinking_enabled: + thinking: + type: enabled + when_thinking_disabled: + thinking: + type: disabled +``` + + +```yaml +models: + - name: deepseek-v3 + use: deerflow.models.patched_deepseek:PatchedChatDeepSeek + model: deepseek-reasoner + api_key: $DEEPSEEK_API_KEY + timeout: 600.0 + max_retries: 2 + supports_thinking: true + when_thinking_enabled: + extra_body: + thinking: + type: enabled + when_thinking_disabled: + extra_body: + thinking: + type: disabled +``` + + +```yaml +models: + - name: qwen3-local + use: langchain_ollama:ChatOllama + model: qwen3:32b + base_url: http://localhost:11434 # No /v1 suffix — uses native Ollama API + num_predict: 8192 + temperature: 0.7 + reasoning: true + supports_thinking: true + supports_vision: false +``` + +Install Ollama provider: `cd backend && uv add 'deerflow-harness[ollama]'` + + + Use langchain_ollama:ChatOllama (not the OpenAI-compatible 
+ endpoint) for Ollama models. The native API correctly separates thinking + content; the OpenAI-compatible endpoint may flatten or drop it. + + + +```yaml +models: + - name: gemini-2.5-pro + use: langchain_google_genai:ChatGoogleGenerativeAI + model: gemini-2.5-pro + gemini_api_key: $GEMINI_API_KEY + timeout: 600.0 + max_retries: 2 + max_tokens: 8192 + supports_vision: true +``` + + + +### Sandbox + +Choose the execution environment for agent file and command operations: + + + +```yaml +sandbox: + use: deerflow.sandbox.local:LocalSandboxProvider + allow_host_bash: false # set true only for trusted single-user workflows +``` + + +```yaml +sandbox: + use: deerflow.community.aio_sandbox:AioSandboxProvider + image: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest + replicas: 3 + idle_timeout: 600 +``` + +Install: `cd backend && uv add 'deerflow-harness[aio-sandbox]'` + + +```yaml +sandbox: + use: deerflow.community.aio_sandbox:AioSandboxProvider + provisioner_url: http://provisioner:8002 +``` + + + +### Tools + +Configure which tools the agent has access to. 
The defaults use DuckDuckGo (no API key) and Jina AI for web operations: + +```yaml +tools: + # Web search (choose one) + - use: deerflow.community.ddg_search.tools:web_search_tool # default, no key required + # - use: deerflow.community.tavily.tools:web_search_tool + # api_key: $TAVILY_API_KEY + + # Web fetch (choose one) + - use: deerflow.community.jina_ai.tools:web_fetch_tool + + # Image search + - use: deerflow.community.image_search.tools:image_search_tool + + # File operations + - use: deerflow.sandbox.tools:ls_tool + - use: deerflow.sandbox.tools:read_file_tool + - use: deerflow.sandbox.tools:glob_tool + - use: deerflow.sandbox.tools:grep_tool + - use: deerflow.sandbox.tools:write_file_tool + - use: deerflow.sandbox.tools:str_replace_tool + - use: deerflow.sandbox.tools:bash_tool +``` + +### Thread state persistence (checkpointer) + +By default, DeerFlow uses an SQLite checkpointer for thread state persistence (this setting applies to the embedded DeerFlowClient used in direct Python integrations; the LangGraph Server deployment used by DeerFlow App manages its own state separately — see [Agents and Threads](/docs/application/agents-and-threads)): + +```yaml +checkpointer: + type: sqlite + connection_string: checkpoints.db # stored in backend/.deer-flow/ +``` + +For production deployments with multiple processes: + +```yaml +checkpointer: + type: postgres + connection_string: postgresql://user:password@localhost:5432/deerflow +``` + +Install PostgreSQL support: `cd backend && uv add langgraph-checkpoint-postgres psycopg[binary] psycopg-pool` + +For in-memory only (state lost on restart): + +```yaml +checkpointer: + type: memory +``` + +### Memory + +```yaml +memory: + enabled: true + storage_path: memory.json + debounce_seconds: 30 + max_facts: 100 + injection_enabled: true + max_injection_tokens: 2000 +``` + +## Frontend environment variables + +Set these before running `pnpm build` or starting the frontend in production: + +| Variable | Required | Description | +|---|---|---| +| `BETTER_AUTH_SECRET` | **Required** in production | Secret for session signing. Use `openssl rand -base64 32`. 
| +| `BETTER_AUTH_URL` | Recommended | Public-facing base URL (e.g., `https://your-domain.com`) | +| `SKIP_ENV_VALIDATION` | Optional | Set to `1` to skip env validation during build (not recommended) | +| `NEXT_PUBLIC_API_URL` | Optional | Override the API base URL for the frontend | + +In development, set these in a `.env` file at the repo root: + +```bash +BETTER_AUTH_SECRET=your-strong-secret-here-min-32-chars +``` + +## extensions_config.json + +This file manages MCP server connections and skill enable/disable state. It is created automatically when you first manage extensions through the App UI or Gateway API. + +Manual example: + +```json +{ + "mcpServers": { + "my-server": { + "command": "npx", + "args": ["-y", "@my-org/my-mcp-server"], + "enabled": true + } + }, + "skills": { + "deep-research": { "enabled": true }, + "data-analysis": { "enabled": true } + } +} +``` + +## Config upgrade + +When the config schema changes, `config_version` is bumped. To merge new fields into your existing config without losing customizations: + +```bash +make config-upgrade +``` + + + + + diff --git a/frontend/src/content/en/application/deployment-guide.mdx b/frontend/src/content/en/application/deployment-guide.mdx index 05fe72289..59a60d72c 100644 --- a/frontend/src/content/en/application/deployment-guide.mdx +++ b/frontend/src/content/en/application/deployment-guide.mdx @@ -1,3 +1,205 @@ +import { Callout, Cards, Steps, Tabs } from "nextra/components"; + # Deployment Guide -TBD +This guide covers all supported deployment methods for DeerFlow App: local development, Docker Compose, and production with Kubernetes-managed sandboxes. + +## Local development deployment + +The local workflow is the fastest way to run DeerFlow. All services run as native processes on your machine. 
+ + + +```bash +make dev +``` + +Services started: + +| Service | Port | Description | +|---|---|---| +| LangGraph | 2024 | DeerFlow Harness runtime | +| Gateway API | 8001 | FastAPI backend | +| Frontend | 3000 | Next.js UI | +| nginx | 2026 | Unified reverse proxy | + +Access the app at **http://localhost:2026**. + + +```bash +make stop +``` + +Stops all four services. Safe to run even if a service is not running. + + +``` +logs/langgraph.log # Agent runtime logs +logs/gateway.log # API gateway logs +logs/frontend.log # Next.js dev server logs +logs/nginx.log # nginx access/error logs +``` + +Tail a log in real time: +```bash +tail -f logs/langgraph.log +``` + + + +## Docker Compose deployment + +Docker Compose runs all services in containers. Use this for a more production-like local setup or for team environments. + +### Prerequisites + +- Docker (or Docker Desktop / OrbStack on macOS) +- A configured `config.yaml` at the repo root + +### Development compose + +```bash +# Set the absolute path to your deer-flow repo root +export DEER_FLOW_ROOT=/path/to/deer-flow + +docker compose -f docker/docker-compose-dev.yaml up --build +``` + +Services: nginx, frontend, gateway, langgraph, and optionally provisioner (for K8s-managed sandboxes). + +Access the app at **http://localhost:2026**. + +### Environment variables + +Create a `.env` file in the repo root for secrets and runtime configuration: + +```bash +# .env +OPENAI_API_KEY=sk-... +DEER_FLOW_ROOT=/absolute/path/to/deer-flow +BETTER_AUTH_SECRET=your-secret-here-min-32-chars +``` + +The `docker-compose*.yaml` files include an `env_file: ../.env` directive that loads this automatically. + + + Always set BETTER_AUTH_SECRET to a strong random string before + deploying. Without it, the frontend build uses a default that is publicly + known. + + +### Data persistence + +Thread data is stored in `backend/.deer-flow/threads/`. In Docker deployments, this directory is bind-mounted into the langgraph container. 
+ +To avoid data loss when containers are recreated: + +1. Set `DEER_FLOW_ROOT` to the absolute repo root path (or a stable host path). +2. Verify the `threads/` and `skills/` directories are mounted correctly. + +For production, use a named volume or a Persistent Volume Claim (PVC) instead of a host bind-mount. + +## Production deployment considerations + +### Sandbox mode selection + +| Sandbox | Use case | +|---|---| +| `LocalSandboxProvider` | Single-user, trusted local workflows | +| `AioSandboxProvider` (Docker) | Multi-user, moderate isolation requirement | +| `AioSandboxProvider` + K8s Provisioner | Production, strong isolation, multi-user | + +For any deployment with more than one concurrent user, use a container-based sandbox to prevent users from interfering with each other's execution environments. + +### K8s Provisioner setup + +The provisioner manages sandbox Pods in a Kubernetes cluster. It is included in `docker/docker-compose-dev.yaml`. + + + +#### Configure the provisioner + +Set required environment variables in your `.env` or compose override: + +```bash +K8S_NAMESPACE=deer-flow +SANDBOX_IMAGE=enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest +DEER_FLOW_ROOT=/absolute/path/to/deer-flow +``` + +#### Configure the sandbox provider + +```yaml +# config.yaml +sandbox: + use: deerflow.community.aio_sandbox:AioSandboxProvider + provisioner_url: http://provisioner:8002 +``` + +#### Configure data persistence + +For production, use PVCs instead of hostPath volumes: + +```bash +# In .env or compose environment +USERDATA_PVC_NAME=deer-flow-userdata-pvc +SKILLS_PVC_NAME=deer-flow-skills-pvc +``` + +When `USERDATA_PVC_NAME` is set, the provisioner automatically uses subPath (`threads/{thread_id}/user-data`) so each thread gets its own directory in the PVC. + + + +### nginx configuration + +nginx routes all traffic. 
Key environment variables that control routing: + +| Variable | Default | Description | +|---|---|---| +| `LANGGRAPH_UPSTREAM` | `langgraph:2024` | LangGraph service address | +| `LANGGRAPH_REWRITE` | `/` | URL rewrite prefix for LangGraph routes | + +These are set in the Docker Compose environment and processed by `envsubst` at container startup. + +### Authentication + +DeerFlow App uses [Better Auth](https://www.better-auth.com/) for session management. In production: + +1. Set `BETTER_AUTH_SECRET` to a strong random string (minimum 32 characters). +2. Set `BETTER_AUTH_URL` to your public-facing URL (e.g., `https://your-domain.com`). + +```bash +# Generate a secret +openssl rand -base64 32 +``` + +### Resource recommendations + +| Service | Minimum | Recommended | +|---|---|---| +| LangGraph (agent runtime) | 2 vCPU, 4 GB RAM | 4 vCPU, 8 GB RAM | +| Gateway | 0.5 vCPU, 512 MB | 1 vCPU, 1 GB | +| Frontend | 0.5 vCPU, 512 MB | 1 vCPU, 1 GB | +| Sandbox container (per session) | 1 vCPU, 1 GB | 2 vCPU, 2 GB | + +## Deployment verification + +After starting, verify the deployment: + +```bash +# Check Gateway health +curl http://localhost:8001/health + +# Check LangGraph health +curl http://localhost:2024/ok + +# List configured models (through nginx) +curl http://localhost:2026/api/models +``` + +A working deployment returns a `200` response from each endpoint. The `/api/models` call returns the list of models from your `config.yaml`. + + + + + diff --git a/frontend/src/content/en/application/index.mdx b/frontend/src/content/en/application/index.mdx index a951ea81b..8e2bf133b 100644 --- a/frontend/src/content/en/application/index.mdx +++ b/frontend/src/content/en/application/index.mdx @@ -1,3 +1,67 @@ +import { Callout, Cards } from "nextra/components"; + # DeerFlow App -TBD + + DeerFlow App is a complete Super Agent application built on top of DeerFlow + Harness. 
It packages the runtime capabilities into a ready-to-deploy product + with a web UI, API gateway, and operational tooling. + + +DeerFlow App is the reference implementation of what a production DeerFlow experience looks like. It assembles the Harness runtime, a web-based conversation workspace, an API gateway, and a reverse proxy into a single deployable system. + +## What the App provides + +| Capability | Description | +|---|---| +| **Web workspace** | Browser-based conversation UI with support for threads, artifacts, file uploads, and skill selection | +| **Custom agents** | Create and manage named agents with different models, skills, and tool sets | +| **Thread management** | Persistent conversation threads with checkpointing and history | +| **Streaming responses** | Real-time token streaming with thinking steps and tool call visibility | +| **Artifact viewer** | In-browser preview and download of files and outputs produced by the agent | +| **Extensions UI** | Enable/disable MCP servers and skills without editing config files | +| **Gateway API** | FastAPI-based REST API that bridges the frontend and the LangGraph runtime | + +## Architecture + +The DeerFlow App runs as four services behind a single nginx reverse proxy: + +``` + ┌──────────────────┐ + Browser → │ nginx :2026 │ + └──────────────────┘ + │ │ + ┌────────┘ └────────┐ + ▼ ▼ +┌──────────────────┐ ┌──────────────────────┐ +│ Frontend :3000 │ │ Gateway API :8001 │ +│ (Next.js) │ │ (FastAPI) │ +└──────────────────┘ └──────────────────────┘ + │ + ┌─────────┘ + ▼ + ┌──────────────────────┐ + │ LangGraph :2024 │ + │ (DeerFlow Harness) │ + └──────────────────────┘ +``` + +- **nginx**: routes requests — `/api/*` to the Gateway, LangGraph streaming endpoints to LangGraph directly, and everything else to the frontend. +- **Frontend** (Next.js + React): the browser UI. Communicates with both the Gateway and LangGraph. 
+- **Gateway** (FastAPI): handles API operations — model listing, agent CRUD, memory, extensions management, file uploads. +- **LangGraph**: the DeerFlow Harness runtime. Manages thread state, agent execution, and streaming. + +## Technology stack + +| Layer | Technology | +|---|---| +| Frontend | Next.js 16, React 19, TypeScript, pnpm | +| Gateway | FastAPI, Python 3.12, uvicorn | +| Agent runtime | LangGraph, LangChain, DeerFlow Harness | +| Reverse proxy | nginx | +| State persistence | LangGraph Server (default) + optional SQLite/PostgreSQL checkpointer | + + + + + diff --git a/frontend/src/content/en/application/operations-and-troubleshooting.mdx b/frontend/src/content/en/application/operations-and-troubleshooting.mdx index 6bf022503..7592e5fbf 100644 --- a/frontend/src/content/en/application/operations-and-troubleshooting.mdx +++ b/frontend/src/content/en/application/operations-and-troubleshooting.mdx @@ -1,3 +1,171 @@ +import { Callout, Cards } from "nextra/components"; + # Operations and Troubleshooting -TBD +This page covers day-to-day operational tasks and solutions to common problems when running DeerFlow App. 
+ +## Log files + +All services write logs to the `logs/` directory when started with `make dev`: + +| File | Service | +|---|---| +| `logs/langgraph.log` | LangGraph / DeerFlow Harness runtime | +| `logs/gateway.log` | FastAPI Gateway API | +| `logs/frontend.log` | Next.js frontend dev server | +| `logs/nginx.log` | nginx reverse proxy | + +Tail logs in real time: + +```bash +tail -f logs/langgraph.log +tail -f logs/gateway.log +``` + +Adjust the runtime log level in `config.yaml`: + +```yaml +log_level: debug # debug | info | warning | error +``` + +## Health checks + +Verify each service is responding: + +```bash +# Gateway health +curl http://localhost:8001/health + +# LangGraph health +curl http://localhost:2024/ok + +# Through nginx (verifies full proxy chain) +curl http://localhost:2026/api/models +``` + +## Config upgrade + +When you pull a new version of DeerFlow, the `config_version` in `config.example.yaml` may be higher than your `config.yaml`. To merge new fields without losing your customizations: + +```bash +make config-upgrade +``` + +Check the current version in your config: + +```bash +grep config_version config.yaml +``` + +## Common problems + +### The app loads but the agent doesn't respond + +1. Check `logs/langgraph.log` for startup errors. +2. Verify your model is correctly configured in `config.yaml` with a valid API key. +3. Confirm the API key environment variable is set in the shell that ran `make dev`. +4. Test the model endpoint directly with `curl` to rule out network issues. + +--- + +### Model API errors in the agent response + +The agent reports an error like `"No chat models are configured"` or `"model not found"`: + +- Check that the `models:` section in `config.yaml` has at least one entry. +- Verify the `name:` field matches what you are requesting in the UI. +- Check that `api_key:` is referencing the correct environment variable and that the variable is set. 
+ +--- + +### Frontend build fails with `BETTER_AUTH_SECRET` + +``` +Error: BETTER_AUTH_SECRET is required +``` + +Set the environment variable before building: + +```bash +export BETTER_AUTH_SECRET=$(openssl rand -base64 32) +pnpm build +``` + +Or set it in your `.env` file at the repo root. + +--- + +### Sandbox-related tool failures + +If file tools (`read_file`, `ls`, `bash`) fail with permission or path errors: + +1. For `LocalSandboxProvider`: check that `allow_host_bash` is set correctly and that the agent has read/write access to the thread data directory. +2. For container sandboxes: verify Docker (or Apple Container) is running and the sandbox image is accessible. +3. For K8s Provisioner: check that the provisioner service is healthy (`curl http://localhost:8002/health`) and that the K8s cluster is reachable. + +--- + +### K8s Provisioner not connecting + +``` +Connection refused: http://provisioner:8002 +``` + +- Ensure the provisioner service is running: `docker compose ps`. +- Check that `K8S_API_SERVER` is set correctly (should point to the K8s API from inside the container, not `localhost`). +- Verify `~/.kube/config` is mounted and the cluster is reachable from the container host. + +--- + +### MCP tools not loading + +If MCP tools appear in `extensions_config.json` but are not available in the agent: + +1. Check `logs/langgraph.log` for MCP initialization errors. +2. Verify the MCP server command is installed (`npx`, `uvx`, or the relevant binary). +3. Test the server command manually to confirm it starts without errors. +4. Set `log_level: debug` to see detailed MCP loading output. + +--- + +### Memory not persisting across sessions + +- Verify `memory.enabled: true` in `config.yaml`. +- Check that the storage path is writable: `ls -la backend/.deer-flow/`. +- Look for memory update errors in `logs/langgraph.log` (search for "memory"). 
+ +## Data backup + +Thread data and memory are stored under `backend/.deer-flow/`: + +``` +backend/.deer-flow/ + memory.json # global agent memory + agents/ # per-agent memory + threads/ # thread working directories + {thread_id}/ + user-data/ + uploads/ + outputs/ + checkpoints.db # SQLite checkpoints (if configured) +``` + +Back up this entire directory to preserve conversation history, artifacts, and learned memory. + +In Docker deployments, the bind-mounted host path (`$DEER_FLOW_ROOT/backend/.deer-flow/`) is the source of truth — back up the host path. + +## Restarting services + +To restart a single service (local deployment): + +```bash +make stop +make dev +``` + +Individual service restart scripts are in `scripts/`. For targeted restarts, you can kill and relaunch individual processes manually using the PIDs in the log files. + + + + + diff --git a/frontend/src/content/en/application/quick-start.mdx b/frontend/src/content/en/application/quick-start.mdx index 81c102836..2363f8191 100644 --- a/frontend/src/content/en/application/quick-start.mdx +++ b/frontend/src/content/en/application/quick-start.mdx @@ -1,3 +1,124 @@ +import { Callout, Cards, Steps } from "nextra/components"; + # Quick Start -TBD + + Get DeerFlow App running locally in about 10 minutes. You need a machine with + Python 3.12+, Node.js 22+, and at least one LLM API key. + + +This guide walks you through starting DeerFlow App on your local machine using the `make dev` workflow. All four services (LangGraph, Gateway, Frontend, nginx) start together and are accessible through a single URL. + +## Prerequisites + +Check that all required tools are installed: + +```bash +make check +``` + +Required: + +| Tool | Minimum version | +|---|---| +| Python | 3.12 | +| uv | latest | +| Node.js | 22 | +| pnpm | 10 | +| nginx | any recent version | + +On macOS, install with `brew install python uv node pnpm nginx`. On Linux, use your distribution's package manager. 
+ +## Steps + + + +### Clone the repository + +```bash +git clone https://github.com/bytedance/deer-flow.git +cd deer-flow +``` + +### Install dependencies + +```bash +make install +``` + +This installs both backend Python dependencies (via `uv`) and frontend Node.js dependencies (via `pnpm`). + +### Create your config file + +```bash +cp config.example.yaml config.yaml +``` + +Then edit `config.yaml` to add at least one model. The minimum change is adding a model under the `models:` section: + +```yaml +models: + - name: gpt-4o + use: langchain_openai:ChatOpenAI + model: gpt-4o + api_key: $OPENAI_API_KEY + request_timeout: 600.0 + max_retries: 2 + supports_vision: true +``` + +Set the corresponding environment variable before starting: + +```bash +export OPENAI_API_KEY=sk-... +``` + +See the [Application Configuration](/docs/application/configuration) page for examples with other model providers. + +### Start all services + +```bash +make dev +``` + +This starts: +- LangGraph server on port `2024` +- Gateway API on port `8001` +- Frontend on port `3000` +- nginx reverse proxy on port `2026` + +Open [http://localhost:2026](http://localhost:2026) in your browser. + +### Stop all services + +```bash +make stop +``` + + + +## What happens when you run `make dev` + +- Existing service processes are stopped first (safe to run after an interrupted start). +- Each service is started in the background and writes logs to the `logs/` directory. +- nginx proxies all traffic through port `2026`, so you only need one URL. + +Log files: + +| Service | Log file | +|---|---| +| LangGraph | `logs/langgraph.log` | +| Gateway | `logs/gateway.log` | +| Frontend | `logs/frontend.log` | +| nginx | `logs/nginx.log` | + + + If something is not working, check the log files first. Most startup errors + (missing API keys, config parsing failures) appear in `logs/langgraph.log` or + `logs/gateway.log`. 
+ + + + + + diff --git a/frontend/src/content/en/application/workspace-usage.mdx b/frontend/src/content/en/application/workspace-usage.mdx index 567a6fb98..e879f6df4 100644 --- a/frontend/src/content/en/application/workspace-usage.mdx +++ b/frontend/src/content/en/application/workspace-usage.mdx @@ -1,3 +1,74 @@ +import { Callout, Cards } from "nextra/components"; + # Workspace Usage -TBD +The DeerFlow App workspace is a browser-based interface for having multi-turn conversations with the agent, tracking task progress, viewing artifacts, and managing files. + +## Starting a conversation + +Open the app at `http://localhost:2026` (or your deployment URL). The workspace is split into: + +- **Sidebar** (left): thread list, new thread button, and navigation to agents and settings. +- **Conversation area** (center): the active thread's message history. +- **Input bar** (bottom): text input, skill selector, model selector, and attachment controls. + +To start a new thread, click **New Thread** in the sidebar or use the keyboard shortcut. Each thread is independent — it has its own conversation history, artifacts, and state. + +## Selecting a model + +Use the model picker in the input bar to choose which configured model to use for the current request. Models listed here correspond to the `models:` entries in your `config.yaml`. + +The selected model applies to the next message only. You can switch models between messages in the same thread. + +If a model supports **thinking mode**, a toggle appears next to the model selector. When thinking is enabled, the agent's internal reasoning steps are shown inline in the response. + +## Selecting a skill + +Click the **Skills** button in the input bar to open the skill selector. Enabled skills are listed here. Selecting a skill tells the agent to apply that skill's workflow and instructions for the current message. 
+ + + Skills are most useful when you want the agent to follow a specific approach, + such as deep research methodology or structured data analysis. For general + questions, you typically do not need to select a skill. + + +## Plan mode + +Toggle **Plan Mode** in the input bar to enable the todo list middleware. In plan mode, the agent creates and maintains a visible task list as it works through a complex multi-step objective. Each task shows its status (`pending`, `in_progress`, `completed`) in real time. + +Plan mode is most useful for complex tasks with 3 or more distinct steps. + +## Uploading files + +Click the attachment icon in the input bar to upload files. Supported file types include PDFs, text files, spreadsheets, and images. + +Uploaded files are stored under the thread's working directory (`/mnt/user-data/uploads/`) and are accessible to the agent during the conversation. PDFs are automatically converted to Markdown for better model comprehension (the converter can be configured via `uploads.pdf_converter` in `config.yaml`). + +## Viewing artifacts + +When the agent produces output files (reports, charts, code, etc.), they appear in the **Artifacts** panel. Each artifact shows a preview (for supported types) and a download link. + +Artifacts are tracked in the thread state and persist across page reloads. + +## Understanding the message stream + +Each agent response in the conversation may contain: + +- **Text**: the agent's direct reply. +- **Thinking** (if thinking mode is enabled): the model's internal reasoning, shown in a collapsible block. +- **Tool calls**: a record of which tools were called and with what arguments. +- **Tool results**: the output returned by each tool. +- **Subagent output**: if the agent delegated a task, the subagent's progress appears inline. + +Tool calls and thinking steps are collapsed by default. Click to expand them. 
+ +## Switching agents + +If you have created custom agents, use the **Agent** selector in the input bar to switch to a different agent. The selected agent persists for the duration of the thread. + +Custom agents may have different models, skills, tool sets, and system prompts. See [Agents and Threads](/docs/application/agents-and-threads) for how to create and manage custom agents. + + + + + diff --git a/frontend/src/content/en/harness/_meta.ts b/frontend/src/content/en/harness/_meta.ts index f96fd39f9..b682b71cb 100644 --- a/frontend/src/content/en/harness/_meta.ts +++ b/frontend/src/content/en/harness/_meta.ts @@ -10,6 +10,12 @@ const meta: MetaRecord = { "design-principles": { title: "Design Principles", }, + "lead-agent": { + title: "Lead Agent", + }, + middlewares: { + title: "Middlewares", + }, configuration: { title: "Configuration", }, @@ -25,6 +31,12 @@ const meta: MetaRecord = { sandbox: { title: "Sandbox", }, + subagents: { + title: "Subagents", + }, + mcp: { + title: "MCP Integration", + }, customization: { title: "Customization", }, diff --git a/frontend/src/content/en/harness/configuration.mdx b/frontend/src/content/en/harness/configuration.mdx index 79de38b76..fd2d9f16a 100644 --- a/frontend/src/content/en/harness/configuration.mdx +++ b/frontend/src/content/en/harness/configuration.mdx @@ -1,3 +1,157 @@ +import { Callout, Cards } from "nextra/components"; + # Configuration -TBD + + All DeerFlow Harness behaviors are driven by config.yaml. One + file controls which models are available, how the sandbox runs, what tools are + loaded, and how each subsystem behaves. + + +DeerFlow's configuration system is designed around one goal: every meaningful behavior should be expressible in a config file, not hardcoded in the application. This makes deployments reproducible, auditable, and easy to customize per environment. + +## Config file location + +DeerFlow resolves `config.yaml` using the following priority order: + +1. 
The path passed to `AppConfig.from_file(config_path)` explicitly. +2. The `DEER_FLOW_CONFIG_PATH` environment variable. +3. `backend/config.yaml` (relative to the backend directory). +4. `config.yaml` in the repository root. + +If none of these paths exist, the application raises an error at startup. + +To use a custom location: + +```bash +export DEER_FLOW_CONFIG_PATH=/path/to/my-config.yaml +``` + +## Environment variable interpolation + +Any field value can reference an environment variable using `$VAR_NAME` syntax: + +```yaml +models: + - name: gpt-4o + api_key: $OPENAI_API_KEY +``` + +This keeps secrets out of the config file itself. The variable is resolved at runtime from the process environment. + +## The `use` field + +Many configuration entries use a `use:` field to specify the Python class or object to instantiate. The format is: + +``` +package.subpackage.module:ClassName +``` + +or for module-level objects: + +``` +package.subpackage.module:variable_name +``` + +Examples: + +```yaml +sandbox: + use: deerflow.sandbox.local:LocalSandboxProvider + +tools: + - use: deerflow.community.tavily.tools:web_search_tool + api_key: $TAVILY_API_KEY +``` + +This pattern is how DeerFlow achieves pluggability without hardcoding class references. + +## Extra fields are passed through + +For model configuration, `ModelConfig` uses `pydantic ConfigDict(extra="allow")`. This means any extra fields you add under a model entry are passed directly to the model constructor. 
This allows provider-specific options (like `extra_body`, `reasoning`, or custom timeout keys) to work without modifying the harness: + +```yaml +models: + - name: my-model + use: langchain_openai:ChatOpenAI + model: gpt-4o + api_key: $OPENAI_API_KEY + some_provider_specific_option: value # passed through to ChatOpenAI constructor +``` + +## Configuration version + +`config.yaml` includes a `config_version` field that tracks the schema version: + +```yaml +config_version: 6 +``` + +When the schema changes (new fields, renamed sections), this number is bumped. If your local `config.yaml` is behind the current version, run: + +```bash +make config-upgrade +``` + +This merges new fields from `config.example.yaml` into your existing `config.yaml` without overwriting your customizations. + +## Module configuration reference + +The following table maps each top-level `config.yaml` section to its documentation page: + +| Section | Description | Documentation | +|---|---|---| +| `log_level` | Logging level (`debug`/`info`/`warning`/`error`) | — | +| `models` | Available LLM models | [Lead Agent](/docs/harness/lead-agent) | +| `token_usage` | Token tracking per model call | [Middlewares](/docs/harness/middlewares) | +| `tools` | Available agent tools | [Tools](/docs/harness/tools) | +| `tool_groups` | Named groups of tools | [Tools](/docs/harness/tools) | +| `tool_search` | Deferred/on-demand tool loading | [Tools](/docs/harness/tools) | +| `sandbox` | Sandbox provider and options | [Sandbox](/docs/harness/sandbox) | +| `skills` | Skills directory and container path | [Skills](/docs/harness/skills) | +| `skill_evolution` | Agent-managed skill creation | [Skills](/docs/harness/skills) | +| `subagents` | Subagent timeouts and max turns | [Subagents](/docs/harness/subagents) | +| `acp_agents` | External ACP agent integrations | [Subagents](/docs/harness/subagents) | +| `memory` | Cross-session memory storage | [Memory](/docs/harness/memory) | +| `summarization` | Conversation 
summarization | [Middlewares](/docs/harness/middlewares) | +| `title` | Automatic thread title generation | [Middlewares](/docs/harness/middlewares) | +| `checkpointer` | Thread state persistence | [Agents & Threads](/docs/application/agents-and-threads) | +| `guardrails` | Tool call authorization | — | +| `stream_bridge` | Streaming configuration | — | +| `uploads` | File upload settings (PDF converter) | — | +| `channels` | IM channel integrations (Feishu, Slack, etc.) | — | + +## Minimal config to get started + +The minimum valid `config.yaml` requires at least one model and a sandbox: + +```yaml +config_version: 6 + +models: + - name: gpt-4o + use: langchain_openai:ChatOpenAI + model: gpt-4o + api_key: $OPENAI_API_KEY + request_timeout: 600.0 + max_retries: 2 + supports_vision: true + +sandbox: + use: deerflow.sandbox.local:LocalSandboxProvider + +tools: + - use: deerflow.community.ddg_search.tools:web_search_tool + - use: deerflow.community.jina_ai.tools:web_fetch_tool + - use: deerflow.sandbox.tools:ls_tool + - use: deerflow.sandbox.tools:read_file_tool + - use: deerflow.sandbox.tools:write_file_tool + - use: deerflow.sandbox.tools:bash_tool +``` + +Start from `config.example.yaml` in the repository root and uncomment the sections you need. + + + + + diff --git a/frontend/src/content/en/harness/design-principles.mdx b/frontend/src/content/en/harness/design-principles.mdx index bae1cc217..49ca9d8ae 100644 --- a/frontend/src/content/en/harness/design-principles.mdx +++ b/frontend/src/content/en/harness/design-principles.mdx @@ -1,3 +1,121 @@ +import { Callout, Cards } from "nextra/components"; + # Design Principles -TBD + + DeerFlow is built around one central idea: agent behavior should be composed + from small, observable, replaceable pieces — not hardcoded into a fixed + workflow graph. 
+ + +Understanding the design principles behind DeerFlow Harness helps you use it effectively, extend it confidently, and reason about how your agents will behave in production. + +## Why a harness, not a framework + +A framework gives you abstractions and building blocks. You assemble the parts and write the glue code that connects them. + +A **harness** goes further. It packages an opinionated, ready-to-run runtime so that agents can do real work without you rebuilding the same infrastructure every time. + +DeerFlow is a harness because it bundles: + +- a lead agent with tool routing, +- a middleware chain that wraps every LLM turn, +- sandboxed execution for files and commands, +- skills that load specialized capabilities on demand, +- subagents for delegated parallel work, +- memory for cross-session continuity, and +- a configuration system that controls all of it. + +You do not need to design the orchestration layer from scratch. The harness is the orchestration layer. + +## Long-horizon tasks are the primary case + +DeerFlow is designed for tasks that require more than a single prompt-response exchange. A useful long-horizon agent must: + +1. make a plan, +2. call tools in sequence, +3. inspect and modify files, +4. recover when something fails, +5. delegate work to subagents when the task is too broad, and +6. return a concrete artifact at the end. + +Every architectural decision in DeerFlow is evaluated against this use case. Short, stateless exchanges are easy. Long, multi-step workflows under real-world pressure are the target. + +## Middleware chain over inheritance + +DeerFlow does not ask you to subclass an agent or override methods to change its behavior. Instead, it uses a **middleware chain** that wraps every LLM turn. + +Each middleware is a small, focused plugin that can inspect or modify the agent's state before and after the model call. The lead agent's behavior is entirely determined by which middlewares are active. 
+
+
+This design has several benefits:
+
+- Individual behaviors (memory, summarization, clarification, loop detection) are isolated and testable independently.
+- The chain can be extended without touching the agent's core logic.
+- Each middleware's effect is visible and auditable because it only touches the state it declares.
+
+See the [Middlewares](/docs/harness/middlewares) page for the full list and configuration.
+
+## Skills provide specialization without contamination
+
+A **skill** is a task-oriented capability package. It contains instructions, workflows, best practices, and any tools or resources that make the agent effective at a specific class of work.
+
+The key design decision is that skills are loaded **on demand**. The base agent stays general. When a task requires deep research, the research skill is loaded. When a task requires data analysis, the analysis skill is loaded.
+
+This matters because it keeps the base agent's context clean. A specialized prompt for writing academic papers does not pollute a session focused on coding. Skills inject their content exactly when relevant and nothing more.
+
+Skills also make the system extensible. Adding a new capability to DeerFlow means writing a new skill pack, not modifying the agent core.
+
+## Sandbox is the execution environment
+
+DeerFlow gives agents a **sandbox**: an isolated workspace where they can read files, write outputs, run commands, and produce artifacts.
+
+This turns the agent from a text generator into a system that can do work. Instead of only describing what code to write, the agent can write it, run it, and verify the result.
+
+Isolation is important because execution should be reproducible and controllable. The sandbox is the reason DeerFlow can support genuine action rather than only conversation.
+
+Two modes are available:
+
+- **LocalSandbox**: commands run directly on the host. Suitable for trusted, single-user local workflows.
+- **Container-based sandbox**: commands run in an isolated container (Docker or Apple Container). Suitable for multi-user environments and production deployments. + +## Context engineering keeps long tasks tractable + +Context pressure is the primary challenge for long-horizon agents. If everything accumulates in the context window indefinitely, the agent becomes slower, noisier, and less reliable. + +DeerFlow addresses this through **context engineering** — deliberate control of what the agent sees, remembers, and ignores at each step: + +- **Summarization**: when the conversation grows too long, older turns are summarized and replaced. The agent retains the meaning without the bulk. +- **Scoped subagent context**: when work is delegated to a subagent, that subagent receives only the information it needs for its piece of the task, not the full parent history. +- **External working memory**: files and artifacts produced during a task live on disk, not in the context window. The agent references them when needed. +- **Memory injection**: cross-session facts are injected into the system prompt at a controlled token budget. + +This is one of the most important ideas in DeerFlow. Good agent behavior is not only about a stronger model. It is also about giving the model the right working set at the right time. + +## Configuration drives behavior + +All meaningful behaviors in DeerFlow are controlled through `config.yaml`. The system is designed so that operators can change how the agent behaves — which models to use, whether summarization is active, how subagents are limited, what tools are available — without touching code. + +This design principle has three implications: + +1. **Reproducibility**: a config file is a complete description of the agent's behavior at a point in time. +2. **Deployability**: the same code runs differently in different environments because the config is different. +3. 
**Auditability**: what the agent can and cannot do is visible in one place. + +Environment variable interpolation (`api_key: $OPENAI_API_KEY`) keeps secrets out of committed config files while preserving the same structure. + +## Summary + +| Principle | What it means in practice | +|---|---| +| Harness, not framework | Ready-to-run runtime with all the infrastructure already wired | +| Long-horizon first | Architecture assumes multi-step, multi-tool, multi-turn tasks | +| Middleware over inheritance | Behavior is composed from small, isolated plugins | +| Skills for specialization | Domain capability injected on demand, keeping the base clean | +| Sandbox for execution | Isolated workspace for real file and command work | +| Context engineering | Active management of what the agent sees to stay effective | +| Config-driven | All key behaviors are controlled through `config.yaml` | + + + + + diff --git a/frontend/src/content/en/harness/lead-agent.mdx b/frontend/src/content/en/harness/lead-agent.mdx new file mode 100644 index 000000000..59d3337a0 --- /dev/null +++ b/frontend/src/content/en/harness/lead-agent.mdx @@ -0,0 +1,151 @@ +import { Callout, Cards, Steps } from "nextra/components"; + +# Lead Agent + + + The Lead Agent is the primary reasoning and orchestration unit in every + DeerFlow thread. It decides what to do, calls tools, delegates to subagents, + and returns artifacts. + + +The Lead Agent is the central executor in a DeerFlow thread. Every conversation, task, and workflow flows through it. Understanding how it works helps you configure it effectively and extend it when needed. 
+ +## What the Lead Agent does + +The Lead Agent is responsible for: + +- receiving user messages and maintaining conversation state, +- reasoning about what to do next (planning, tool selection, delegation), +- calling tools — built-in, community, MCP, or skill tools, +- delegating subtasks to subagents via the `task` tool, +- managing artifacts (files, outputs, deliverables), +- updating the todo list in plan mode, and +- returning final responses or artifacts to the user. + +The Lead Agent does not hardcode a specific workflow. It uses the model's reasoning to adapt to whatever task the user provides, guided by the system prompt and the skills currently in scope. + +## Runtime foundation + +The Lead Agent is built on **LangGraph** and **LangChain Agent** primitives. Specifically: + +- [`create_agent`](https://python.langchain.com/docs/concepts/agents/) from `langchain.agents` wraps the LLM into a tool-calling agent loop. +- LangGraph manages the `ThreadState` and provides the checkpointing, streaming, and graph execution model. +- A **middleware chain** wraps every turn of the agent loop, providing cross-cutting capabilities like memory, summarization, and clarification. + +## Execution flow + + + +### Receive message + +The user message arrives and is added to `ThreadState.messages`. The `ThreadState` holds the full conversation history, any active todo list, accumulated artifacts, and runtime metadata. + +### Middleware pre-processing + +Before the model is called, each active middleware has a chance to modify the state. For example, the `MemoryMiddleware` injects persisted memory facts into the system prompt, and the `SummarizationMiddleware` may condense old messages if the token budget is exceeded. + +### LLM reasoning + +The model receives the current messages (including system prompt with active skill instructions) and produces either a direct reply or one or more tool call requests. 
+ +### Tool execution + +If tool calls are requested, they are dispatched to the appropriate handlers — sandbox tools for file and command work, community tools for web access, or the `task` tool for subagent delegation. + +### Middleware post-processing + +After tool results are returned and before the next model call, middlewares run again. The `TitleMiddleware` may generate a thread title on the first exchange, and the `TodoMiddleware` may update the task list. + +### Loop or respond + +If the model needs more information (e.g., a tool returned partial results), the loop continues. When the model decides the task is complete, it produces a final message and the loop ends. + +### State update + +`ThreadState` is updated with new messages, artifacts, and memory queues. If a checkpointer is configured, the state is persisted. + + + +## Model selection + +The Lead Agent resolves which model to use at runtime using the following priority order: + +1. `model_name` (or `model`) from the per-request configuration, if provided and valid. +2. The `model` field of the active custom agent's config, if an agent is specified. +3. The first model in the `models:` list in `config.yaml` (the global default). + +If the requested model name is not found in the config, the system falls back to the default model and logs a warning. + +```yaml +models: + - name: my-primary-model + use: langchain_openai:ChatOpenAI + model: gpt-4o + api_key: $OPENAI_API_KEY + request_timeout: 600.0 + max_retries: 2 + supports_vision: true + + - name: my-fast-model + use: langchain_openai:ChatOpenAI + model: gpt-4o-mini + api_key: $OPENAI_API_KEY +``` + +The first entry (`my-primary-model`) becomes the default. Any request that does not specify a model, or specifies an unknown model name, will use it. + +## Thinking mode + +If the model supports extended thinking (e.g., DeepSeek Reasoner, Doubao with thinking enabled, Anthropic Claude with thinking), the Lead Agent can run in **thinking mode**. 
In this mode, the model's internal reasoning steps are visible in the response stream. + +Thinking mode is controlled per-request through the `thinking_enabled` flag. If thinking is enabled but the configured model does not support it, the system falls back gracefully and logs a warning. + +```yaml +models: + - name: deepseek-v3 + use: deerflow.models.patched_deepseek:PatchedChatDeepSeek + model: deepseek-reasoner + api_key: $DEEPSEEK_API_KEY + supports_thinking: true + when_thinking_enabled: + extra_body: + thinking: + type: enabled + when_thinking_disabled: + extra_body: + thinking: + type: disabled +``` + +## Plan mode + +When `is_plan_mode` is set to `true` in the request configuration, the `TodoMiddleware` is activated. The agent then maintains a structured task list, marking items as `in_progress`, `completed`, or `pending` as it works through a complex task. This provides visibility into the agent's progress for the user. + +Plan mode is appropriate for complex, multi-step tasks where showing incremental progress is valuable. For simple requests, it is better left disabled to avoid unnecessary overhead. + +## Custom agents + +The same Lead Agent runtime powers both the default agent and any custom agents you create. A custom agent differs only in: + +- its **name** (ASCII slug, auto-derived from `display_name`), +- its **system prompt** or agent-specific instructions, +- which **skills** it has access to, +- which **tool groups** it can use, and +- which **model** it defaults to. + +Custom agents are created through the DeerFlow App UI or via the `/api/agents` endpoint. Their configuration is stored in `agents/{name}/config.yaml` relative to the backend directory. + + + When a custom agent is selected in a thread, the Lead Agent loads that + agent's config at runtime. Switching models or skills for a specific agent + does not require restarting the server. 
+ + +## Bootstrap mode + +DeerFlow includes a special **bootstrap mode** for the initial setup of custom agents. When `is_bootstrap: true` is passed in the request config, the Lead Agent runs with a minimal system prompt and only the core setup tools exposed. This is used internally to guide the first-run agent configuration flow. + + + + + diff --git a/frontend/src/content/en/harness/mcp.mdx b/frontend/src/content/en/harness/mcp.mdx new file mode 100644 index 000000000..32b95672b --- /dev/null +++ b/frontend/src/content/en/harness/mcp.mdx @@ -0,0 +1,109 @@ +import { Callout, Cards, Steps } from "nextra/components"; + +# MCP Integration + + + Model Context Protocol (MCP) lets DeerFlow connect to any external tool + server. Once connected, MCP tools are available to the Lead Agent exactly like + built-in tools. + + +The **Model Context Protocol (MCP)** is an open standard for connecting language models to external tools and data sources. DeerFlow's MCP integration allows you to extend the agent with any tool server that implements the MCP protocol — without modifying the harness itself. + +## Configuration + +MCP servers are configured in `extensions_config.json`, a file separate from `config.yaml`. This separation allows MCP and skill configurations to be managed independently and updated at runtime through the Gateway API. + +The default location is the project root (same directory as `config.yaml`). The path is determined by `ExtensionsConfig.resolve_config_path()`. 
+ +```json +{ + "mcpServers": { + "my-server": { + "command": "npx", + "args": ["-y", "@my-org/my-mcp-server"], + "enabled": true + }, + "filesystem": { + "command": "npx", + "args": ["-y", "@modelcontextprotocol/server-filesystem", "/path/to/dir"], + "enabled": true + }, + "sqlite": { + "command": "uvx", + "args": ["mcp-server-sqlite", "--db-path", "/path/to/db.sqlite"], + "enabled": false + } + } +} +``` + +Each server entry supports: +- `command`: the executable to run (e.g., `npx`, `uvx`, `python`) +- `args`: command arguments as an array +- `enabled`: whether the server is active (can be toggled without removing the entry) +- `env`: optional environment variables injected into the server process + +## How tools are loaded + + + +### Startup initialization + +When the DeerFlow server starts, `initialize_mcp_tools()` is called. This connects to all enabled MCP servers, retrieves their tool schemas, and caches the results. + +### Lazy initialization fallback + +If the server starts before MCP tools are initialized (e.g., in LangGraph Studio), `get_cached_mcp_tools()` performs lazy initialization on the first tool call. + +### Cache invalidation + +The MCP tools cache tracks the modification time (`mtime`) of `extensions_config.json`. When the file changes — for example, when a server is enabled or disabled through the Gateway API — the cache is marked stale and tools are reloaded on the next request. + +This means MCP server changes take effect without restarting the DeerFlow server. + +### Tool availability + +Once loaded, MCP tools appear in the Lead Agent's tool list alongside built-in and community tools. The agent selects and calls them using the same mechanism as any other tool. + + + +## Tool search integration + +When many MCP servers expose a large number of tools, loading all of them into the agent's context at once can increase token usage and reduce tool selection accuracy. 
+ +Enable **tool search** to load MCP tools on demand instead: + +```yaml +# config.yaml +tool_search: + enabled: true +``` + +With tool search enabled, MCP tools are listed by name in the system prompt but not included in the full tool schema. The agent discovers them using the `tool_search` built-in tool and loads only the ones it needs for a given task. + +## OAuth support + +Some MCP servers require OAuth authentication. DeerFlow's `mcp/oauth.py` handles the OAuth flow for servers that declare OAuth requirements in their capability headers. + +When an OAuth-protected MCP server is connected, DeerFlow will: +1. Detect the OAuth requirement from the server's capability headers +2. Build the appropriate authorization headers using `get_initial_oauth_headers()` +3. Wrap tool calls with an OAuth interceptor via `build_oauth_tool_interceptor()` + +The OAuth flow is transparent to the Lead Agent — it simply calls the tool, and DeerFlow handles the authentication. + +## Managing MCP servers + +MCP servers can be managed in several ways: + +- **Through the DeerFlow App UI**: the extensions panel shows connected MCP servers and lets you enable/disable them. +- **Through the Gateway API**: `POST /api/extensions/mcp/{name}/enable` and `/disable`. +- **By editing `extensions_config.json` directly**: useful for scripted or programmatic configuration. + +Changes are picked up automatically due to the file mtime-based cache invalidation. + + + + + diff --git a/frontend/src/content/en/harness/memory.mdx b/frontend/src/content/en/harness/memory.mdx index 30fd1060f..97094bbd7 100644 --- a/frontend/src/content/en/harness/memory.mdx +++ b/frontend/src/content/en/harness/memory.mdx @@ -1,3 +1,119 @@ +import { Callout, Cards } from "nextra/components"; + # Memory -TBD + + Memory lets DeerFlow carry useful information across sessions. The agent + remembers user preferences, project context, and recurring facts so it can + give better responses without starting from zero every time. 
+ + +Memory is a runtime feature of the DeerFlow Harness. It is not a simple conversation log — it is a structured store of facts and context summaries that persist across separate sessions and inform the agent's behavior in future conversations. + +## What memory stores + +The memory store holds several categories of information: + +- **Work context**: summaries of ongoing projects, goals, and recurring topics the user works on. +- **Personal context**: preferences, communication style, and other user-specific details the agent has learned. +- **Top of mind**: the most recent focus areas and active tasks. +- **History**: recent months' context, earlier background, and long-term facts. +- **Facts**: discrete, specific facts the agent has extracted from conversations (e.g., preferred tools, team names, project constraints). + +Each category is updated over time as the agent learns from ongoing conversations. + +## How it works + +Memory is managed by `MemoryMiddleware`, which runs on every Lead Agent turn: + +1. **Injection**: at the start of each conversation, the agent's current memory is injected into the system prompt at a controlled token budget (`max_injection_tokens`). +2. **Learning**: after a conversation, a background job extracts new facts and updates the relevant memory categories. Updates are debounced by `debounce_seconds` to batch rapid changes. +3. **Per-agent memory**: when a custom agent is active, its memory is stored separately from the global memory. This keeps different agents' knowledge isolated. + +## Configuration + +```yaml +memory: + enabled: true + + # Storage path for the global memory file. + # Default: {base_dir}/memory.json (resolves to backend/.deer-flow/memory.json) + # Absolute paths are used as-is. + # Relative paths are resolved against base_dir (not the backend working directory). 
+ storage_path: memory.json + + # Storage class (default: file-based JSON storage) + storage_class: deerflow.agents.memory.storage.FileMemoryStorage + + # Seconds to wait before processing queued memory updates (debounce) + debounce_seconds: 30 + + # Model for memory update extraction (null = use default model) + model_name: null + + # Maximum number of facts to store + max_facts: 100 + + # Minimum confidence score required to store a fact (0.0–1.0) + fact_confidence_threshold: 0.7 + + # Whether to inject memory into the system prompt + injection_enabled: true + + # Maximum tokens to use for memory injection into system prompt + max_injection_tokens: 2000 +``` + +## Global vs per-agent memory + +DeerFlow supports two levels of memory: + +- **Global memory**: stored at `{base_dir}/memory.json`. Used when no specific agent is active or when the agent has no per-agent memory file. +- **Per-agent memory**: stored at `{base_dir}/agents/{agent_name}/memory.json`. Used when a custom agent is active, keeping that agent's learned knowledge separate. + +The `MemoryMiddleware` automatically selects the correct memory file based on the active `agent_name` in the request configuration. + +Agent names used for memory storage are validated against `AGENT_NAME_PATTERN` to ensure filesystem safety. + +## Storage location + +By default, memory files are stored under the backend base directory: + +- Base directory: `backend/.deer-flow/` +- Global memory: `backend/.deer-flow/memory.json` +- Per-agent memory: `backend/.deer-flow/agents/{agent_name}/memory.json` + +You can change the storage path with the `storage_path` field. Relative paths are resolved against the base directory. Use an absolute path to store memory in a custom location. + +## Custom storage backend + +The `storage_class` field allows you to replace the default file-based storage with a custom implementation. 
Any class that extends `MemoryStorage` and implements `load()`, `reload()`, and `save()` methods can be used: + +```yaml +memory: + storage_class: mypackage.storage.RedisMemoryStorage +``` + +If the configured class cannot be loaded, the system falls back to the default `FileMemoryStorage` and logs an error. + +## Disabling memory + +To disable memory entirely: + +```yaml +memory: + enabled: false +``` + +To keep memory storage but prevent injection into the system prompt: + +```yaml +memory: + enabled: true + injection_enabled: false +``` + + + + + diff --git a/frontend/src/content/en/harness/middlewares.mdx b/frontend/src/content/en/harness/middlewares.mdx new file mode 100644 index 000000000..fab5b732f --- /dev/null +++ b/frontend/src/content/en/harness/middlewares.mdx @@ -0,0 +1,212 @@ +import { Callout } from "nextra/components"; + +# Middlewares + + + Middlewares wrap every LLM turn in the Lead Agent. They are the primary + extension point for adding cross-cutting behaviors like memory, summarization, + clarification, and token tracking. + + +Every time the Lead Agent calls the LLM, it runs through a **middleware chain** before and after the model call. Middlewares can read and modify the agent's state, inject content into the system prompt, intercept tool calls, and react to model outputs. + +This design keeps the agent core simple and stable while allowing rich, composable behaviors to be layered in. + +## How the chain works + +The middleware chain is built once per agent invocation, based on the current configuration and request parameters. The middlewares run in a defined order: + +1. Runtime middlewares (error handling, thread data, uploads, dangling tool call patching) +2. `SummarizationMiddleware` — context compression (if enabled) +3. `TodoMiddleware` — task list management (plan mode only) +4. `TokenUsageMiddleware` — token tracking (if enabled) +5. `TitleMiddleware` — automatic thread title generation +6. 
`MemoryMiddleware` — cross-session memory injection and queuing +7. `ViewImageMiddleware` — image details injection (if model supports vision) +8. `DeferredToolFilterMiddleware` — hides deferred tool schemas (if tool search enabled) +9. `SubagentLimitMiddleware` — limits parallel subagent calls (if subagents enabled) +10. `LoopDetectionMiddleware` — breaks repetitive tool call loops +11. Custom middlewares (if any) +12. `ClarificationMiddleware` — intercepts clarification requests (always last) + +The ordering is significant. Summarization runs early to reduce context before other processing. Clarification always runs last so it can intercept after all other middlewares have had their turn. + +## Middleware reference + +### ClarificationMiddleware + +Intercepts clarification tool calls and converts them into proper user-facing requests for additional information. When the model decides it needs to ask the user something before proceeding, this middleware surfaces that request. + +**Configuration**: controlled by `guardrails.clarification` settings. + +--- + +### LoopDetectionMiddleware + +Detects when the agent is making the same tool call repeatedly without making progress. When a loop is detected, the middleware intervenes to break the cycle and prevents the agent from burning turns indefinitely. + +**Configuration**: built-in, no user configuration. + +--- + +### MemoryMiddleware + +Reads persisted memory facts at the start of each conversation and injects them into the system prompt. After a conversation ends, queues a background update to incorporate any new information into the memory store. + +**Configuration**: see the [Memory](/docs/harness/memory) page and the `memory:` section in `config.yaml`. + +```yaml +memory: + enabled: true + injection_enabled: true + max_injection_tokens: 2000 + debounce_seconds: 30 +``` + +--- + +### SubagentLimitMiddleware + +Limits the number of parallel subagent task calls the agent can make in a single turn. 
This prevents the agent from spawning an unbounded number of concurrent subagents. + +**Configuration**: `subagent_enabled` and `max_concurrent_subagents` in the per-request config. + +--- + +### TitleMiddleware + +Automatically generates a title for the thread after the first exchange. The title is derived from the user's first message and the agent's response. + +**Configuration**: `title:` section in `config.yaml`. + +```yaml +title: + enabled: true + max_words: 6 + max_chars: 60 + model_name: null # use default model +``` + +--- + +### TodoMiddleware + +When plan mode is active, maintains a structured task list visible to the user. The agent uses the `write_todos` tool to mark tasks as `pending`, `in_progress`, or `completed` as it works through a complex objective. + +**Activation**: enabled automatically when `is_plan_mode: true` is set in the request configuration. No `config.yaml` entry required. + +--- + +### TokenUsageMiddleware + +Tracks LLM token consumption per model call and logs it at the `info` level. Useful for monitoring costs and understanding where tokens are going in long tasks. + +**Configuration**: `token_usage:` section in `config.yaml`. + +```yaml +token_usage: + enabled: false +``` + +--- + +### SandboxAuditMiddleware + +Audits sandbox operations performed during the agent's execution. Provides a record of what files were read, written, and what commands were run. + +**Configuration**: built-in runtime middleware, always active when a sandbox is available. + +--- + +### SummarizationMiddleware + +When the conversation grows long, summarizes older messages to reduce context size. The summary is injected back into the conversation in place of the original messages, preserving meaning without the full token cost. + +**Configuration**: `summarization:` section in `config.yaml`. See detailed configuration below. 
+ +--- + +### ViewImageMiddleware + +When the current model supports vision (`supports_vision: true`), this middleware intercepts `view_image` tool calls and injects the image content directly into the model's context so it can be analyzed. + +**Activation**: automatically enabled when the resolved model has `supports_vision: true`. + +--- + +### DeferredToolFilterMiddleware + +When tool search is enabled, this middleware hides deferred tool schemas from the model's context. Tools are discovered lazily via the `tool_search` tool instead of being listed upfront, reducing context usage. + +**Configuration**: `tool_search.enabled: true` in `config.yaml`. + +## Summarization configuration + +The `SummarizationMiddleware` is one of the most impactful middlewares for long-horizon tasks. Here is the full configuration reference: + +```yaml +summarization: + enabled: true + + # Model to use for summarization (null = use default model) + # A lightweight model like gpt-4o-mini is recommended to reduce cost. + model_name: null + + # Trigger conditions — summarization runs when ANY threshold is met + trigger: + - type: tokens # trigger when context exceeds N tokens + value: 15564 + # - type: messages # trigger when there are more than N messages + # value: 50 + # - type: fraction # trigger when context exceeds X% of model max + # value: 0.8 + + # How much recent history to keep after summarization + keep: + type: messages + value: 10 # keep the 10 most recent messages + # Alternative: keep by tokens + # type: tokens + # value: 3000 + + # Maximum tokens to trim when preparing messages for the summarizer + trim_tokens_to_summarize: 15564 + + # Custom summary prompt (null = use default LangChain prompt) + summary_prompt: null +``` + +**Trigger types**: +- `tokens`: triggers when the total token count in the conversation exceeds `value`. +- `messages`: triggers when the number of messages exceeds `value`. 
+- `fraction`: triggers when the context reaches `value` fraction of the model's maximum input token limit. + +Multiple triggers can be listed; summarization runs when **any** of them fires. + +**Keep types**: +- `messages`: keep the last `value` messages after summarization. +- `tokens`: keep up to `value` tokens of recent history. +- `fraction`: keep up to `value` fraction of the model's max input token limit. + +## Writing a custom middleware + +Custom middlewares can be injected into the chain for specialized use cases. A middleware must implement the `AgentMiddleware` interface from `langchain.agents.middleware`. + +The basic structure is: + +```python +from langchain.agents.middleware import AgentMiddleware + +class MyMiddleware(AgentMiddleware): + async def on_start(self, state, config): + # Runs before the model call + # Modify state or config here + return state, config + + async def on_end(self, state, config): + # Runs after the model call + # Inspect or modify the result + return state, config +``` + +Custom middlewares are passed to `make_lead_agent` via the `custom_middlewares` parameter in `_build_middlewares`. They are injected immediately before `ClarificationMiddleware` at the end of the chain. diff --git a/frontend/src/content/en/harness/sandbox.mdx b/frontend/src/content/en/harness/sandbox.mdx index 318d5475f..b5762210c 100644 --- a/frontend/src/content/en/harness/sandbox.mdx +++ b/frontend/src/content/en/harness/sandbox.mdx @@ -1,3 +1,150 @@ +import { Callout, Cards, Tabs } from "nextra/components"; + # Sandbox -TBD + + The sandbox is the isolated workspace where the agent does file and + command-based work. It is what makes DeerFlow capable of real action, not + just conversation. + + +The sandbox gives the Lead Agent a controlled environment where it can read files, write outputs, run shell commands, and produce artifacts. Without a sandbox, the agent can only generate text. 
With a sandbox, it can write and execute code, process data files, generate charts, and build deliverables. + +## Sandbox modes + +DeerFlow supports three sandbox modes. Choose the one that fits your deployment: + +### LocalSandbox (default) + +Commands run directly on the host machine's filesystem. There is no container isolation. + +- **Best for**: trusted, single-user local development workflows. +- **Risk**: the agent has access to the host filesystem. Use `allow_host_bash: false` (default) to prevent arbitrary command execution. + +```yaml +sandbox: + use: deerflow.sandbox.local:LocalSandboxProvider + allow_host_bash: false # default; set to true only for fully trusted workflows +``` + +### Container-based AIO Sandbox + +Commands run in an isolated container (Docker on Linux/Windows, or Apple Container on macOS). Each sandbox session gets a fresh container environment. + +- **Best for**: multi-user environments, production deployments, or any case where you want execution isolation. + +```yaml +sandbox: + use: deerflow.community.aio_sandbox:AioSandboxProvider + + # Optional: container image (default shown below) + image: enterprise-public-cn-beijing.cr.volces.com/vefaas-public/all-in-one-sandbox:latest + + # Optional: max concurrent containers (default: 3, LRU eviction when exceeded) + replicas: 3 + + # Optional: container name prefix (default: deer-flow-sandbox) + container_prefix: deer-flow-sandbox + + # Optional: idle timeout in seconds (default: 600) + idle_timeout: 600 + + # Optional: custom mounts + mounts: + - host_path: /path/on/host + container_path: /home/user/shared + read_only: false + + # Optional: environment variables injected into the container + environment: + API_KEY: $MY_API_KEY +``` + +Install: `cd backend && uv add 'deerflow-harness[aio-sandbox]'` + +### Provisioner-managed Sandbox (Kubernetes) + +Each sandbox gets a dedicated Pod in a Kubernetes cluster, managed by the provisioner service. 
This provides the strongest isolation and is recommended for production environments with multiple concurrent users. + +```yaml +sandbox: + use: deerflow.community.aio_sandbox:AioSandboxProvider + provisioner_url: http://provisioner:8002 +``` + +The provisioner service is included in `docker/docker-compose-dev.yaml` and manages the Pod and Service lifecycle for each sandbox ID. + +## Path mappings + +The sandbox uses path mappings to bridge the host filesystem and the container's virtual filesystem. Two key mappings are always configured: + +| Host path | Container path | Access | +|---|---|---| +| `skills/` (from `skills.path`) | `/mnt/skills` (from `skills.container_path`) | Read-only | +| `.deer-flow/threads/{thread_id}/user-data/` | `/mnt/user-data/` | Read-write | + +The skills directory is always mounted read-only. Threads write their working data (uploads, outputs, intermediate files) to `/mnt/user-data/`. + +### Custom mounts + +You can add additional mounts for the local sandbox using the `mounts:` configuration: + +```yaml +sandbox: + use: deerflow.sandbox.local:LocalSandboxProvider + mounts: + - host_path: /home/user/my-project + container_path: /mnt/my-project + read_only: true +``` + + + Custom mount `container_path` values must not conflict with reserved prefixes: + `/mnt/skills`, `/mnt/acp-workspace`, or `/mnt/user-data`. + + +## Output truncation + +The sandbox tools limit output size to keep the agent's context manageable. These limits are configurable: + +```yaml +sandbox: + use: deerflow.sandbox.local:LocalSandboxProvider + + # bash uses middle-truncation (head + tail) + bash_output_max_chars: 20000 + + # read_file uses head-truncation + read_file_output_max_chars: 50000 + + # ls uses head-truncation + ls_output_max_chars: 20000 +``` + +Set to `0` to disable truncation. + +## Security + +### LocalSandbox + +The `LocalSandbox` runs commands directly on the host. 
By default, the `bash` tool is **disabled** to prevent arbitrary host command execution. Enable it only for fully trusted, single-user workflows: + +```yaml +sandbox: + allow_host_bash: true # Dangerous: grants the agent shell access to your machine +``` + +Even without `bash`, the agent can still read and write files through the dedicated file tools. + +### Container sandbox + +Container-based sandboxes provide filesystem and process isolation. The agent cannot see or modify the host filesystem except through explicit mounts. The provisioner-managed mode adds a further layer: each thread gets its own isolated Pod. + +### Audit middleware + +`SandboxAuditMiddleware` runs on every agent turn and records all sandbox operations. This provides an audit trail of what files were accessed and what commands were run during a session. + + + + + diff --git a/frontend/src/content/en/harness/skills.mdx b/frontend/src/content/en/harness/skills.mdx index 375984c3f..71efab6fc 100644 --- a/frontend/src/content/en/harness/skills.mdx +++ b/frontend/src/content/en/harness/skills.mdx @@ -1,3 +1,160 @@ +import { Callout, Cards, FileTree, Steps } from "nextra/components"; + # Skills -TBD + + Skills are task-oriented capability packages that teach the agent how to do a + specific class of work. The base agent stays general; skills provide + specialization only when needed. + + +A skill is more than a prompt. It is a self-contained capability package that can include structured instructions, step-by-step workflows, domain-specific best practices, supporting resources, and tool configurations. Skills are loaded on demand — they inject their content when a task calls for them and stay out of the context otherwise. + +## What a skill contains + +Each skill lives in its own subdirectory under `skills/public/` (or `skills/custom/` for user-created skills). The directory contains a `SKILL.md` file that defines the skill's metadata, instructions, and workflow. 
+ + + + + + + + + + + + + + + + + + + + +The `SKILL.md` file is the authoritative definition of the skill. It is parsed by `skills/parser.py` to extract the skill name, description, category, instructions, and any dependencies or tool requirements. + +## Built-in skills + +DeerFlow ships with the following public skills: + +| Skill | Description | +|---|---| +| `deep-research` | Multi-step research with source gathering, cross-checking, and structured output | +| `data-analysis` | Data exploration, statistical analysis, and insight generation | +| `chart-visualization` | Chart and graph creation from data | +| `ppt-generation` | Presentation slide generation | +| `image-generation` | AI image generation workflows | +| `code-documentation` | Automated code documentation generation | +| `newsletter-generation` | Newsletter content creation | +| `podcast-generation` | Podcast script and outline generation | +| `academic-paper-review` | Structured academic paper analysis | +| `consulting-analysis` | Business consulting frameworks and analysis | +| `systematic-literature-review` | Literature review methodology and synthesis | +| `github-deep-research` | Repository and code deep-dive research | +| `frontend-design` | Frontend design and UI workflow | +| `web-design-guidelines` | Web design standards and review | +| `video-generation` | Video content planning and generation | + +## Skill lifecycle + + + +### Discovery and loading + +`load_skills()` in `skills/loader.py` scans both `public/` and `custom/` directories under the configured skills path. It re-reads `ExtensionsConfig.from_file()` on every call, which means enabling or disabling a skill through the Gateway API takes effect immediately in the running LangGraph server without a restart. + +### Parsing + +`parser.py` reads each `SKILL.md` file and extracts structured metadata: name, description, category, instructions, and any tool or resource requirements. 
+ +### Security scanning + +`security_scanner.py` checks skill content for potentially dangerous patterns before it is loaded into the agent's context. This step runs during skill loading to prevent malicious skill content from being injected. + +### Dependency installation + +`installer.py` handles any Python or system dependencies declared by the skill. Dependencies are installed into the runtime environment when the skill is first loaded. + +### Context injection + +When the agent is invoked with a specific skill in scope, the skill's instructions are injected into the system prompt. The agent then has access to the skill's workflow, best practices, and domain knowledge for the duration of that conversation. + + + +## Configuration + +The skills system is configured under `skills:` in `config.yaml`: + +```yaml +skills: + # Path to skills directory on the host. + # Default: ../skills relative to the backend directory. + # Uncomment to customize: + # path: /absolute/path/to/custom/skills + + # Path where skills are mounted in the sandbox container. + # The agent uses this path to access skill files during execution. + container_path: /mnt/skills +``` + +The `container_path` is important: it tells the agent where to find skill resources inside the sandbox. The harness automatically mounts the host skills directory to this container path. + +## Enabling and disabling skills + +Skill availability is tracked in `extensions_config.json` (separate from `config.yaml`). You can manage skill state: + +- **Through the DeerFlow App UI**: the skills panel lets you toggle skills on and off. +- **Through the Gateway API**: `POST /api/extensions/skills/{name}/enable` and `/disable`. +- **By editing `extensions_config.json` directly**. + +Because `load_skills()` re-reads the extensions config on every call, changes take effect immediately — no server restart required. + +## Restricting skills per custom agent + +A custom agent can be restricted to a specific subset of skills. 
In the agent's config (stored in `agents/{name}/config.yaml`), set a `skills` list: + +```yaml +# agents/my-researcher/config.yaml +name: my-researcher +skills: + - deep-research + - academic-paper-review + # Omit all other skills +``` + +- **Omitted or null**: the agent loads all globally enabled skills. +- **Empty list `[]`**: the agent has no skills. +- **Named list**: the agent loads only those specific skills. + +## Skill evolution + +DeerFlow includes an optional **skill evolution** feature that allows the agent to autonomously create and improve skills in the `skills/custom/` directory: + +```yaml +skill_evolution: + enabled: false # Set to true to allow agent-managed skill creation + moderation_model_name: null # Model for security scanning (null = use default) +``` + + + Enable skill evolution only in environments where you trust the agent's + outputs. Newly created skills are security-scanned before being loaded, but + the feature gives the agent write access to the skills directory. + + +## Writing a custom skill + +To create a custom skill: + +1. Create a new directory under `skills/custom/your-skill-name/` +2. Add a `SKILL.md` file that defines the skill's metadata and instructions +3. The skill will be discovered automatically on the next `load_skills()` call + +The `SKILL.md` format follows the same structure as the built-in skills. Use one of the existing public skills as a reference for the expected format. + + + + + diff --git a/frontend/src/content/en/harness/subagents.mdx b/frontend/src/content/en/harness/subagents.mdx new file mode 100644 index 000000000..062ca9698 --- /dev/null +++ b/frontend/src/content/en/harness/subagents.mdx @@ -0,0 +1,131 @@ +import { Callout, Cards } from "nextra/components"; + +# Subagents + + + Subagents are focused workers that the Lead Agent delegates subtasks to. They + run with isolated context, keeping the main conversation clean while handling + parallel or specialized work. 
+ + +When a task is too broad for a single reasoning thread, or when parts of it can be done in parallel, the Lead Agent delegates work to **subagents**. A subagent is a self-contained agent invocation that receives a specific task, executes it, and returns the result. + +## Why subagents matter + +Subagents solve two key problems in long-horizon workflows: + +1. **Context isolation**: a subagent only sees the information it needs for its piece of the task, not the entire parent conversation. This keeps each agent's working context focused and tractable. +2. **Parallelism**: multiple subagents can run concurrently, allowing independent parts of a task (e.g., researching multiple topics simultaneously) to be processed in parallel. + +## Built-in subagents + +DeerFlow ships with two built-in subagents: + +### general-purpose + +A general-purpose reasoning and execution agent. Suitable for delegating complex subtasks that require multi-step reasoning, web search, file operations, and artifact production. + +- **Default timeout**: 900 seconds (15 minutes) +- **Default max turns**: 160 + +### bash + +A subagent specialized for command-line task execution inside the sandbox. Suitable for scripting, data processing, file transformation, and environment setup tasks. + +- **Default timeout**: 900 seconds (15 minutes) +- **Default max turns**: 80 +- **Availability**: only exposed when the sandbox's `bash` tool is available (either `allow_host_bash: true` or a container sandbox is configured) + +## Delegation flow + +The Lead Agent delegates work to a subagent using the built-in `task` tool: + +``` +task( + agent="general-purpose", + task="Research the top 5 competitors of Acme Corp and summarize their pricing", + context="Focus on B2B SaaS pricing models" +) +``` + +The runtime then: + +1. Looks up the subagent configuration from the registry, applying any `config.yaml` overrides. +2. Creates a new agent invocation with the subagent's own prompt and tools. +3. 
Runs the subagent to completion (or until timeout / max turns). +4. Returns the subagent's final output to the Lead Agent as the tool result. + +## Configuration + +Subagent timeouts and max turns are controlled through the `subagents:` section in `config.yaml`: + +```yaml +subagents: + # Default timeout in seconds for all subagents (default: 900 = 15 minutes) + timeout_seconds: 900 + + # Optional: override max turns for all subagents + # max_turns: 120 + + # Optional: per-agent overrides + agents: + general-purpose: + timeout_seconds: 1800 # 30 minutes for complex tasks + max_turns: 160 + bash: + timeout_seconds: 300 # 5 minutes for quick commands + max_turns: 80 +``` + +Per-agent overrides take priority over the global `timeout_seconds` and `max_turns` settings. + +## Concurrency limits + +The `SubagentLimitMiddleware` controls how many subagents the Lead Agent can invoke in parallel in a single turn. This is controlled through the per-request configuration: + +- `subagent_enabled`: whether subagent delegation is active for this session +- `max_concurrent_subagents`: maximum parallel task calls in one turn (default: 3) + +If the agent tries to call more subagents than the limit allows, the middleware trims the excess calls. + +## ACP agents (external agents) + +In addition to the built-in subagents, DeerFlow supports delegating to external agents through the **Agent Connect Protocol (ACP)**. ACP allows DeerFlow to invoke agents running as separate processes (including third-party CLI tools wrapped with an ACP adapter). 
+ +Configure ACP agents in `config.yaml`: + +```yaml +acp_agents: + claude_code: + command: npx + args: ["-y", "@zed-industries/claude-agent-acp"] + description: Claude Code for implementation, refactoring, and debugging + model: null + # auto_approve_permissions: false + # env: + # ANTHROPIC_API_KEY: $ANTHROPIC_API_KEY + + codex: + command: npx + args: ["-y", "@zed-industries/codex-acp"] + description: Codex CLI for repository tasks and code generation + model: null +``` + +The Lead Agent invokes ACP agents through the `invoke_acp_agent` built-in tool. + + + ACP agents run as child processes managed by DeerFlow. They communicate over + the ACP wire protocol. The standard CLI tools (like the plain `claude` or + `codex` commands) are not ACP-compatible by default — use the adapter + packages listed above or a compatible ACP wrapper. + + +## Custom agents as subagents + +Custom agents created through the DeerFlow App UI can also be invoked as subagents using the `task` tool. When you specify `agent="my-custom-agent"`, the runtime loads that agent's configuration (skills, tool groups, model) and runs it as a subagent for the delegated task. + + + + + diff --git a/frontend/src/content/en/harness/tools.mdx b/frontend/src/content/en/harness/tools.mdx index ad9493e40..eefc72631 100644 --- a/frontend/src/content/en/harness/tools.mdx +++ b/frontend/src/content/en/harness/tools.mdx @@ -1,3 +1,236 @@ +import { Callout, Cards, Tabs } from "nextra/components"; + # Tools -TBD + + Tools are the actions the Lead Agent can take. DeerFlow provides built-in + tools, community integrations, MCP tools, and skill tools — all controlled + through config.yaml. + + +The Lead Agent is a tool-calling agent. Tools are how it interacts with the world: searching the web, reading and writing files, running commands, delegating tasks, and presenting outputs to the user. + +DeerFlow organizes tools into four categories: + +1. 
**Built-in tools** — core runtime capabilities always available to the agent +2. **Community tools** — integrations with external search, fetch, and image services +3. **MCP tools** — tools provided by external Model Context Protocol servers +4. **Skill tools** — tools bundled with specific skill packs + +## Built-in tools + +Built-in tools are part of the harness and do not require configuration to be available. + +### task + +Delegates a subtask to a subagent. The Lead Agent uses this tool when a task is too broad for a single reasoning thread or when parallel work would be beneficial. + +``` +task(agent="general-purpose", task="...", context="...") +``` + +See the [Subagents](/docs/harness/subagents) page for how subagents are configured. + +--- + +### present_files + +Presents output files to the user as artifacts. The agent calls this tool after producing a file (report, chart, code, etc.) to surface it in the conversation. + +Files at `/mnt/user-data/uploads/*` are copied into `/mnt/user-data/outputs/*` before being presented. The artifact paths are tracked in `ThreadState.artifacts`. + +--- + +### view_image + +Reads an image file and injects its content into the model's context for visual analysis. Only available when the active model has `supports_vision: true`. + +--- + +### clarification + +Asks the user a clarifying question before proceeding. This is triggered by the `ClarificationMiddleware` when the model decides it does not have enough information to act. + +--- + +### setup_agent + +Dynamically configures the current agent session. Used during the bootstrap flow when setting up a new custom agent. + +--- + +### invoke_acp_agent + +Invokes an external agent using the [Agent Connect Protocol (ACP)](https://agentconnectprotocol.org/). Requires `acp_agents:` configuration in `config.yaml`. See the [Subagents](/docs/harness/subagents) page for ACP configuration. 
+ +--- + +### tool_search + +Searches for tools by name or description and loads them into the agent's context on demand. Only active when `tool_search.enabled: true` in `config.yaml`. Useful when MCP or other tool sets expose many tools and you want to reduce context usage. + +## Sandbox file tools + +The following tools interact with the sandbox filesystem. They require a sandbox to be configured and active. + +| Tool | Description | +|---|---| +| `ls` | List files in a directory | +| `read_file` | Read file contents | +| `glob` | Find files matching a pattern | +| `grep` | Search file contents | +| `write_file` | Write content to a file | +| `str_replace` | Replace a string in a file | +| `bash` | Execute a shell command (requires `allow_host_bash: true` or a container sandbox) | + +These are configured in `config.yaml` under `tools:`: + +```yaml +tools: + - use: deerflow.sandbox.tools:ls_tool + - use: deerflow.sandbox.tools:read_file_tool + - use: deerflow.sandbox.tools:glob_tool + - use: deerflow.sandbox.tools:grep_tool + - use: deerflow.sandbox.tools:write_file_tool + - use: deerflow.sandbox.tools:str_replace_tool + - use: deerflow.sandbox.tools:bash_tool # requires host bash or container sandbox +``` + +## Community tools + +Community tools connect the agent to external services. They are configured in `config.yaml` under `tools:` using the `use:` field to specify the implementation. + +### Web search + + + +```yaml +tools: + - use: deerflow.community.ddg_search.tools:web_search_tool +``` +No API key required. Default configuration. Suitable for development and general use. + + +```yaml +tools: + - use: deerflow.community.tavily.tools:web_search_tool + api_key: $TAVILY_API_KEY +``` +High-quality search with structured results. Requires a [Tavily](https://tavily.com) API key. 
+ +Install: `cd backend && uv add 'deerflow-harness[tavily]'` + + +```yaml +tools: + - use: deerflow.community.exa.tools:web_search_tool + api_key: $EXA_API_KEY +``` +Semantic search with neural retrieval. Requires an [Exa](https://exa.ai) API key. + +Install: `cd backend && uv add 'deerflow-harness[exa]'` + + +```yaml +tools: + - use: deerflow.community.infoquest.tools:web_search_tool + api_key: $INFOQUEST_API_KEY +``` +InfoQuest search integration. + + +```yaml +tools: + - use: deerflow.community.firecrawl.tools:web_search_tool + api_key: $FIRECRAWL_API_KEY +``` +Firecrawl-powered search and crawl. Requires a [Firecrawl](https://firecrawl.dev) API key. + +Install: `cd backend && uv add 'deerflow-harness[firecrawl]'` + + + +### Web fetch (page content extraction) + + + +```yaml +tools: + - use: deerflow.community.jina_ai.tools:web_fetch_tool + api_key: $JINA_API_KEY # optional; anonymous usage has rate limits +``` +Converts web pages to clean Markdown. Works without an API key at reduced rate limits. + + +```yaml +tools: + - use: deerflow.community.exa.tools:web_fetch_tool + api_key: $EXA_API_KEY +``` + + +```yaml +tools: + - use: deerflow.community.infoquest.tools:web_fetch_tool + api_key: $INFOQUEST_API_KEY +``` + + +```yaml +tools: + - use: deerflow.community.firecrawl.tools:web_fetch_tool + api_key: $FIRECRAWL_API_KEY +``` + + + +### Image search + +```yaml +tools: + - use: deerflow.community.image_search.tools:image_search_tool + # Or use InfoQuest: + # - use: deerflow.community.infoquest.tools:image_search_tool + # api_key: $INFOQUEST_API_KEY +``` + +## Tool groups + +Tool groups let you organize tools into named sets and restrict which groups a custom agent can access. 
+ +```yaml +tool_groups: + - name: research + tools: + - web_search + - web_fetch + - image_search + - name: coding + tools: + - bash + - read_file + - write_file + - str_replace + - glob + - grep +``` + +Custom agents can then reference a group by name in their configuration, restricting their tool access to only the relevant set. + +## Tool search (deferred loading) + +When you have many tools (especially from multiple MCP servers), loading all of them upfront increases context usage and can confuse the model. The tool search feature addresses this: + +```yaml +tool_search: + enabled: true +``` + +When enabled, tools are not listed in the model's context directly. Instead, they are discoverable at runtime via the `tool_search` built-in tool. The agent searches by name or description and the matching tools are loaded into context on demand. + +This is particularly useful when MCP servers expose dozens of tools. + + + + +