diff --git a/backend/packages/harness/pyproject.toml b/backend/packages/harness/pyproject.toml
index 6d48caeae..e7a81ff7b 100644
--- a/backend/packages/harness/pyproject.toml
+++ b/backend/packages/harness/pyproject.toml
@@ -36,6 +36,7 @@ dependencies = [
 ]
 
 [project.optional-dependencies]
+ollama = ["langchain-ollama>=0.3.0"]
 pymupdf = ["pymupdf4llm>=0.0.17"]
 
 [build-system]
diff --git a/config.example.yaml b/config.example.yaml
index 933f20a4f..401022786 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -79,6 +79,40 @@ models:
   #   output_version: responses/v1
   #   supports_vision: true
 
+  # Example: Ollama (native provider; preserves thinking/reasoning content)
+  #
+  # IMPORTANT: Use langchain_ollama:ChatOllama instead of langchain_openai:ChatOpenAI
+  # for Ollama models. The OpenAI-compatible endpoint (/v1/chat/completions) does NOT
+  # return reasoning_content as a separate field: thinking content is either flattened
+  # into <think> tags or dropped entirely (ollama/ollama#15293). The native Ollama API
+  # (/api/chat) correctly separates thinking from response content.
+  #
+  # Install: cd backend && uv pip install 'deerflow-harness[ollama]'
+  #
+  # - name: qwen3-local
+  #   display_name: Qwen3 32B (Ollama)
+  #   use: langchain_ollama:ChatOllama
+  #   model: qwen3:32b
+  #   base_url: http://localhost:11434  # No /v1 suffix; uses the native /api/chat
+  #   num_predict: 8192
+  #   temperature: 0.7
+  #   reasoning: true  # Passes think:true to the native Ollama API
+  #   supports_thinking: true
+  #   supports_vision: false
+  #
+  # - name: gemma3-local
+  #   display_name: Gemma 3 27B (Ollama)
+  #   use: langchain_ollama:ChatOllama
+  #   model: gemma3:27b
+  #   base_url: http://localhost:11434
+  #   num_predict: 8192
+  #   temperature: 0.7
+  #   supports_thinking: false  # Gemma 3 is not a thinking model; reasoning omitted
+  #   supports_vision: true
+  #
+  # For Docker deployments, use host.docker.internal instead of localhost:
+  #   base_url: http://host.docker.internal:11434
+
   # Example: Anthropic Claude model
   # - name: claude-3-5-sonnet
   #   display_name: Claude 3.5 Sonnet
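
For reference, a minimal sketch of what the config above enables, assuming langchain-ollama>=0.3 and a local Ollama instance serving qwen3:32b. The `reasoning=True` flag and the `reasoning_content` key under `additional_kwargs` reflect langchain-ollama's documented behavior for native thinking output; treat the prompt and model tag as illustrative, not part of this patch:

```python
# Sketch: separated thinking vs. final answer via the native Ollama API.
# Assumes langchain-ollama>=0.3 and `ollama pull qwen3:32b` on localhost.
from langchain_ollama import ChatOllama

llm = ChatOllama(
    model="qwen3:32b",
    base_url="http://localhost:11434",  # native /api/chat; no /v1 suffix
    reasoning=True,                     # sends think=true to Ollama
    num_predict=8192,
    temperature=0.7,
)

msg = llm.invoke("How many prime numbers are there below 30?")

# With reasoning=True, thinking is kept out of msg.content and surfaced
# separately by langchain-ollama under additional_kwargs.
print(msg.additional_kwargs.get("reasoning_content"))  # thinking text
print(msg.content)                                     # final answer only
```

Going through the OpenAI-compatible `/v1` endpoint with `langchain_openai:ChatOpenAI` instead would leave `reasoning_content` unset, which is exactly the failure mode the config comment warns about.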