feat(community): add Exa search as community tool provider (#1357)

* feat(community): add Exa search as community tool provider

Add Exa (exa.ai) as a new community search provider alongside Tavily,
Firecrawl, InfoQuest, and Jina AI. Exa is an AI-native search engine
with neural, keyword, and auto search types.

New files:
- community/exa/tools.py: web_search_tool and web_fetch_tool
- tests/test_exa_tools.py: 10 unit tests with mocked Exa client

Changes:
- pyproject.toml: add exa-py dependency
- config.example.yaml: add commented-out Exa configuration examples

Usage: set `use: deerflow.community.exa.tools:web_search_tool` in
config.yaml and provide EXA_API_KEY.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(community): address PR review comments for Exa tools

- Make _get_exa_client() accept tool_name param so web_fetch reads its own config
- Remove __init__.py to match namespace package pattern of other providers
- Add duplicate tool name warning in config.example.yaml
- Add regression tests for web_fetch config resolution

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Update revision in uv.lock to 3

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
hung_ng__ 2026-04-08 19:13:39 +10:00 committed by GitHub
parent 29817c3b34
commit 5350b2fb24
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 377 additions and 0 deletions

View File

@ -0,0 +1,79 @@
import json
from exa_py import Exa
from langchain.tools import tool
from deerflow.config import get_app_config
def _get_exa_client(tool_name: str = "web_search") -> Exa:
config = get_app_config().get_tool_config(tool_name)
api_key = None
if config is not None and "api_key" in config.model_extra:
api_key = config.model_extra.get("api_key")
return Exa(api_key=api_key)
@tool("web_search", parse_docstring=True)
def web_search_tool(query: str) -> str:
"""Search the web.
Args:
query: The query to search for.
"""
try:
config = get_app_config().get_tool_config("web_search")
max_results = 5
search_type = "auto"
contents_max_characters = 1000
if config is not None:
max_results = config.model_extra.get("max_results", max_results)
search_type = config.model_extra.get("search_type", search_type)
contents_max_characters = config.model_extra.get("contents_max_characters", contents_max_characters)
client = _get_exa_client()
res = client.search(
query,
type=search_type,
num_results=max_results,
contents={"highlights": {"max_characters": contents_max_characters}},
)
normalized_results = [
{
"title": result.title or "",
"url": result.url or "",
"snippet": "\n".join(result.highlights) if result.highlights else "",
}
for result in res.results
]
json_results = json.dumps(normalized_results, indent=2, ensure_ascii=False)
return json_results
except Exception as e:
return f"Error: {str(e)}"
@tool("web_fetch", parse_docstring=True)
def web_fetch_tool(url: str) -> str:
"""Fetch the contents of a web page at a given URL.
Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
Do NOT add www. to URLs that do NOT have them.
URLs must include the schema: https://example.com is a valid URL while example.com is an invalid URL.
Args:
url: The URL to fetch the contents of.
"""
try:
client = _get_exa_client("web_fetch")
res = client.get_contents([url], text={"max_characters": 4096})
if res.results:
result = res.results[0]
title = result.title or "Untitled"
text = result.text or ""
return f"# {title}\n\n{text[:4096]}"
else:
return "Error: No results found"
except Exception as e:
return f"Error: {str(e)}"

View File

@ -7,6 +7,7 @@ dependencies = [
"agent-client-protocol>=0.4.0",
"agent-sandbox>=0.0.19",
"dotenv>=0.9.9",
"exa-py>=1.0.0",
"httpx>=0.28.0",
"kubernetes>=30.0.0",
"langchain>=1.2.3",

View File

@ -0,0 +1,260 @@
"""Unit tests for the Exa community tools."""
import json
from unittest.mock import MagicMock, patch
import pytest
@pytest.fixture
def mock_app_config():
"""Mock the app config to return tool configurations."""
with patch("deerflow.community.exa.tools.get_app_config") as mock_config:
tool_config = MagicMock()
tool_config.model_extra = {
"max_results": 5,
"search_type": "auto",
"contents_max_characters": 1000,
"api_key": "test-api-key",
}
mock_config.return_value.get_tool_config.return_value = tool_config
yield mock_config
@pytest.fixture
def mock_exa_client():
"""Mock the Exa client."""
with patch("deerflow.community.exa.tools.Exa") as mock_exa_cls:
mock_client = MagicMock()
mock_exa_cls.return_value = mock_client
yield mock_client
class TestWebSearchTool:
def test_basic_search(self, mock_app_config, mock_exa_client):
"""Test basic web search returns normalized results."""
mock_result_1 = MagicMock()
mock_result_1.title = "Test Title 1"
mock_result_1.url = "https://example.com/1"
mock_result_1.highlights = ["This is a highlight about the topic."]
mock_result_2 = MagicMock()
mock_result_2.title = "Test Title 2"
mock_result_2.url = "https://example.com/2"
mock_result_2.highlights = ["First highlight.", "Second highlight."]
mock_response = MagicMock()
mock_response.results = [mock_result_1, mock_result_2]
mock_exa_client.search.return_value = mock_response
from deerflow.community.exa.tools import web_search_tool
result = web_search_tool.invoke({"query": "test query"})
parsed = json.loads(result)
assert len(parsed) == 2
assert parsed[0]["title"] == "Test Title 1"
assert parsed[0]["url"] == "https://example.com/1"
assert parsed[0]["snippet"] == "This is a highlight about the topic."
assert parsed[1]["snippet"] == "First highlight.\nSecond highlight."
mock_exa_client.search.assert_called_once_with(
"test query",
type="auto",
num_results=5,
contents={"highlights": {"max_characters": 1000}},
)
def test_search_with_custom_config(self, mock_exa_client):
"""Test search respects custom configuration values."""
with patch("deerflow.community.exa.tools.get_app_config") as mock_config:
tool_config = MagicMock()
tool_config.model_extra = {
"max_results": 10,
"search_type": "neural",
"contents_max_characters": 2000,
"api_key": "test-key",
}
mock_config.return_value.get_tool_config.return_value = tool_config
mock_response = MagicMock()
mock_response.results = []
mock_exa_client.search.return_value = mock_response
from deerflow.community.exa.tools import web_search_tool
web_search_tool.invoke({"query": "neural search"})
mock_exa_client.search.assert_called_once_with(
"neural search",
type="neural",
num_results=10,
contents={"highlights": {"max_characters": 2000}},
)
def test_search_with_no_highlights(self, mock_app_config, mock_exa_client):
"""Test search handles results with no highlights."""
mock_result = MagicMock()
mock_result.title = "No Highlights"
mock_result.url = "https://example.com/empty"
mock_result.highlights = None
mock_response = MagicMock()
mock_response.results = [mock_result]
mock_exa_client.search.return_value = mock_response
from deerflow.community.exa.tools import web_search_tool
result = web_search_tool.invoke({"query": "test"})
parsed = json.loads(result)
assert parsed[0]["snippet"] == ""
def test_search_empty_results(self, mock_app_config, mock_exa_client):
"""Test search with no results returns empty list."""
mock_response = MagicMock()
mock_response.results = []
mock_exa_client.search.return_value = mock_response
from deerflow.community.exa.tools import web_search_tool
result = web_search_tool.invoke({"query": "nothing"})
parsed = json.loads(result)
assert parsed == []
def test_search_error_handling(self, mock_app_config, mock_exa_client):
"""Test search returns error string on exception."""
mock_exa_client.search.side_effect = Exception("API rate limit exceeded")
from deerflow.community.exa.tools import web_search_tool
result = web_search_tool.invoke({"query": "error"})
assert result == "Error: API rate limit exceeded"
class TestWebFetchTool:
def test_basic_fetch(self, mock_app_config, mock_exa_client):
"""Test basic web fetch returns formatted content."""
mock_result = MagicMock()
mock_result.title = "Fetched Page"
mock_result.text = "This is the page content."
mock_response = MagicMock()
mock_response.results = [mock_result]
mock_exa_client.get_contents.return_value = mock_response
from deerflow.community.exa.tools import web_fetch_tool
result = web_fetch_tool.invoke({"url": "https://example.com"})
assert result == "# Fetched Page\n\nThis is the page content."
mock_exa_client.get_contents.assert_called_once_with(
["https://example.com"],
text={"max_characters": 4096},
)
def test_fetch_no_title(self, mock_app_config, mock_exa_client):
"""Test fetch with missing title uses 'Untitled'."""
mock_result = MagicMock()
mock_result.title = None
mock_result.text = "Content without title."
mock_response = MagicMock()
mock_response.results = [mock_result]
mock_exa_client.get_contents.return_value = mock_response
from deerflow.community.exa.tools import web_fetch_tool
result = web_fetch_tool.invoke({"url": "https://example.com"})
assert result.startswith("# Untitled\n\n")
def test_fetch_no_results(self, mock_app_config, mock_exa_client):
"""Test fetch with no results returns error."""
mock_response = MagicMock()
mock_response.results = []
mock_exa_client.get_contents.return_value = mock_response
from deerflow.community.exa.tools import web_fetch_tool
result = web_fetch_tool.invoke({"url": "https://example.com/404"})
assert result == "Error: No results found"
def test_fetch_error_handling(self, mock_app_config, mock_exa_client):
"""Test fetch returns error string on exception."""
mock_exa_client.get_contents.side_effect = Exception("Connection timeout")
from deerflow.community.exa.tools import web_fetch_tool
result = web_fetch_tool.invoke({"url": "https://example.com"})
assert result == "Error: Connection timeout"
def test_fetch_reads_web_fetch_config(self, mock_exa_client):
"""Test that web_fetch_tool reads 'web_fetch' config, not 'web_search'."""
with patch("deerflow.community.exa.tools.get_app_config") as mock_config:
tool_config = MagicMock()
tool_config.model_extra = {"api_key": "exa-fetch-key"}
mock_config.return_value.get_tool_config.return_value = tool_config
mock_result = MagicMock()
mock_result.title = "Page"
mock_result.text = "Content."
mock_response = MagicMock()
mock_response.results = [mock_result]
mock_exa_client.get_contents.return_value = mock_response
from deerflow.community.exa.tools import web_fetch_tool
web_fetch_tool.invoke({"url": "https://example.com"})
mock_config.return_value.get_tool_config.assert_any_call("web_fetch")
def test_fetch_uses_independent_api_key(self, mock_exa_client):
"""Test mixed-provider config: web_fetch uses its own api_key, not web_search's."""
with patch("deerflow.community.exa.tools.get_app_config") as mock_config:
with patch("deerflow.community.exa.tools.Exa") as mock_exa_cls:
mock_exa_cls.return_value = mock_exa_client
fetch_config = MagicMock()
fetch_config.model_extra = {"api_key": "exa-fetch-key"}
def get_tool_config(name):
if name == "web_fetch":
return fetch_config
return None
mock_config.return_value.get_tool_config.side_effect = get_tool_config
mock_result = MagicMock()
mock_result.title = "Page"
mock_result.text = "Content."
mock_response = MagicMock()
mock_response.results = [mock_result]
mock_exa_client.get_contents.return_value = mock_response
from deerflow.community.exa.tools import web_fetch_tool
web_fetch_tool.invoke({"url": "https://example.com"})
mock_exa_cls.assert_called_once_with(api_key="exa-fetch-key")
def test_fetch_truncates_long_content(self, mock_app_config, mock_exa_client):
"""Test fetch truncates content to 4096 characters."""
mock_result = MagicMock()
mock_result.title = "Long Page"
mock_result.text = "x" * 5000
mock_response = MagicMock()
mock_response.results = [mock_result]
mock_exa_client.get_contents.return_value = mock_response
from deerflow.community.exa.tools import web_fetch_tool
result = web_fetch_tool.invoke({"url": "https://example.com"})
# "# Long Page\n\n" is 14 chars, content truncated to 4096
content_after_header = result.split("\n\n", 1)[1]
assert len(content_after_header) == 4096

20
backend/uv.lock generated
View File

@ -722,6 +722,7 @@ dependencies = [
{ name = "ddgs" },
{ name = "dotenv" },
{ name = "duckdb" },
{ name = "exa-py" },
{ name = "firecrawl-py" },
{ name = "httpx" },
{ name = "kubernetes" },
@ -759,6 +760,7 @@ requires-dist = [
{ name = "ddgs", specifier = ">=9.10.0" },
{ name = "dotenv", specifier = ">=0.9.9" },
{ name = "duckdb", specifier = ">=1.4.4" },
{ name = "exa-py", specifier = ">=1.0.0" },
{ name = "firecrawl-py", specifier = ">=1.15.0" },
{ name = "httpx", specifier = ">=0.28.0" },
{ name = "kubernetes", specifier = ">=30.0.0" },
@ -871,6 +873,24 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" },
]
[[package]]
name = "exa-py"
version = "2.10.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "httpcore" },
{ name = "httpx" },
{ name = "openai" },
{ name = "pydantic" },
{ name = "python-dotenv" },
{ name = "requests" },
{ name = "typing-extensions" },
]
sdist = { url = "https://files.pythonhosted.org/packages/fb/bb/23c9f78edbf0e0d656839be7346a2f77b9caaae8cc3cb301012c46fd7dc5/exa_py-2.10.1.tar.gz", hash = "sha256:731958c2befc5fc82f031c93cfe7b3d55dc3b0e1bf32f83ec34d32a65ee31ba1", size = 53826, upload-time = "2026-03-25T00:50:49.286Z" }
wheels = [
{ url = "https://files.pythonhosted.org/packages/fc/8d/0665263aa8d51ef8e2a3955e2b56496add4879730451961b09610bbc7036/exa_py-2.10.1-py3-none-any.whl", hash = "sha256:e2174c932764fff747e84e9e6d0637eaa4a6503556014df73a3427f42cc9d6a7", size = 72270, upload-time = "2026-03-25T00:50:47.721Z" },
]
[[package]]
name = "fake-useragent"
version = "2.2.0"

View File

@ -304,6 +304,23 @@ tools:
# # Used to limit the scope of search results, only returns content within the specified time range. Set to -1 to disable time filtering
# search_time_range: 10
# Web search tool (uses Exa, requires EXA_API_KEY)
# - name: web_search
# group: web
# use: deerflow.community.exa.tools:web_search_tool
# max_results: 5
# search_type: auto # Options: auto, neural, keyword
# contents_max_characters: 1000
# # api_key: $EXA_API_KEY
# Web fetch tool (uses Exa)
# NOTE: Only one web_fetch provider can be active at a time.
# Comment out the Jina AI web_fetch entry below before enabling this one.
# - name: web_fetch
# group: web
# use: deerflow.community.exa.tools:web_fetch_tool
# # api_key: $EXA_API_KEY
# Web fetch tool (uses Jina AI reader)
- name: web_fetch
group: web