mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-04-25 11:18:22 +00:00
feat(community): add Exa search as community tool provider (#1357)
* feat(community): add Exa search as community tool provider Add Exa (exa.ai) as a new community search provider alongside Tavily, Firecrawl, InfoQuest, and Jina AI. Exa is an AI-native search engine with neural, keyword, and auto search types. New files: - community/exa/tools.py: web_search_tool and web_fetch_tool - tests/test_exa_tools.py: 10 unit tests with mocked Exa client Changes: - pyproject.toml: add exa-py dependency - config.example.yaml: add commented-out Exa configuration examples Usage: set `use: deerflow.community.exa.tools:web_search_tool` in config.yaml and provide EXA_API_KEY. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix(community): address PR review comments for Exa tools - Make _get_exa_client() accept tool_name param so web_fetch reads its own config - Remove __init__.py to match namespace package pattern of other providers - Add duplicate tool name warning in config.example.yaml - Add regression tests for web_fetch config resolution Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * Update revision in uv.lock to 3 --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
parent
29817c3b34
commit
5350b2fb24
79
backend/packages/harness/deerflow/community/exa/tools.py
Normal file
79
backend/packages/harness/deerflow/community/exa/tools.py
Normal file
@ -0,0 +1,79 @@
|
||||
import json
|
||||
|
||||
from exa_py import Exa
|
||||
from langchain.tools import tool
|
||||
|
||||
from deerflow.config import get_app_config
|
||||
|
||||
|
||||
def _get_exa_client(tool_name: str = "web_search") -> Exa:
    """Build an Exa client using the configuration entry of one tool.

    The tool config named ``tool_name`` is looked up in the app config. When
    that entry exists and its extra fields contain ``api_key``, the key is
    handed to the client; in every other case the client is constructed with
    ``api_key=None``.

    Args:
        tool_name: Name of the tool entry whose configuration is consulted.

    Returns:
        A newly constructed ``Exa`` client.
    """
    tool_cfg = get_app_config().get_tool_config(tool_name)
    # No entry, or an entry without an explicit key: construct without one.
    if tool_cfg is None or "api_key" not in tool_cfg.model_extra:
        return Exa(api_key=None)
    return Exa(api_key=tool_cfg.model_extra.get("api_key"))
|
||||
|
||||
|
||||
@tool("web_search", parse_docstring=True)
def web_search_tool(query: str) -> str:
    """Search the web.

    Args:
        query: The query to search for.
    """
    # The docstring above is parsed by @tool (parse_docstring=True), so
    # implementation notes live in comments rather than in the docstring.
    #
    # Returns a JSON array of {"title", "url", "snippet"} objects, or an
    # "Error: ..." string if anything in the lookup or search raises.
    try:
        tool_cfg = get_app_config().get_tool_config("web_search")

        # Built-in defaults; each one may be overridden by the tool's config entry.
        settings = {
            "max_results": 5,
            "search_type": "auto",
            "contents_max_characters": 1000,
        }
        if tool_cfg is not None:
            extras = tool_cfg.model_extra
            settings = {key: extras.get(key, fallback) for key, fallback in settings.items()}

        response = _get_exa_client().search(
            query,
            type=settings["search_type"],
            num_results=settings["max_results"],
            contents={"highlights": {"max_characters": settings["contents_max_characters"]}},
        )

        hits = []
        for item in response.results:
            entry = {"title": item.title or "", "url": item.url or ""}
            highlights = item.highlights
            # Multiple highlights are joined one per line; none yields an empty snippet.
            entry["snippet"] = "\n".join(highlights) if highlights else ""
            hits.append(entry)

        return json.dumps(hits, indent=2, ensure_ascii=False)
    except Exception as e:
        # Failures are reported back as text instead of propagating.
        return "Error: " + str(e)
|
||||
|
||||
|
||||
@tool("web_fetch", parse_docstring=True)
def web_fetch_tool(url: str) -> str:
    """Fetch the contents of a web page at a given URL.
    Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
    This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
    Do NOT add www. to URLs that do NOT have them.
    URLs must include the schema: https://example.com is a valid URL while example.com is an invalid URL.

    Args:
        url: The URL to fetch the contents of.
    """
    # The docstring above is parsed by @tool (parse_docstring=True), so
    # implementation notes live in comments rather than in the docstring.
    #
    # Returns "# <title>\n\n<text>" for the first result, or an "Error: ..."
    # string when there are no results or when anything raises.
    try:
        # The client is built from the "web_fetch" entry, not the "web_search" one.
        response = _get_exa_client("web_fetch").get_contents([url], text={"max_characters": 4096})

        if not response.results:
            return "Error: No results found"

        page = response.results[0]
        heading = page.title or "Untitled"
        body = page.text or ""
        # The text is sliced locally as well, whatever length the client returned.
        return f"# {heading}\n\n{body[:4096]}"
    except Exception as e:
        # Failures are reported back as text instead of propagating.
        return "Error: " + str(e)
|
||||
@ -7,6 +7,7 @@ dependencies = [
|
||||
"agent-client-protocol>=0.4.0",
|
||||
"agent-sandbox>=0.0.19",
|
||||
"dotenv>=0.9.9",
|
||||
"exa-py>=1.0.0",
|
||||
"httpx>=0.28.0",
|
||||
"kubernetes>=30.0.0",
|
||||
"langchain>=1.2.3",
|
||||
|
||||
260
backend/tests/test_exa_tools.py
Normal file
260
backend/tests/test_exa_tools.py
Normal file
@ -0,0 +1,260 @@
|
||||
"""Unit tests for the Exa community tools."""
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture
def mock_app_config():
    """Patch ``get_app_config`` in the Exa tools module with a canned tool config.

    Every ``get_tool_config`` call on the patched app config returns the same
    mock, whose ``model_extra`` holds the search settings and a test API key.
    Yields the patched ``get_app_config`` mock.
    """
    canned_extras = {
        "max_results": 5,
        "search_type": "auto",
        "contents_max_characters": 1000,
        "api_key": "test-api-key",
    }
    with patch("deerflow.community.exa.tools.get_app_config") as mock_config:
        mock_config.return_value.get_tool_config.return_value = MagicMock(model_extra=canned_extras)
        yield mock_config
|
||||
|
||||
|
||||
@pytest.fixture
def mock_exa_client():
    """Patch the ``Exa`` class in the tools module and yield the client it returns.

    Calling the patched ``Exa(...)`` yields the same ``MagicMock`` instance that
    the test receives, so tests can program ``search`` / ``get_contents`` on it.
    """
    fake_client = MagicMock()
    with patch("deerflow.community.exa.tools.Exa", return_value=fake_client):
        yield fake_client
|
||||
|
||||
|
||||
class TestWebSearchTool:
    """Tests for ``web_search_tool`` running against a mocked Exa client."""

    @staticmethod
    def _search(query):
        """Import the tool at call time and invoke it with ``query``."""
        from deerflow.community.exa.tools import web_search_tool

        return web_search_tool.invoke({"query": query})

    def test_basic_search(self, mock_app_config, mock_exa_client):
        """Test basic web search returns normalized results."""
        hits = [
            MagicMock(
                title="Test Title 1",
                url="https://example.com/1",
                highlights=["This is a highlight about the topic."],
            ),
            MagicMock(
                title="Test Title 2",
                url="https://example.com/2",
                highlights=["First highlight.", "Second highlight."],
            ),
        ]
        mock_exa_client.search.return_value = MagicMock(results=hits)

        parsed = json.loads(self._search("test query"))

        assert len(parsed) == 2
        assert parsed[0]["title"] == "Test Title 1"
        assert parsed[0]["url"] == "https://example.com/1"
        assert parsed[0]["snippet"] == "This is a highlight about the topic."
        # Multiple highlights are joined with a newline.
        assert parsed[1]["snippet"] == "First highlight.\nSecond highlight."

        mock_exa_client.search.assert_called_once_with(
            "test query",
            type="auto",
            num_results=5,
            contents={"highlights": {"max_characters": 1000}},
        )

    def test_search_with_custom_config(self, mock_exa_client):
        """Test search respects custom configuration values."""
        custom_extras = {
            "max_results": 10,
            "search_type": "neural",
            "contents_max_characters": 2000,
            "api_key": "test-key",
        }
        with patch("deerflow.community.exa.tools.get_app_config") as mock_config:
            mock_config.return_value.get_tool_config.return_value = MagicMock(model_extra=custom_extras)
            mock_exa_client.search.return_value = MagicMock(results=[])

            self._search("neural search")

            mock_exa_client.search.assert_called_once_with(
                "neural search",
                type="neural",
                num_results=10,
                contents={"highlights": {"max_characters": 2000}},
            )

    def test_search_with_no_highlights(self, mock_app_config, mock_exa_client):
        """Test search handles results with no highlights."""
        bare_hit = MagicMock(
            title="No Highlights",
            url="https://example.com/empty",
            highlights=None,
        )
        mock_exa_client.search.return_value = MagicMock(results=[bare_hit])

        parsed = json.loads(self._search("test"))

        assert parsed[0]["snippet"] == ""

    def test_search_empty_results(self, mock_app_config, mock_exa_client):
        """Test search with no results returns empty list."""
        mock_exa_client.search.return_value = MagicMock(results=[])

        parsed = json.loads(self._search("nothing"))

        assert parsed == []

    def test_search_error_handling(self, mock_app_config, mock_exa_client):
        """Test search returns error string on exception."""
        mock_exa_client.search.side_effect = Exception("API rate limit exceeded")

        outcome = self._search("error")

        assert outcome == "Error: API rate limit exceeded"
|
||||
|
||||
|
||||
class TestWebFetchTool:
    """Tests for ``web_fetch_tool`` running against a mocked Exa client."""

    @staticmethod
    def _fetch(url):
        """Import the tool at call time and invoke it with ``url``."""
        from deerflow.community.exa.tools import web_fetch_tool

        return web_fetch_tool.invoke({"url": url})

    @staticmethod
    def _single_page_response(title, text):
        """Build a mocked ``get_contents`` response holding exactly one page."""
        return MagicMock(results=[MagicMock(title=title, text=text)])

    def test_basic_fetch(self, mock_app_config, mock_exa_client):
        """Test basic web fetch returns formatted content."""
        mock_exa_client.get_contents.return_value = self._single_page_response(
            "Fetched Page", "This is the page content."
        )

        outcome = self._fetch("https://example.com")

        assert outcome == "# Fetched Page\n\nThis is the page content."
        mock_exa_client.get_contents.assert_called_once_with(
            ["https://example.com"],
            text={"max_characters": 4096},
        )

    def test_fetch_no_title(self, mock_app_config, mock_exa_client):
        """Test fetch with missing title uses 'Untitled'."""
        mock_exa_client.get_contents.return_value = self._single_page_response(
            None, "Content without title."
        )

        outcome = self._fetch("https://example.com")

        assert outcome.startswith("# Untitled\n\n")

    def test_fetch_no_results(self, mock_app_config, mock_exa_client):
        """Test fetch with no results returns error."""
        mock_exa_client.get_contents.return_value = MagicMock(results=[])

        outcome = self._fetch("https://example.com/404")

        assert outcome == "Error: No results found"

    def test_fetch_error_handling(self, mock_app_config, mock_exa_client):
        """Test fetch returns error string on exception."""
        mock_exa_client.get_contents.side_effect = Exception("Connection timeout")

        outcome = self._fetch("https://example.com")

        assert outcome == "Error: Connection timeout"

    def test_fetch_reads_web_fetch_config(self, mock_exa_client):
        """Test that web_fetch_tool reads 'web_fetch' config, not 'web_search'."""
        with patch("deerflow.community.exa.tools.get_app_config") as mock_config:
            lookup = mock_config.return_value.get_tool_config
            lookup.return_value = MagicMock(model_extra={"api_key": "exa-fetch-key"})
            mock_exa_client.get_contents.return_value = self._single_page_response("Page", "Content.")

            self._fetch("https://example.com")

            lookup.assert_any_call("web_fetch")

    def test_fetch_uses_independent_api_key(self, mock_exa_client):
        """Test mixed-provider config: web_fetch uses its own api_key, not web_search's."""
        with (
            patch("deerflow.community.exa.tools.get_app_config") as mock_config,
            patch("deerflow.community.exa.tools.Exa") as mock_exa_cls,
        ):
            mock_exa_cls.return_value = mock_exa_client
            fetch_config = MagicMock(model_extra={"api_key": "exa-fetch-key"})

            # Only the "web_fetch" entry exists; any other tool name has no config.
            mock_config.return_value.get_tool_config.side_effect = (
                lambda name: fetch_config if name == "web_fetch" else None
            )
            mock_exa_client.get_contents.return_value = self._single_page_response("Page", "Content.")

            self._fetch("https://example.com")

            mock_exa_cls.assert_called_once_with(api_key="exa-fetch-key")

    def test_fetch_truncates_long_content(self, mock_app_config, mock_exa_client):
        """Test fetch truncates content to 4096 characters."""
        mock_exa_client.get_contents.return_value = self._single_page_response("Long Page", "x" * 5000)

        outcome = self._fetch("https://example.com")

        # Everything after the "# Long Page" header and its blank line is the
        # page text, which must have been cut down to 4096 characters.
        content_after_header = outcome.split("\n\n", 1)[1]
        assert len(content_after_header) == 4096
|
||||
20
backend/uv.lock
generated
20
backend/uv.lock
generated
@ -722,6 +722,7 @@ dependencies = [
|
||||
{ name = "ddgs" },
|
||||
{ name = "dotenv" },
|
||||
{ name = "duckdb" },
|
||||
{ name = "exa-py" },
|
||||
{ name = "firecrawl-py" },
|
||||
{ name = "httpx" },
|
||||
{ name = "kubernetes" },
|
||||
@ -759,6 +760,7 @@ requires-dist = [
|
||||
{ name = "ddgs", specifier = ">=9.10.0" },
|
||||
{ name = "dotenv", specifier = ">=0.9.9" },
|
||||
{ name = "duckdb", specifier = ">=1.4.4" },
|
||||
{ name = "exa-py", specifier = ">=1.0.0" },
|
||||
{ name = "firecrawl-py", specifier = ">=1.15.0" },
|
||||
{ name = "httpx", specifier = ">=0.28.0" },
|
||||
{ name = "kubernetes", specifier = ">=30.0.0" },
|
||||
@ -871,6 +873,24 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "exa-py"
|
||||
version = "2.10.1"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "httpcore" },
|
||||
{ name = "httpx" },
|
||||
{ name = "openai" },
|
||||
{ name = "pydantic" },
|
||||
{ name = "python-dotenv" },
|
||||
{ name = "requests" },
|
||||
{ name = "typing-extensions" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/fb/bb/23c9f78edbf0e0d656839be7346a2f77b9caaae8cc3cb301012c46fd7dc5/exa_py-2.10.1.tar.gz", hash = "sha256:731958c2befc5fc82f031c93cfe7b3d55dc3b0e1bf32f83ec34d32a65ee31ba1", size = 53826, upload-time = "2026-03-25T00:50:49.286Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/fc/8d/0665263aa8d51ef8e2a3955e2b56496add4879730451961b09610bbc7036/exa_py-2.10.1-py3-none-any.whl", hash = "sha256:e2174c932764fff747e84e9e6d0637eaa4a6503556014df73a3427f42cc9d6a7", size = 72270, upload-time = "2026-03-25T00:50:47.721Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fake-useragent"
|
||||
version = "2.2.0"
|
||||
|
||||
@ -304,6 +304,23 @@ tools:
|
||||
# # Used to limit the scope of search results, only returns content within the specified time range. Set to -1 to disable time filtering
|
||||
# search_time_range: 10
|
||||
|
||||
# Web search tool (uses Exa, requires EXA_API_KEY)
|
||||
# - name: web_search
|
||||
# group: web
|
||||
# use: deerflow.community.exa.tools:web_search_tool
|
||||
# max_results: 5
|
||||
# search_type: auto # Options: auto, neural, keyword
|
||||
# contents_max_characters: 1000
|
||||
# # api_key: $EXA_API_KEY
|
||||
|
||||
# Web fetch tool (uses Exa)
|
||||
# NOTE: Only one web_fetch provider can be active at a time.
|
||||
# Comment out the Jina AI web_fetch entry below before enabling this one.
|
||||
# - name: web_fetch
|
||||
# group: web
|
||||
# use: deerflow.community.exa.tools:web_fetch_tool
|
||||
# # api_key: $EXA_API_KEY
|
||||
|
||||
# Web fetch tool (uses Jina AI reader)
|
||||
- name: web_fetch
|
||||
group: web
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user