From 5350b2fb24b3bdb98729cc20b4544658fb8dfaa9 Mon Sep 17 00:00:00 2001
From: hung_ng__ <51025722+hung-ngm@users.noreply.github.com>
Date: Wed, 8 Apr 2026 19:13:39 +1000
Subject: [PATCH] feat(community): add Exa search as community tool provider
 (#1357)

* feat(community): add Exa search as community tool provider

Add Exa (exa.ai) as a new community search provider alongside Tavily,
Firecrawl, InfoQuest, and Jina AI. Exa is an AI-native search engine
with neural, keyword, and auto search types.

New files:
- community/exa/tools.py: web_search_tool and web_fetch_tool
- tests/test_exa_tools.py: 10 unit tests with mocked Exa client

Changes:
- pyproject.toml: add exa-py dependency
- config.example.yaml: add commented-out Exa configuration examples

Usage: set `use: deerflow.community.exa.tools:web_search_tool` in
config.yaml and provide EXA_API_KEY.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* fix(community): address PR review comments for Exa tools

- Make _get_exa_client() accept tool_name param so web_fetch reads its own config
- Remove __init__.py to match namespace package pattern of other providers
- Add duplicate tool name warning in config.example.yaml
- Add regression tests for web_fetch config resolution

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

* Update revision in uv.lock to 3

---------

Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
---
 .../harness/deerflow/community/exa/tools.py   |  79 ++++++
 backend/packages/harness/pyproject.toml       |   1 +
 backend/tests/test_exa_tools.py               | 260 ++++++++++++++++++
 backend/uv.lock                               |  20 ++
 config.example.yaml                           |  17 ++
 5 files changed, 377 insertions(+)
 create mode 100644 backend/packages/harness/deerflow/community/exa/tools.py
 create mode 100644 backend/tests/test_exa_tools.py

diff --git a/backend/packages/harness/deerflow/community/exa/tools.py b/backend/packages/harness/deerflow/community/exa/tools.py
new file mode 100644
index 000000000..974280402
--- /dev/null
+++ b/backend/packages/harness/deerflow/community/exa/tools.py
@@ -0,0 +1,79 @@
+import json
+
+from exa_py import Exa
+from langchain.tools import tool
+
+from deerflow.config import get_app_config
+
+
+def _get_exa_client(tool_name: str = "web_search") -> Exa:  # Build an Exa client for the named tool entry.
+    config = get_app_config().get_tool_config(tool_name)  # a None result is handled below
+    api_key = None  # stays None when there is no tool entry or it carries no "api_key" extra
+    if config is not None and "api_key" in config.model_extra:
+        api_key = config.model_extra.get("api_key")
+    return Exa(api_key=api_key)  # NOTE(review): presumably exa_py falls back to EXA_API_KEY when api_key is None - confirm
+
+
+@tool("web_search", parse_docstring=True)
+def web_search_tool(query: str) -> str:
+    """Search the web.
+
+    Args:
+        query: The query to search for.
+    """
+    try:
+        config = get_app_config().get_tool_config("web_search")
+        max_results = 5  # defaults below apply when the tool entry or a given key is absent
+        search_type = "auto"
+        contents_max_characters = 1000
+        if config is not None:
+            max_results = config.model_extra.get("max_results", max_results)
+            search_type = config.model_extra.get("search_type", search_type)
+            contents_max_characters = config.model_extra.get("contents_max_characters", contents_max_characters)
+
+        client = _get_exa_client()  # default tool_name is "web_search"
+        res = client.search(
+            query,
+            type=search_type,
+            num_results=max_results,
+            contents={"highlights": {"max_characters": contents_max_characters}},
+        )
+
+        normalized_results = [  # one {"title", "url", "snippet"} dict per search result
+            {
+                "title": result.title or "",
+                "url": result.url or "",
+                "snippet": "\n".join(result.highlights) if result.highlights else "",  # highlights joined one per line
+            }
+            for result in res.results
+        ]
+        json_results = json.dumps(normalized_results, indent=2, ensure_ascii=False)
+        return json_results
+    except Exception as e:  # any failure is returned as an "Error: ..." string instead of being raised
+        return f"Error: {str(e)}"
+
+
+@tool("web_fetch", parse_docstring=True)
+def web_fetch_tool(url: str) -> str:
+    """Fetch the contents of a web page at a given URL.
+    Only fetch EXACT URLs that have been provided directly by the user or have been returned in results from the web_search and web_fetch tools.
+    This tool can NOT access content that requires authentication, such as private Google Docs or pages behind login walls.
+    Do NOT add www. to URLs that do NOT have them.
+    URLs must include the schema: https://example.com is a valid URL while example.com is an invalid URL.
+
+    Args:
+        url: The URL to fetch the contents of.
+    """
+    try:
+        client = _get_exa_client("web_fetch")  # resolves the "web_fetch" tool entry, not "web_search"
+        res = client.get_contents([url], text={"max_characters": 4096})
+
+        if res.results:
+            result = res.results[0]  # a single URL was requested, so only the first result is used
+            title = result.title or "Untitled"
+            text = result.text or ""
+            return f"# {title}\n\n{text[:4096]}"  # the slice also caps the text at 4096 chars locally
+        else:
+            return "Error: No results found"
+    except Exception as e:  # any failure is returned as an "Error: ..." string instead of being raised
+        return f"Error: {str(e)}"
diff --git a/backend/packages/harness/pyproject.toml b/backend/packages/harness/pyproject.toml
index cf8b15839..6d48caeae 100644
--- a/backend/packages/harness/pyproject.toml
+++ b/backend/packages/harness/pyproject.toml
@@ -7,6 +7,7 @@ dependencies = [
     "agent-client-protocol>=0.4.0",
     "agent-sandbox>=0.0.19",
     "dotenv>=0.9.9",
+    "exa-py>=1.0.0",
     "httpx>=0.28.0",
     "kubernetes>=30.0.0",
     "langchain>=1.2.3",
diff --git a/backend/tests/test_exa_tools.py b/backend/tests/test_exa_tools.py
new file mode 100644
index 000000000..b7196918e
--- /dev/null
+++ b/backend/tests/test_exa_tools.py
@@ -0,0 +1,260 @@
+"""Unit tests for the Exa community tools."""
+
+import json
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+
+@pytest.fixture
+def mock_app_config():
+    """Mock the app config to return tool configurations."""
+    with patch("deerflow.community.exa.tools.get_app_config") as mock_config:
+        tool_config = MagicMock()
+        tool_config.model_extra = {
+            "max_results": 5,
+            "search_type": "auto",
+            "contents_max_characters": 1000,
+            "api_key": "test-api-key",
+        }
+        mock_config.return_value.get_tool_config.return_value = tool_config
+        yield mock_config
+
+
+@pytest.fixture
+def mock_exa_client():
+    """Mock the Exa client."""
+    with patch("deerflow.community.exa.tools.Exa") as mock_exa_cls:
+        mock_client = MagicMock()
+        mock_exa_cls.return_value = mock_client
+        yield mock_client
+
+
+class TestWebSearchTool:
+    def test_basic_search(self, mock_app_config, mock_exa_client):
+        """Test basic web search returns normalized results."""
+        mock_result_1 = MagicMock()
+        mock_result_1.title = "Test Title 1"
+        mock_result_1.url = "https://example.com/1"
+        mock_result_1.highlights = ["This is a highlight about the topic."]
+
+        mock_result_2 = MagicMock()
+        mock_result_2.title = "Test Title 2"
+        mock_result_2.url = "https://example.com/2"
+        mock_result_2.highlights = ["First highlight.", "Second highlight."]
+
+        mock_response = MagicMock()
+        mock_response.results = [mock_result_1, mock_result_2]
+        mock_exa_client.search.return_value = mock_response
+
+        from deerflow.community.exa.tools import web_search_tool
+
+        result = web_search_tool.invoke({"query": "test query"})
+        parsed = json.loads(result)
+
+        assert len(parsed) == 2
+        assert parsed[0]["title"] == "Test Title 1"
+        assert parsed[0]["url"] == "https://example.com/1"
+        assert parsed[0]["snippet"] == "This is a highlight about the topic."
+        assert parsed[1]["snippet"] == "First highlight.\nSecond highlight."
+
+        mock_exa_client.search.assert_called_once_with(
+            "test query",
+            type="auto",
+            num_results=5,
+            contents={"highlights": {"max_characters": 1000}},
+        )
+
+    def test_search_with_custom_config(self, mock_exa_client):
+        """Test search respects custom configuration values."""
+        with patch("deerflow.community.exa.tools.get_app_config") as mock_config:
+            tool_config = MagicMock()
+            tool_config.model_extra = {
+                "max_results": 10,
+                "search_type": "neural",
+                "contents_max_characters": 2000,
+                "api_key": "test-key",
+            }
+            mock_config.return_value.get_tool_config.return_value = tool_config
+
+            mock_response = MagicMock()
+            mock_response.results = []
+            mock_exa_client.search.return_value = mock_response
+
+            from deerflow.community.exa.tools import web_search_tool
+
+            web_search_tool.invoke({"query": "neural search"})
+
+            mock_exa_client.search.assert_called_once_with(
+                "neural search",
+                type="neural",
+                num_results=10,
+                contents={"highlights": {"max_characters": 2000}},
+            )
+
+    def test_search_with_no_highlights(self, mock_app_config, mock_exa_client):
+        """Test search handles results with no highlights."""
+        mock_result = MagicMock()
+        mock_result.title = "No Highlights"
+        mock_result.url = "https://example.com/empty"
+        mock_result.highlights = None
+
+        mock_response = MagicMock()
+        mock_response.results = [mock_result]
+        mock_exa_client.search.return_value = mock_response
+
+        from deerflow.community.exa.tools import web_search_tool
+
+        result = web_search_tool.invoke({"query": "test"})
+        parsed = json.loads(result)
+
+        assert parsed[0]["snippet"] == ""
+
+    def test_search_empty_results(self, mock_app_config, mock_exa_client):
+        """Test search with no results returns empty list."""
+        mock_response = MagicMock()
+        mock_response.results = []
+        mock_exa_client.search.return_value = mock_response
+
+        from deerflow.community.exa.tools import web_search_tool
+
+        result = web_search_tool.invoke({"query": "nothing"})
+        parsed = json.loads(result)
+
+        assert parsed == []
+
+    def test_search_error_handling(self, mock_app_config, mock_exa_client):
+        """Test search returns error string on exception."""
+        mock_exa_client.search.side_effect = Exception("API rate limit exceeded")
+
+        from deerflow.community.exa.tools import web_search_tool
+
+        result = web_search_tool.invoke({"query": "error"})
+
+        assert result == "Error: API rate limit exceeded"
+
+
+class TestWebFetchTool:
+    def test_basic_fetch(self, mock_app_config, mock_exa_client):
+        """Test basic web fetch returns formatted content."""
+        mock_result = MagicMock()
+        mock_result.title = "Fetched Page"
+        mock_result.text = "This is the page content."
+
+        mock_response = MagicMock()
+        mock_response.results = [mock_result]
+        mock_exa_client.get_contents.return_value = mock_response
+
+        from deerflow.community.exa.tools import web_fetch_tool
+
+        result = web_fetch_tool.invoke({"url": "https://example.com"})
+
+        assert result == "# Fetched Page\n\nThis is the page content."
+        mock_exa_client.get_contents.assert_called_once_with(
+            ["https://example.com"],
+            text={"max_characters": 4096},
+        )
+
+    def test_fetch_no_title(self, mock_app_config, mock_exa_client):
+        """Test fetch with missing title uses 'Untitled'."""
+        mock_result = MagicMock()
+        mock_result.title = None
+        mock_result.text = "Content without title."
+
+        mock_response = MagicMock()
+        mock_response.results = [mock_result]
+        mock_exa_client.get_contents.return_value = mock_response
+
+        from deerflow.community.exa.tools import web_fetch_tool
+
+        result = web_fetch_tool.invoke({"url": "https://example.com"})
+
+        assert result.startswith("# Untitled\n\n")
+
+    def test_fetch_no_results(self, mock_app_config, mock_exa_client):
+        """Test fetch with no results returns error."""
+        mock_response = MagicMock()
+        mock_response.results = []
+        mock_exa_client.get_contents.return_value = mock_response
+
+        from deerflow.community.exa.tools import web_fetch_tool
+
+        result = web_fetch_tool.invoke({"url": "https://example.com/404"})
+
+        assert result == "Error: No results found"
+
+    def test_fetch_error_handling(self, mock_app_config, mock_exa_client):
+        """Test fetch returns error string on exception."""
+        mock_exa_client.get_contents.side_effect = Exception("Connection timeout")
+
+        from deerflow.community.exa.tools import web_fetch_tool
+
+        result = web_fetch_tool.invoke({"url": "https://example.com"})
+
+        assert result == "Error: Connection timeout"
+
+    def test_fetch_reads_web_fetch_config(self, mock_exa_client):
+        """Test that web_fetch_tool reads 'web_fetch' config, not 'web_search'."""
+        with patch("deerflow.community.exa.tools.get_app_config") as mock_config:
+            tool_config = MagicMock()
+            tool_config.model_extra = {"api_key": "exa-fetch-key"}
+            mock_config.return_value.get_tool_config.return_value = tool_config
+
+            mock_result = MagicMock()
+            mock_result.title = "Page"
+            mock_result.text = "Content."
+            mock_response = MagicMock()
+            mock_response.results = [mock_result]
+            mock_exa_client.get_contents.return_value = mock_response
+
+            from deerflow.community.exa.tools import web_fetch_tool
+
+            web_fetch_tool.invoke({"url": "https://example.com"})
+
+            mock_config.return_value.get_tool_config.assert_any_call("web_fetch")
+
+    def test_fetch_uses_independent_api_key(self, mock_exa_client):
+        """Test mixed-provider config: web_fetch uses its own api_key, not web_search's."""
+        with patch("deerflow.community.exa.tools.get_app_config") as mock_config:
+            with patch("deerflow.community.exa.tools.Exa") as mock_exa_cls:
+                mock_exa_cls.return_value = mock_exa_client
+                fetch_config = MagicMock()
+                fetch_config.model_extra = {"api_key": "exa-fetch-key"}
+
+                def get_tool_config(name):
+                    if name == "web_fetch":
+                        return fetch_config
+                    return None
+
+                mock_config.return_value.get_tool_config.side_effect = get_tool_config
+
+                mock_result = MagicMock()
+                mock_result.title = "Page"
+                mock_result.text = "Content."
+                mock_response = MagicMock()
+                mock_response.results = [mock_result]
+                mock_exa_client.get_contents.return_value = mock_response
+
+                from deerflow.community.exa.tools import web_fetch_tool
+
+                web_fetch_tool.invoke({"url": "https://example.com"})
+
+                mock_exa_cls.assert_called_once_with(api_key="exa-fetch-key")
+
+    def test_fetch_truncates_long_content(self, mock_app_config, mock_exa_client):
+        """Test fetch truncates content to 4096 characters."""
+        mock_result = MagicMock()
+        mock_result.title = "Long Page"
+        mock_result.text = "x" * 5000
+
+        mock_response = MagicMock()
+        mock_response.results = [mock_result]
+        mock_exa_client.get_contents.return_value = mock_response
+
+        from deerflow.community.exa.tools import web_fetch_tool
+
+        result = web_fetch_tool.invoke({"url": "https://example.com"})
+
+        # Header "# Long Page\n\n" is 13 chars; splitting on the first "\n\n" leaves the body, truncated to 4096
+        content_after_header = result.split("\n\n", 1)[1]
+        assert len(content_after_header) == 4096
diff --git a/backend/uv.lock b/backend/uv.lock
index 45731fb04..92a20393e 100644
--- a/backend/uv.lock
+++ b/backend/uv.lock
@@ -722,6 +722,7 @@ dependencies = [
     { name = "ddgs" },
     { name = "dotenv" },
     { name = "duckdb" },
+    { name = "exa-py" },
     { name = "firecrawl-py" },
     { name = "httpx" },
     { name = "kubernetes" },
@@ -759,6 +760,7 @@ requires-dist = [
     { name = "ddgs", specifier = ">=9.10.0" },
     { name = "dotenv", specifier = ">=0.9.9" },
     { name = "duckdb", specifier = ">=1.4.4" },
+    { name = "exa-py", specifier = ">=1.0.0" },
     { name = "firecrawl-py", specifier = ">=1.15.0" },
     { name = "httpx", specifier = ">=0.28.0" },
     { name = "kubernetes", specifier = ">=30.0.0" },
@@ -871,6 +873,24 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" },
 ]
 
+[[package]]
+name = "exa-py"
+version = "2.10.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "httpcore" },
+    { name = "httpx" },
+    { name = "openai" },
+    { name = "pydantic" },
+    { name = "python-dotenv" },
+    { name = "requests" },
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/fb/bb/23c9f78edbf0e0d656839be7346a2f77b9caaae8cc3cb301012c46fd7dc5/exa_py-2.10.1.tar.gz", hash = "sha256:731958c2befc5fc82f031c93cfe7b3d55dc3b0e1bf32f83ec34d32a65ee31ba1", size = 53826, upload-time = "2026-03-25T00:50:49.286Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/fc/8d/0665263aa8d51ef8e2a3955e2b56496add4879730451961b09610bbc7036/exa_py-2.10.1-py3-none-any.whl", hash = "sha256:e2174c932764fff747e84e9e6d0637eaa4a6503556014df73a3427f42cc9d6a7", size = 72270, upload-time = "2026-03-25T00:50:47.721Z" },
+]
+
 [[package]]
 name = "fake-useragent"
 version = "2.2.0"
diff --git a/config.example.yaml b/config.example.yaml
index 96dc7b4a0..7edfe60ae 100644
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -304,6 +304,23 @@ tools:
   #   # Used to limit the scope of search results, only returns content within the specified time range. Set to -1 to disable time filtering
   #   search_time_range: 10
 
+  # Web search tool (uses Exa, requires EXA_API_KEY)
+  # - name: web_search
+  #   group: web
+  #   use: deerflow.community.exa.tools:web_search_tool
+  #   max_results: 5
+  #   search_type: auto  # Options: auto, neural, keyword
+  #   contents_max_characters: 1000
+  #   # api_key: $EXA_API_KEY
+
+  # Web fetch tool (uses Exa, requires EXA_API_KEY)
+  # NOTE: Only one web_fetch provider can be active at a time.
+  # Comment out the Jina AI web_fetch entry below before enabling this one.
+  # - name: web_fetch
+  #   group: web
+  #   use: deerflow.community.exa.tools:web_fetch_tool
+  #   # api_key: $EXA_API_KEY
+
   # Web fetch tool (uses Jina AI reader)
   - name: web_fetch
     group: web