"""
Web Search Tool - Search the web using Serper (Google Search API).

Serper provides real-time Google Search results via a JSON API.
An API key is required. Sign up at https://serper.dev to get one.
"""

import json
import logging
import os

import httpx
from langchain.tools import tool

from deerflow.config import get_app_config

logger = logging.getLogger(__name__)

_SERPER_ENDPOINT = "https://google.serper.dev/search"
# Set once the "API key is not set" warning has been logged, so it is emitted only one time.
_api_key_warned = False


def _clean_key(value: object) -> str | None:
    """Return *value* without surrounding whitespace, or None when it is not a non-blank string."""
    if not isinstance(value, str):
        return None
    return value.strip() or None


def _error_json(message: str, query: str) -> str:
    """Serialize the structured error payload returned to the caller for a failed search."""
    return json.dumps({"error": message, "query": query}, ensure_ascii=False)


def _resolve_max_results(requested: int, configured: object) -> int:
    """Return the effective result limit, preferring the configured value.

    Args:
        requested: The limit passed by the caller of the tool.
        configured: The raw ``max_results`` value read from the tool config.

    Returns:
        ``configured`` coerced to ``int`` when possible, otherwise ``requested``;
        in both cases clamped to at least 1 so it is safe to use as a slice bound.
    """
    try:
        limit = int(configured)
    except (TypeError, ValueError, OverflowError):
        logger.warning("Ignoring invalid max_results %r in web_search tool config", configured)
        limit = requested
    return max(1, limit)


def _get_api_key() -> str | None:
    """Look up the Serper API key.

    The ``api_key`` entry of the ``web_search`` tool config wins when it is a
    non-blank string; otherwise the ``SERPER_API_KEY`` environment variable is
    used. Both sources are stripped, and a blank value counts as "not set".

    Returns:
        The API key, or None when neither source provides a usable value.
    """
    config = get_app_config().get_tool_config("web_search")
    if config is not None:
        # model_extra may be None when the config model does not allow extra fields.
        api_key = _clean_key((config.model_extra or {}).get("api_key"))
        if api_key:
            return api_key
    return _clean_key(os.getenv("SERPER_API_KEY"))


@tool("web_search", parse_docstring=True)
def web_search_tool(query: str, max_results: int = 5) -> str:
    """Search the web for information using Google Search via Serper.

    Args:
        query: Search keywords describing what you want to find. Be specific for better results.
        max_results: Maximum number of search results to return. Default is 5.
    """
    # NOTE: parse_docstring=True hands the docstring above to langchain at runtime
    # (presumably as the tool/argument descriptions), so it is kept verbatim and the
    # implementation notes live in comments instead.
    #
    # Returns a JSON string: {"query", "total_results", "results": [{"title", "url",
    # "content"}]} on success, or {"error", "query"} on any failure.
    global _api_key_warned

    config = get_app_config().get_tool_config("web_search")
    extra = (config.model_extra or {}) if config is not None else {}
    if "max_results" in extra:
        # A configured limit overrides whatever the caller asked for.
        max_results = _resolve_max_results(max_results, extra["max_results"])
    else:
        max_results = max(1, max_results)

    api_key = _get_api_key()
    if not api_key:
        if not _api_key_warned:
            _api_key_warned = True
            logger.warning("Serper API key is not set. Set SERPER_API_KEY in your environment or provide api_key in config.yaml. Sign up at https://serper.dev")
        return _error_json("SERPER_API_KEY is not configured", query)

    headers = {
        "X-API-KEY": api_key,
        "Content-Type": "application/json",
    }
    payload = {"q": query, "num": max_results}

    try:
        with httpx.Client(timeout=30) as client:
            response = client.post(_SERPER_ENDPOINT, headers=headers, json=payload)
            response.raise_for_status()
            data = response.json()
    except httpx.HTTPStatusError as e:
        logger.error("Serper API returned HTTP %s: %s", e.response.status_code, e.response.text)
        return _error_json(f"Serper API error: HTTP {e.response.status_code}", query)
    except Exception as e:
        # Tool boundary: any transport or decoding failure is reported as JSON rather than raised.
        logger.error("Serper search failed: %s: %s", type(e).__name__, e)
        return _error_json(str(e), query)

    # Validate the response shape here, outside the try block, so an unexpected
    # body cannot raise out of the tool.
    organic = data.get("organic") if isinstance(data, dict) else None
    entries = [r for r in organic if isinstance(r, dict)] if isinstance(organic, list) else []
    if not entries:
        return _error_json("No results found", query)

    normalized_results = [
        {
            "title": r.get("title", ""),
            "url": r.get("link", ""),
            "content": r.get("snippet", ""),
        }
        for r in entries[:max_results]
    ]

    output = {
        "query": query,
        "total_results": len(normalized_results),
        "results": normalized_results,
    }
    return json.dumps(output, indent=2, ensure_ascii=False)
"""Unit tests for the Serper community web search tool."""

import json
import logging
from contextlib import contextmanager
from unittest.mock import MagicMock, patch

import httpx
import pytest

import deerflow.community.serper.tools as serper_mod
from deerflow.community.serper.tools import _get_api_key, web_search_tool

_TOOLS_MODULE = "deerflow.community.serper.tools"


@contextmanager
def _patched_config(model_extra):
    """Patch ``get_app_config`` in the tool module and yield the patch mock.

    ``model_extra`` becomes the ``model_extra`` dict of the mocked ``web_search``
    tool config; pass ``None`` to simulate the tool config being absent.
    """
    with patch(f"{_TOOLS_MODULE}.get_app_config") as mock_get_config:
        if model_extra is None:
            tool_config = None
        else:
            tool_config = MagicMock()
            tool_config.model_extra = model_extra
        mock_get_config.return_value.get_tool_config.return_value = tool_config
        yield mock_get_config


@contextmanager
def _patched_http_post(*, response=None, error=None):
    """Patch ``httpx.Client`` in the tool module and yield the mocked ``post`` method.

    ``post`` returns ``response``, or raises ``error`` when one is given.
    """
    with patch(f"{_TOOLS_MODULE}.httpx.Client") as mock_client_cls:
        mock_post = mock_client_cls.return_value.__enter__.return_value.post
        if error is not None:
            mock_post.side_effect = error
        else:
            mock_post.return_value = response
        yield mock_post


@pytest.fixture(autouse=True)
def reset_api_key_warned():
    """Reset the module-level warning flag before each test."""
    serper_mod._api_key_warned = False
    yield
    serper_mod._api_key_warned = False


@pytest.fixture
def mock_config_with_key():
    """Tool config that carries an API key and ``max_results`` of 5."""
    with _patched_config({"api_key": "test-serper-key", "max_results": 5}) as mock:
        yield mock


@pytest.fixture
def mock_config_no_key():
    """Tool config that exists but has no extra settings at all."""
    with _patched_config({}) as mock:
        yield mock


def _make_serper_response(organic: list) -> MagicMock:
    """Build a mocked HTTP response whose JSON body holds the given ``organic`` list."""
    mock_resp = MagicMock()
    mock_resp.json.return_value = {"organic": organic}
    mock_resp.raise_for_status = MagicMock()
    return mock_resp


def _numbered_results(count: int) -> list:
    """Return ``count`` distinct organic entries (R0/S0, R1/S1, ...)."""
    return [{"title": f"R{i}", "link": f"https://x.com/{i}", "snippet": f"S{i}"} for i in range(count)]


class TestGetApiKey:
    """``_get_api_key`` prefers the tool config and falls back to the environment."""

    def test_returns_config_key_when_present(self):
        with _patched_config({"api_key": "from-config"}):
            assert _get_api_key() == "from-config"

    def test_falls_back_to_env_when_config_key_empty(self):
        with _patched_config({"api_key": ""}), patch.dict("os.environ", {"SERPER_API_KEY": "env-key"}):
            assert _get_api_key() == "env-key"

    def test_falls_back_to_env_when_config_key_whitespace(self):
        with _patched_config({"api_key": " "}), patch.dict("os.environ", {"SERPER_API_KEY": "env-key"}):
            assert _get_api_key() == "env-key"

    def test_falls_back_to_env_when_config_key_null(self):
        with _patched_config({"api_key": None}), patch.dict("os.environ", {"SERPER_API_KEY": "env-key"}):
            assert _get_api_key() == "env-key"

    def test_falls_back_to_env_when_no_config(self):
        with _patched_config(None), patch.dict("os.environ", {"SERPER_API_KEY": "env-only"}):
            assert _get_api_key() == "env-only"

    def test_returns_none_when_no_key_anywhere(self):
        # clear=True empties os.environ for the duration of the block, so no
        # explicit removal of SERPER_API_KEY is needed.
        with _patched_config(None), patch.dict("os.environ", {}, clear=True):
            assert _get_api_key() is None


class TestWebSearchTool:
    """End-to-end behaviour of ``web_search_tool`` with the HTTP client mocked out."""

    def test_basic_search_returns_normalized_results(self, mock_config_with_key):
        organic = [
            {"title": "Result 1", "link": "https://example.com/1", "snippet": "Snippet 1"},
            {"title": "Result 2", "link": "https://example.com/2", "snippet": "Snippet 2"},
        ]

        with _patched_http_post(response=_make_serper_response(organic)):
            result = web_search_tool.invoke({"query": "python tutorial"})

        parsed = json.loads(result)
        assert parsed["query"] == "python tutorial"
        assert parsed["total_results"] == 2
        assert parsed["results"][0]["title"] == "Result 1"
        assert parsed["results"][0]["url"] == "https://example.com/1"
        assert parsed["results"][0]["content"] == "Snippet 1"

    def test_respects_max_results_from_config(self, mock_config_with_key):
        mock_config_with_key.return_value.get_tool_config.return_value.model_extra = {
            "api_key": "test-key",
            "max_results": 3,
        }

        with _patched_http_post(response=_make_serper_response(_numbered_results(10))):
            result = web_search_tool.invoke({"query": "test"})

        parsed = json.loads(result)
        assert parsed["total_results"] == 3
        assert len(parsed["results"]) == 3

    def test_max_results_parameter_accepted(self, mock_config_no_key):
        """Tool accepts max_results as a call parameter when config does not override it."""
        with patch.dict("os.environ", {"SERPER_API_KEY": "env-key"}):
            with _patched_http_post(response=_make_serper_response(_numbered_results(10))):
                result = web_search_tool.invoke({"query": "test", "max_results": 2})

        parsed = json.loads(result)
        assert parsed["total_results"] == 2

    def test_config_max_results_overrides_parameter(self):
        """Config max_results overrides the parameter passed at call time, matching ddg_search behaviour."""
        with _patched_config({"api_key": "test-key", "max_results": 3}):
            with _patched_http_post(response=_make_serper_response(_numbered_results(10))):
                result = web_search_tool.invoke({"query": "test", "max_results": 8})

        parsed = json.loads(result)
        assert parsed["total_results"] == 3

    def test_empty_organic_returns_error_json(self, mock_config_with_key):
        """Empty organic list returns structured error, matching ddg_search convention."""
        with _patched_http_post(response=_make_serper_response([])):
            result = web_search_tool.invoke({"query": "no results"})

        parsed = json.loads(result)
        assert "error" in parsed
        assert parsed["error"] == "No results found"
        assert parsed["query"] == "no results"

    def test_missing_api_key_returns_error_json(self, mock_config_no_key):
        with patch.dict("os.environ", {}, clear=True):
            result = web_search_tool.invoke({"query": "test"})

        parsed = json.loads(result)
        assert "error" in parsed
        assert "SERPER_API_KEY" in parsed["error"]

    def test_missing_api_key_logs_warning_once(self, mock_config_no_key, caplog):
        with patch.dict("os.environ", {}, clear=True):
            with caplog.at_level(logging.WARNING, logger="deerflow.community.serper.tools"):
                web_search_tool.invoke({"query": "q1"})
                web_search_tool.invoke({"query": "q2"})

        warnings = [r for r in caplog.records if r.levelno == logging.WARNING]
        assert len(warnings) == 1

    def test_http_error_returns_structured_error(self, mock_config_with_key):
        mock_error_response = MagicMock()
        mock_error_response.status_code = 403
        mock_error_response.text = "Forbidden"
        error = httpx.HTTPStatusError("403", request=MagicMock(), response=mock_error_response)

        with _patched_http_post(error=error):
            result = web_search_tool.invoke({"query": "test"})

        parsed = json.loads(result)
        assert "error" in parsed
        assert "403" in parsed["error"]

    def test_network_exception_returns_error_json(self, mock_config_with_key):
        # A bare Exception pins the tool's catch-all branch, not just httpx errors.
        with _patched_http_post(error=Exception("timeout")):
            result = web_search_tool.invoke({"query": "test"})

        parsed = json.loads(result)
        assert "error" in parsed

    def test_sends_correct_headers_and_payload(self, mock_config_with_key):
        organic = [{"title": "T", "link": "https://x.com", "snippet": "S"}]

        with _patched_http_post(response=_make_serper_response(organic)) as mock_post:
            web_search_tool.invoke({"query": "hello world"})

        call_kwargs = mock_post.call_args
        headers = call_kwargs.kwargs["headers"]
        payload = call_kwargs.kwargs["json"]

        assert headers["X-API-KEY"] == "test-serper-key"
        assert payload["q"] == "hello world"
        assert payload["num"] == 5

    def test_uses_env_key_when_config_absent(self):
        organic = [{"title": "T", "link": "https://x.com", "snippet": "S"}]

        with _patched_config(None), patch.dict("os.environ", {"SERPER_API_KEY": "env-only-key"}):
            with _patched_http_post(response=_make_serper_response(organic)) as mock_post:
                web_search_tool.invoke({"query": "env key test"})

        headers = mock_post.call_args.kwargs["headers"]
        assert headers["X-API-KEY"] == "env-only-key"

    def test_partial_fields_in_organic_result(self, mock_config_with_key):
        """Missing title/link/snippet should default to empty string."""
        with _patched_http_post(response=_make_serper_response([{}])):
            result = web_search_tool.invoke({"query": "test"})

        parsed = json.loads(result)
        assert parsed["results"][0] == {"title": "", "url": "", "content": ""}
5 + # Web search tool (uses Serper - Google Search API, requires SERPER_API_KEY) + # Serper provides real-time Google Search results. Sign up at https://serper.dev + # Note: set SERPER_API_KEY in your environment before starting the app, or + # uncomment and fill in api_key below (the $VAR syntax is resolved at startup). + # - name: web_search + # group: web + # use: deerflow.community.serper.tools:web_search_tool + # max_results: 5 + # # api_key: $SERPER_API_KEY # Optional if SERPER_API_KEY env var is set + # Web search tool (requires Tavily API key) # - name: web_search # group: web