Offload readability extraction in web_fetch_tool to a worker thread.

web_fetch_tool is a coroutine; this patch replaces the direct call to
readability_extractor.extract_article with
asyncio.to_thread(readability_extractor.extract_article, html_content)
so the extraction no longer runs on the event loop thread. A regression
test wraps asyncio.to_thread with a tracking coroutine and asserts that
it was invoked while the tool ran and that the extracted text reaches the
tool's output.

NOTE(review): the HTML fixture returned by mock_crawl was reconstructed
from tag-stripped text (only the word "threaded" survived extraction);
confirm the exact markup against the original commit.

diff --git a/backend/packages/harness/deerflow/community/jina_ai/tools.py b/backend/packages/harness/deerflow/community/jina_ai/tools.py
index 9f243ecd9..760e6a3b6 100644
--- a/backend/packages/harness/deerflow/community/jina_ai/tools.py
+++ b/backend/packages/harness/deerflow/community/jina_ai/tools.py
@@ -1,3 +1,5 @@
+import asyncio
+
 from langchain.tools import tool
 
 from deerflow.community.jina_ai.jina_client import JinaClient
@@ -26,5 +28,5 @@ async def web_fetch_tool(url: str) -> str:
     html_content = await jina_client.crawl(url, return_format="html", timeout=timeout)
     if isinstance(html_content, str) and html_content.startswith("Error:"):
         return html_content
-    article = readability_extractor.extract_article(html_content)
+    article = await asyncio.to_thread(readability_extractor.extract_article, html_content)
     return article.to_markdown()[:4096]
diff --git a/backend/tests/test_jina_client.py b/backend/tests/test_jina_client.py
index 037436f73..5a1d6f6fa 100644
--- a/backend/tests/test_jina_client.py
+++ b/backend/tests/test_jina_client.py
@@ -175,3 +175,30 @@ async def test_web_fetch_tool_returns_markdown_on_success(monkeypatch):
     result = await web_fetch_tool.ainvoke("https://example.com")
     assert "Hello world" in result
     assert not result.startswith("Error:")
+
+
+@pytest.mark.anyio
+async def test_web_fetch_tool_offloads_extraction_to_thread(monkeypatch):
+    """Test that readability extraction is offloaded via asyncio.to_thread to avoid blocking the event loop."""
+    import asyncio
+
+    async def mock_crawl(self, url, **kwargs):
+        return "<html><body><p>threaded</p></body></html>"
+
+    mock_config = MagicMock()
+    mock_config.get_tool_config.return_value = None
+    monkeypatch.setattr("deerflow.community.jina_ai.tools.get_app_config", lambda: mock_config)
+    monkeypatch.setattr(JinaClient, "crawl", mock_crawl)
+
+    to_thread_called = False
+    original_to_thread = asyncio.to_thread
+
+    async def tracking_to_thread(func, *args, **kwargs):
+        nonlocal to_thread_called
+        to_thread_called = True
+        return await original_to_thread(func, *args, **kwargs)
+
+    monkeypatch.setattr("deerflow.community.jina_ai.tools.asyncio.to_thread", tracking_to_thread)
+    result = await web_fetch_tool.ainvoke("https://example.com")
+    assert to_thread_called, "extract_article must be called via asyncio.to_thread to avoid blocking the event loop"
+    assert "threaded" in result