From e8572b9d0c39fbfcf6b20fdf3d5871912345593a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?d=20=F0=9F=94=B9?=
Date: Fri, 24 Apr 2026 16:00:14 +0800
Subject: [PATCH] fix(jina): log transient failures at WARNING without traceback (#2484) (#2485)

The exception handler in JinaClient.crawl used logger.exception, which
emits an ERROR-level record with the full httpx/httpcore/anyio traceback
for every transient network failure (timeout, connection refused). Other
search/crawl providers in the project log the same class of recoverable
failures as a single line. One offline/slow-network session could produce
dozens of multi-frame ERROR stack traces, drowning out real problems.

Switch to logger.warning with a concise message that includes the
exception type and its str, matching the style used elsewhere for
recoverable transient failures (aio_sandbox, ddg, etc.). The exception
type now also surfaces into the returned "Error: ..." string so callers
retain diagnostic signal.

Adds a regression test that asserts the log record is WARNING, carries no
exc_info, and includes the exception class name.

Co-authored-by: voidborne-d Co-authored-by: Willem Jiang --- .../deerflow/community/jina_ai/jina_client.py | 4 ++-- backend/tests/test_jina_client.py | 22 +++++++++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/backend/packages/harness/deerflow/community/jina_ai/jina_client.py b/backend/packages/harness/deerflow/community/jina_ai/jina_client.py index 3adc5458a..c4fc1ac81 100644 --- a/backend/packages/harness/deerflow/community/jina_ai/jina_client.py +++ b/backend/packages/harness/deerflow/community/jina_ai/jina_client.py @@ -38,6 +38,6 @@ class JinaClient: return response.text except Exception as e: - error_message = f"Request to Jina API failed: {str(e)}" - logger.exception(error_message) + error_message = f"Request to Jina API failed: {type(e).__name__}: {e}" + logger.warning(error_message) return f"Error: {error_message}" diff --git a/backend/tests/test_jina_client.py b/backend/tests/test_jina_client.py index 5a1d6f6fa..b1856e4ae 100644 --- a/backend/tests/test_jina_client.py +++ b/backend/tests/test_jina_client.py @@ -80,6 +80,28 @@ async def test_crawl_network_error(jina_client, monkeypatch): assert "failed" in result.lower() +@pytest.mark.anyio +async def test_crawl_transient_failure_logs_without_traceback(jina_client, monkeypatch, caplog): + """Transient network failures must log at WARNING without a traceback and include the exception type.""" + + async def mock_post(self, url, **kwargs): + raise httpx.ConnectTimeout("timed out") + + monkeypatch.setattr(httpx.AsyncClient, "post", mock_post) + + with caplog.at_level(logging.DEBUG, logger="deerflow.community.jina_ai.jina_client"): + result = await jina_client.crawl("https://example.com") + + jina_records = [r for r in caplog.records if r.name == "deerflow.community.jina_ai.jina_client"] + assert len(jina_records) == 1, f"expected exactly one log record, got {len(jina_records)}" + record = jina_records[0] + assert record.levelno == logging.WARNING, f"expected WARNING, got 
{record.levelname}" + assert record.exc_info is None, "transient failures must not attach a traceback" + assert "ConnectTimeout" in record.getMessage() + assert result.startswith("Error:") + assert "ConnectTimeout" in result + + @pytest.mark.anyio async def test_crawl_passes_headers(jina_client, monkeypatch): """Test that correct headers are sent."""