fix(jina): log transient failures at WARNING without traceback (#2484) (#2485)

The exception handler in JinaClient.crawl used logger.exception, which
emits an ERROR-level record with the full httpx/httpcore/anyio traceback
for every transient network failure (timeout, connection refused). Other
search/crawl providers in the project log the same class of recoverable
failures as a single line. One offline/slow-network session could produce
dozens of multi-frame ERROR stack traces, drowning out real problems.

Switch to logger.warning with a concise message that includes the
exception type and its string form, matching the style used elsewhere for
recoverable transient failures (aio_sandbox, ddg, etc.). The exception
type now also surfaces into the returned "Error: ..." string so callers
retain diagnostic signal.

Adds a regression test that asserts the log record is WARNING, carries
no exc_info, and includes the exception class name.

Co-authored-by: voidborne-d <voidborne-d@users.noreply.github.com>
Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
d 🔹 2026-04-24 16:00:14 +08:00 committed by GitHub
parent 80a7446fd6
commit e8572b9d0c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 24 additions and 2 deletions

View File

@ -38,6 +38,6 @@ class JinaClient:
return response.text return response.text
except Exception as e: except Exception as e:
error_message = f"Request to Jina API failed: {str(e)}" error_message = f"Request to Jina API failed: {type(e).__name__}: {e}"
logger.exception(error_message) logger.warning(error_message)
return f"Error: {error_message}" return f"Error: {error_message}"

View File

@ -80,6 +80,28 @@ async def test_crawl_network_error(jina_client, monkeypatch):
assert "failed" in result.lower() assert "failed" in result.lower()
@pytest.mark.anyio
async def test_crawl_transient_failure_logs_without_traceback(jina_client, monkeypatch, caplog):
    """Check how ``crawl`` reports a transient network failure.

    ``httpx.AsyncClient.post`` is patched to raise ``httpx.ConnectTimeout``.
    The test then verifies that the Jina client logger emitted exactly one
    record, that the record is at WARNING level with no ``exc_info``
    attached, and that both the log message and the returned error string
    name the exception class.
    """
    jina_logger_name = "deerflow.community.jina_ai.jina_client"

    async def raise_connect_timeout(self, url, **kwargs):
        # Stand-in for a network call that never connects.
        raise httpx.ConnectTimeout("timed out")

    monkeypatch.setattr(httpx.AsyncClient, "post", raise_connect_timeout)

    # Capture from DEBUG upward so a record at any level would be seen.
    with caplog.at_level(logging.DEBUG, logger=jina_logger_name):
        result = await jina_client.crawl("https://example.com")

    # Ignore records from other loggers; only the Jina client's output matters.
    jina_records = [entry for entry in caplog.records if entry.name == jina_logger_name]
    assert len(jina_records) == 1, f"expected exactly one log record, got {len(jina_records)}"

    record = jina_records[0]
    assert record.levelno == logging.WARNING, f"expected WARNING, got {record.levelname}"
    assert record.exc_info is None, "transient failures must not attach a traceback"
    assert "ConnectTimeout" in record.getMessage()

    # The caller-facing string keeps the "Error:" prefix and the exception type.
    assert result.startswith("Error:")
    assert "ConnectTimeout" in result
@pytest.mark.anyio @pytest.mark.anyio
async def test_crawl_passes_headers(jina_client, monkeypatch): async def test_crawl_passes_headers(jina_client, monkeypatch):
"""Test that correct headers are sent.""" """Test that correct headers are sent."""