mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-04-25 11:18:22 +00:00
The exception handler in JinaClient.crawl used logger.exception, which emits an ERROR-level record with the full httpx/httpcore/anyio traceback for every transient network failure (timeout, connection refused). Other search/crawl providers in the project log the same class of recoverable failures as a single line. A single offline or slow-network session could therefore produce dozens of multi-frame ERROR stack traces, drowning out real problems. Switch to logger.warning with a concise message that includes the exception type and its string form (str(e)), matching the style used elsewhere for recoverable transient failures (aio_sandbox, ddg, etc.). The exception type now also appears in the returned "Error: ..." string so callers retain diagnostic signal. Add a regression test that asserts the log record is WARNING, carries no exc_info, and includes the exception class name. Co-authored-by: voidborne-d <voidborne-d@users.noreply.github.com> Co-authored-by: Willem Jiang <willem.jiang@gmail.com>
This commit is contained in:
parent
80a7446fd6
commit
e8572b9d0c
@ -38,6 +38,6 @@ class JinaClient:
|
||||
|
||||
return response.text
|
||||
except Exception as e:
|
||||
error_message = f"Request to Jina API failed: {str(e)}"
|
||||
logger.exception(error_message)
|
||||
error_message = f"Request to Jina API failed: {type(e).__name__}: {e}"
|
||||
logger.warning(error_message)
|
||||
return f"Error: {error_message}"
|
||||
|
||||
@ -80,6 +80,28 @@ async def test_crawl_network_error(jina_client, monkeypatch):
|
||||
assert "failed" in result.lower()
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_crawl_transient_failure_logs_without_traceback(jina_client, monkeypatch, caplog):
|
||||
"""Transient network failures must log at WARNING without a traceback and include the exception type."""
|
||||
|
||||
async def mock_post(self, url, **kwargs):
|
||||
raise httpx.ConnectTimeout("timed out")
|
||||
|
||||
monkeypatch.setattr(httpx.AsyncClient, "post", mock_post)
|
||||
|
||||
with caplog.at_level(logging.DEBUG, logger="deerflow.community.jina_ai.jina_client"):
|
||||
result = await jina_client.crawl("https://example.com")
|
||||
|
||||
jina_records = [r for r in caplog.records if r.name == "deerflow.community.jina_ai.jina_client"]
|
||||
assert len(jina_records) == 1, f"expected exactly one log record, got {len(jina_records)}"
|
||||
record = jina_records[0]
|
||||
assert record.levelno == logging.WARNING, f"expected WARNING, got {record.levelname}"
|
||||
assert record.exc_info is None, "transient failures must not attach a traceback"
|
||||
assert "ConnectTimeout" in record.getMessage()
|
||||
assert result.startswith("Error:")
|
||||
assert "ConnectTimeout" in result
|
||||
|
||||
|
||||
@pytest.mark.anyio
|
||||
async def test_crawl_passes_headers(jina_client, monkeypatch):
|
||||
"""Test that correct headers are sent."""
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user