From a664d2f5c4b2cbeb683e67e8bc48e2654d59695e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?d=20=F0=9F=94=B9?= Date: Thu, 16 Apr 2026 09:06:38 +0800 Subject: [PATCH] fix(checkpointer): create parent directory before opening SQLite in sync provider (#2272) * fix(checkpointer): create parent directory before opening SQLite in sync provider The sync checkpointer factory (_sync_checkpointer_cm) opens a SQLite connection without first ensuring the parent directory exists. The async provider and both store providers already call ensure_sqlite_parent_dir(), but this call was missing from the sync path. When the deer-flow harness package is used from an external virtualenv (where the .deer-flow directory is not pre-created), the missing parent directory causes: sqlite3.OperationalError: unable to open database file Add the missing ensure_sqlite_parent_dir() call in the sync SQLite branch, consistent with the async provider, and add a regression test. Closes #2259 * style: fix ruff format + add call-order assertion for ensure_parent_dir - Fix formatting in test_checkpointer.py (ruff format) - Add test_sqlite_ensure_parent_dir_before_connect to verify ensure_sqlite_parent_dir is called before from_conn_string (addresses Copilot review suggestion) --------- Co-authored-by: voidborne-d --- .../deerflow/agents/checkpointer/provider.py | 3 +- backend/tests/test_checkpointer.py | 73 +++++++++++++++++++ 2 files changed, 75 insertions(+), 1 deletion(-) diff --git a/backend/packages/harness/deerflow/agents/checkpointer/provider.py b/backend/packages/harness/deerflow/agents/checkpointer/provider.py index 6f09aac94..252e58be5 100644 --- a/backend/packages/harness/deerflow/agents/checkpointer/provider.py +++ b/backend/packages/harness/deerflow/agents/checkpointer/provider.py @@ -27,7 +27,7 @@ from langgraph.types import Checkpointer from deerflow.config.app_config import get_app_config from deerflow.config.checkpointer_config import CheckpointerConfig -from deerflow.runtime.store._sqlite_utils import resolve_sqlite_conn_str +from deerflow.runtime.store._sqlite_utils import ensure_sqlite_parent_dir, resolve_sqlite_conn_str logger = logging.getLogger(__name__) @@ -67,6 +67,7 @@ def _sync_checkpointer_cm(config: CheckpointerConfig) -> Iterator[Checkpointer]: raise ImportError(SQLITE_INSTALL) from exc conn_str = resolve_sqlite_conn_str(config.connection_string or "store.db") + ensure_sqlite_parent_dir(conn_str) with SqliteSaver.from_conn_string(conn_str) as saver: saver.setup() logger.info("Checkpointer: using SqliteSaver (%s)", conn_str) diff --git a/backend/tests/test_checkpointer.py b/backend/tests/test_checkpointer.py index 79a4912d9..44db0e2d1 100644 --- a/backend/tests/test_checkpointer.py +++ b/backend/tests/test_checkpointer.py @@ -150,6 +150,79 @@ class TestGetCheckpointer: mock_saver_cls.from_conn_string.assert_called_once() mock_saver_instance.setup.assert_called_once() + def test_sqlite_creates_parent_dir(self): + """Sync SQLite checkpointer should call ensure_sqlite_parent_dir before connecting. + + This mirrors the async checkpointer's behaviour and prevents + 'sqlite3.OperationalError: unable to open database file' when the + parent directory for the database file does not yet exist (e.g. when + using the harness package from an external virtualenv where the + .deer-flow directory has not been created). + """ + load_checkpointer_config_from_dict({"type": "sqlite", "connection_string": "relative/test.db"}) + + mock_saver_instance = MagicMock() + mock_cm = MagicMock() + mock_cm.__enter__ = MagicMock(return_value=mock_saver_instance) + mock_cm.__exit__ = MagicMock(return_value=False) + + mock_saver_cls = MagicMock() + mock_saver_cls.from_conn_string = MagicMock(return_value=mock_cm) + + mock_module = MagicMock() + mock_module.SqliteSaver = mock_saver_cls + + with ( + patch.dict(sys.modules, {"langgraph.checkpoint.sqlite": mock_module}), + patch("deerflow.agents.checkpointer.provider.ensure_sqlite_parent_dir") as mock_ensure, + patch( + "deerflow.agents.checkpointer.provider.resolve_sqlite_conn_str", + return_value="/tmp/resolved/relative/test.db", + ), + ): + reset_checkpointer() + cp = get_checkpointer() + + assert cp is mock_saver_instance + mock_ensure.assert_called_once_with("/tmp/resolved/relative/test.db") + mock_saver_cls.from_conn_string.assert_called_once_with("/tmp/resolved/relative/test.db") + + def test_sqlite_ensure_parent_dir_before_connect(self): + """ensure_sqlite_parent_dir must be called before from_conn_string.""" + load_checkpointer_config_from_dict({"type": "sqlite", "connection_string": "relative/test.db"}) + + call_order = [] + + mock_saver_instance = MagicMock() + mock_cm = MagicMock() + mock_cm.__enter__ = MagicMock(return_value=mock_saver_instance) + mock_cm.__exit__ = MagicMock(return_value=False) + + mock_saver_cls = MagicMock() + mock_saver_cls.from_conn_string = MagicMock(side_effect=lambda *a, **kw: (call_order.append("connect"), mock_cm)[1]) + + mock_module = MagicMock() + mock_module.SqliteSaver = mock_saver_cls + + def record_ensure(*a, **kw): + call_order.append("ensure") + + with ( + patch.dict(sys.modules, {"langgraph.checkpoint.sqlite": mock_module}), + patch( + "deerflow.agents.checkpointer.provider.ensure_sqlite_parent_dir", + side_effect=record_ensure, + ), + patch( + "deerflow.agents.checkpointer.provider.resolve_sqlite_conn_str", + return_value="/tmp/resolved/relative/test.db", + ), + ): + reset_checkpointer() + get_checkpointer() + + assert call_order == ["ensure", "connect"] + def test_postgres_creates_saver(self): """Postgres checkpointer is created when packages are available.""" load_checkpointer_config_from_dict({"type": "postgres", "connection_string": "postgresql://localhost/db"})