fix(checkpointer): create parent directory before opening SQLite in sync provider (#2272)

* fix(checkpointer): create parent directory before opening SQLite in sync provider

The sync checkpointer factory (_sync_checkpointer_cm) opens a SQLite
connection without first ensuring the parent directory exists.  The async
provider and both store providers already call ensure_sqlite_parent_dir(),
but this call was missing from the sync path.

When the deer-flow harness package is used from an external virtualenv
(where the .deer-flow directory is not pre-created), the missing parent
directory causes:

    sqlite3.OperationalError: unable to open database file

Add the missing ensure_sqlite_parent_dir() call in the sync SQLite
branch, consistent with the async provider, and add a regression test.

Closes #2259

* style: fix ruff format + add call-order assertion for ensure_parent_dir

- Fix formatting in test_checkpointer.py (ruff format)
- Add test_sqlite_ensure_parent_dir_before_connect to verify
  ensure_sqlite_parent_dir is called before from_conn_string
  (addresses Copilot review suggestion)

---------

Co-authored-by: voidborne-d <voidborne-d@users.noreply.github.com>
This commit is contained in:
d 🔹 2026-04-16 09:06:38 +08:00 committed by GitHub
parent 105db00987
commit a664d2f5c4
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 75 additions and 1 deletions

View File

@ -27,7 +27,7 @@ from langgraph.types import Checkpointer
from deerflow.config.app_config import get_app_config
from deerflow.config.checkpointer_config import CheckpointerConfig
from deerflow.runtime.store._sqlite_utils import resolve_sqlite_conn_str
from deerflow.runtime.store._sqlite_utils import ensure_sqlite_parent_dir, resolve_sqlite_conn_str
logger = logging.getLogger(__name__)
@ -67,6 +67,7 @@ def _sync_checkpointer_cm(config: CheckpointerConfig) -> Iterator[Checkpointer]:
raise ImportError(SQLITE_INSTALL) from exc
conn_str = resolve_sqlite_conn_str(config.connection_string or "store.db")
ensure_sqlite_parent_dir(conn_str)
with SqliteSaver.from_conn_string(conn_str) as saver:
saver.setup()
logger.info("Checkpointer: using SqliteSaver (%s)", conn_str)

View File

@ -150,6 +150,79 @@ class TestGetCheckpointer:
mock_saver_cls.from_conn_string.assert_called_once()
mock_saver_instance.setup.assert_called_once()
def test_sqlite_creates_parent_dir(self):
"""Sync SQLite checkpointer should call ensure_sqlite_parent_dir before connecting.
This mirrors the async checkpointer's behaviour and prevents
'sqlite3.OperationalError: unable to open database file' when the
parent directory for the database file does not yet exist (e.g. when
using the harness package from an external virtualenv where the
.deer-flow directory has not been created).
"""
load_checkpointer_config_from_dict({"type": "sqlite", "connection_string": "relative/test.db"})
mock_saver_instance = MagicMock()
mock_cm = MagicMock()
mock_cm.__enter__ = MagicMock(return_value=mock_saver_instance)
mock_cm.__exit__ = MagicMock(return_value=False)
mock_saver_cls = MagicMock()
mock_saver_cls.from_conn_string = MagicMock(return_value=mock_cm)
mock_module = MagicMock()
mock_module.SqliteSaver = mock_saver_cls
with (
patch.dict(sys.modules, {"langgraph.checkpoint.sqlite": mock_module}),
patch("deerflow.agents.checkpointer.provider.ensure_sqlite_parent_dir") as mock_ensure,
patch(
"deerflow.agents.checkpointer.provider.resolve_sqlite_conn_str",
return_value="/tmp/resolved/relative/test.db",
),
):
reset_checkpointer()
cp = get_checkpointer()
assert cp is mock_saver_instance
mock_ensure.assert_called_once_with("/tmp/resolved/relative/test.db")
mock_saver_cls.from_conn_string.assert_called_once_with("/tmp/resolved/relative/test.db")
def test_sqlite_ensure_parent_dir_before_connect(self):
"""ensure_sqlite_parent_dir must be called before from_conn_string."""
load_checkpointer_config_from_dict({"type": "sqlite", "connection_string": "relative/test.db"})
call_order = []
mock_saver_instance = MagicMock()
mock_cm = MagicMock()
mock_cm.__enter__ = MagicMock(return_value=mock_saver_instance)
mock_cm.__exit__ = MagicMock(return_value=False)
mock_saver_cls = MagicMock()
mock_saver_cls.from_conn_string = MagicMock(side_effect=lambda *a, **kw: (call_order.append("connect"), mock_cm)[1])
mock_module = MagicMock()
mock_module.SqliteSaver = mock_saver_cls
def record_ensure(*a, **kw):
call_order.append("ensure")
with (
patch.dict(sys.modules, {"langgraph.checkpoint.sqlite": mock_module}),
patch(
"deerflow.agents.checkpointer.provider.ensure_sqlite_parent_dir",
side_effect=record_ensure,
),
patch(
"deerflow.agents.checkpointer.provider.resolve_sqlite_conn_str",
return_value="/tmp/resolved/relative/test.db",
),
):
reset_checkpointer()
get_checkpointer()
assert call_order == ["ensure", "connect"]
def test_postgres_creates_saver(self):
"""Postgres checkpointer is created when packages are available."""
load_checkpointer_config_from_dict({"type": "postgres", "connection_string": "postgresql://localhost/db"})