Fix the issues found when reviewing the persistence part of 2566 (#2604)

* Fix the P0/P1 issues from the journal & event store code review

* Fix the P2 issues from the journal & event store code review

* Apply suggestions from code review

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Update backend/packages/harness/deerflow/runtime/journal.py

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>

* Refactor logger debug message formatting

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Willem Jiang 2026-04-28 11:44:40 +08:00 committed by GitHub
parent 4e4e4f92a0
commit 69649d8aae
3 changed files with 82 additions and 19 deletions

View File

@@ -1,6 +1,8 @@
 """Pure functions to convert LangChain message objects to OpenAI Chat Completions format.
-Used by RunJournal to build content dicts for event storage.
+Utility for translating LangChain message types to OpenAI-compatible dicts.
+Not currently wired into RunJournal (which uses message.model_dump() directly),
+but available for consumers that need the OpenAI wire format.
 """
 from __future__ import annotations
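As a rough illustration of what this module provides, here is a minimal sketch of a LangChain-to-OpenAI conversion. The helper name to_openai_dict and the exact field handling are assumptions for illustration, not this module's actual API:

from langchain_core.messages import AIMessage, BaseMessage, HumanMessage, SystemMessage, ToolMessage

def to_openai_dict(message: BaseMessage) -> dict:
    """Hypothetical helper: map a LangChain message onto the OpenAI chat role/content shape."""
    if isinstance(message, SystemMessage):
        return {"role": "system", "content": message.content}
    if isinstance(message, HumanMessage):
        return {"role": "user", "content": message.content}
    if isinstance(message, ToolMessage):
        return {"role": "tool", "content": message.content, "tool_call_id": message.tool_call_id}
    if isinstance(message, AIMessage):
        return {"role": "assistant", "content": message.content}
    raise TypeError(f"Unsupported message type: {type(message).__name__}")

print(to_openai_dict(HumanMessage(content="hello")))  # {'role': 'user', 'content': 'hello'}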

backend/packages/harness/deerflow/runtime/journal.py View File

@@ -62,9 +62,6 @@ class RunJournal(BaseCallbackHandler):
         self._total_output_tokens = 0
         self._total_tokens = 0
         self._llm_call_count = 0
-        self._lead_agent_tokens = 0
-        self._subagent_tokens = 0
-        self._middleware_tokens = 0
 
         # Convenience fields
         self._last_ai_msg: str | None = None
@@ -76,7 +73,7 @@ class RunJournal(BaseCallbackHandler):
         # LLM request/response tracking
         self._llm_call_index = 0
-        self._cached_prompts: dict[str, list[dict]] = {}  # langchain run_id -> OpenAI messages
+        self._seen_llm_starts: set[str] = set()  # langchain run_ids that fired on_chat_model_start
 
     # -- Lifecycle callbacks --
@@ -135,8 +132,7 @@ class RunJournal(BaseCallbackHandler):
         rid = str(run_id)
         self._llm_start_times[rid] = time.monotonic()
         self._llm_call_index += 1
-        # Mark this run_id as seen so on_llm_end knows not to increment again.
-        self._cached_prompts[rid] = []
+        self._seen_llm_starts.add(rid)
         logger.debug(
             "on_chat_model_start %s: tags=%s num_batches=%d message_counts=%s",
@@ -148,8 +144,8 @@ class RunJournal(BaseCallbackHandler):
         # Capture the first human message sent to any LLM in this run.
         if not self._first_human_msg and messages:
-            for batch in messages.reversed():
-                for m in batch.reversed():
+            for batch in reversed(messages):
+                for m in reversed(batch):
                     if isinstance(m, HumanMessage) and m.name != "summary":
                         caller = self._identify_caller(tags)
                         self.set_first_human_message(m.text)
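For context on this fix: Python lists have no .reversed() method, so the old lines would raise AttributeError at runtime; the reversed() builtin returns a reverse iterator over any sequence. A quick standalone check:

batches = [["a", "b"], ["c", "d"]]

# Old code: AttributeError: 'list' object has no attribute 'reversed'
# batches.reversed()

# Fixed code: the builtin iterates the sequence in reverse order.
for batch in reversed(batches):
    for m in reversed(batch):
        print(m)  # prints d, c, b, a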
@@ -167,9 +163,17 @@ class RunJournal(BaseCallbackHandler):
         # Fallback: on_chat_model_start is preferred. This just tracks latency.
         self._llm_start_times[str(run_id)] = time.monotonic()
 
-    def on_llm_end(self, response, *, run_id, parent_run_id, tags, **kwargs) -> None:
+    def on_llm_end(
+        self,
+        response: Any,
+        *,
+        run_id: UUID,
+        parent_run_id: UUID | None = None,
+        tags: list[str] | None = None,
+        **kwargs: Any,
+    ) -> None:
         messages: list[AnyMessage] = []
-        logger.info(f"on_llm_end {run_id}: response: {tags} {kwargs}")
+        logger.debug("on_llm_end %s: tags=%s", run_id, tags)
         for generation in response.generations:
             for gen in generation:
                 if hasattr(gen, "message"):
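The loop under the new signature walks LangChain's LLMResult shape, where generations is a list of per-prompt batches and chat generations expose the message attribute. A self-contained sketch of that traversal:

from langchain_core.messages import AIMessage
from langchain_core.outputs import ChatGeneration, LLMResult

result = LLMResult(generations=[[ChatGeneration(message=AIMessage(content="hi"))]])

for generation in result.generations:    # one inner list per prompt in the batch
    for gen in generation:
        if hasattr(gen, "message"):      # ChatGeneration carries the AIMessage; plain Generation does not
            print(gen.message.content)   # -> hi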
@@ -191,10 +195,11 @@ class RunJournal(BaseCallbackHandler):
         # Resolve call index
         call_index = self._llm_call_index
-        if rid not in self._cached_prompts:
+        if rid not in self._seen_llm_starts:
             # Fallback: on_chat_model_start was not called
             self._llm_call_index += 1
             call_index = self._llm_call_index
+            self._seen_llm_starts.add(rid)
 
         # Trace event: llm_response (OpenAI completion format)
         self._put(
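Taken together, the _seen_llm_starts changes implement a simple mark-on-start, count-on-end-if-missed pattern. A minimal standalone sketch of the bookkeeping (the class and field names mirror the diff; everything else is illustrative):

class CallCounter:
    """Illustrative only: mirrors RunJournal's _seen_llm_starts bookkeeping."""

    def __init__(self) -> None:
        self._llm_call_index = 0
        self._seen_llm_starts: set[str] = set()

    def on_start(self, run_id: str) -> None:
        # Normal path: the start callback assigns the call index.
        self._llm_call_index += 1
        self._seen_llm_starts.add(run_id)

    def on_end(self, run_id: str) -> int:
        call_index = self._llm_call_index
        if run_id not in self._seen_llm_starts:
            # Fallback: on_start never fired for this run, so count it here.
            self._llm_call_index += 1
            call_index = self._llm_call_index
            self._seen_llm_starts.add(run_id)
        return call_index

counter = CallCounter()
counter.on_start("run-1")
assert counter.on_end("run-1") == 1  # start was seen: index unchanged
assert counter.on_end("run-2") == 2  # start was missed: counted at end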
@@ -229,7 +234,7 @@ class RunJournal(BaseCallbackHandler):
     def on_tool_start(self, serialized, input_str, *, run_id, parent_run_id=None, tags=None, metadata=None, inputs=None, **kwargs):
         """Handle tool start event, cache tool call ID for later correlation"""
         tool_call_id = str(run_id)
-        logger.info(f"Tool start for node {run_id}, tool_call_id={tool_call_id}, tags={tags}, metadata={metadata}")
+        logger.debug("Tool start for node %s, tool_call_id=%s, tags=%s", run_id, tool_call_id, tags)
 
     def on_tool_end(self, output, *, run_id, parent_run_id=None, **kwargs):
         """Handle tool end event, append message and clear node data"""
@@ -248,7 +253,7 @@ class RunJournal(BaseCallbackHandler):
         else:
             logger.warning(f"on_tool_end {run_id}: output is not ToolMessage: {type(output)}")
         finally:
-            logger.info(f"Tool end for node {run_id}")
+            logger.debug("Tool end for node %s", run_id)
 
     # -- Internal methods --
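The logger.info-to-logger.debug rewrites also switch from f-strings to %-style placeholders: an f-string is formatted eagerly at the call site even when the level is disabled, while %-style arguments are only interpolated if a handler actually emits the record. For example:

import logging

logging.basicConfig(level=logging.INFO)  # DEBUG records are filtered out
logger = logging.getLogger(__name__)

run_id = "3f2a"

# Eager: the f-string is built before logging decides to drop the record.
logger.debug(f"Tool end for node {run_id}")

# Lazy: the %s interpolation happens only if the record is actually emitted.
logger.debug("Tool end for node %s", run_id)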
@@ -313,8 +318,8 @@ class RunJournal(BaseCallbackHandler):
         if exc:
             logger.warning("Journal flush task failed: %s", exc)
 
-    def _identify_caller(self, tags: list[str] | None, **kwargs) -> str:
-        _tags = tags or kwargs.get("tags", [])
+    def _identify_caller(self, tags: list[str] | None) -> str:
+        _tags = tags or []
         for tag in _tags:
             if isinstance(tag, str) and (tag.startswith("subagent:") or tag.startswith("middleware:") or tag == "lead_agent"):
                 return tag
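For reference, the caller-tag convention this method matches on is "lead_agent", "subagent:<name>", or "middleware:<name>". A standalone sketch of the same logic (the "unknown" fallback is an assumption; the diff does not show the method's default return):

def identify_caller(tags: list[str] | None) -> str:
    """Standalone copy of the matching logic; the fallback value is assumed."""
    for tag in tags or []:
        if isinstance(tag, str) and (
            tag.startswith("subagent:") or tag.startswith("middleware:") or tag == "lead_agent"
        ):
            return tag
    return "unknown"  # assumption: not shown in this diff

print(identify_caller(["langgraph", "subagent:researcher"]))  # subagent:researcher
print(identify_caller(None))                                  # unknown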
@@ -371,9 +376,6 @@ class RunJournal(BaseCallbackHandler):
             "total_output_tokens": self._total_output_tokens,
             "total_tokens": self._total_tokens,
             "llm_call_count": self._llm_call_count,
-            "lead_agent_tokens": self._lead_agent_tokens,
-            "subagent_tokens": self._subagent_tokens,
-            "middleware_tokens": self._middleware_tokens,
             "message_count": self._msg_count,
             "last_ai_message": self._last_ai_msg,
             "first_human_message": self._first_human_msg,

View File

@@ -381,3 +381,62 @@ class TestMiddlewareEvents:
         event_types = {e["event_type"] for e in events}
         assert "middleware:title" in event_types
         assert "middleware:guardrail" in event_types
+
+
+class TestChatModelStartHumanMessage:
+    """Tests for on_chat_model_start extracting the first human message."""
+
+    @pytest.mark.anyio
+    async def test_extracts_first_human_message(self, journal_setup):
+        """on_chat_model_start captures the first HumanMessage from prompts."""
+        from langchain_core.messages import AIMessage, HumanMessage
+
+        j, store = journal_setup
+        messages_batch = [
+            [HumanMessage(content="What is AI?"), AIMessage(content="Hi there")],
+        ]
+        j.on_chat_model_start({}, messages_batch, run_id=uuid4(), tags=["lead_agent"])
+        await j.flush()
+
+        assert j._first_human_msg == "What is AI?"
+        events = await store.list_events("t1", "r1")
+        human_events = [e for e in events if e["event_type"] == "llm.human.input"]
+        assert len(human_events) == 1
+        assert human_events[0]["content"]["content"] == "What is AI?"
+
+    @pytest.mark.anyio
+    async def test_skips_summary_named_human_messages(self, journal_setup):
+        """HumanMessages with name='summary' are skipped."""
+        from langchain_core.messages import HumanMessage
+
+        j, store = journal_setup
+        messages_batch = [
+            [HumanMessage(content="Summarized context", name="summary"), HumanMessage(content="Real question")],
+        ]
+        j.on_chat_model_start({}, messages_batch, run_id=uuid4(), tags=["lead_agent"])
+        await j.flush()
+
+        assert j._first_human_msg == "Real question"
+
+    @pytest.mark.anyio
+    async def test_only_first_human_message_captured(self, journal_setup):
+        """Subsequent on_chat_model_start calls do not overwrite the first message."""
+        from langchain_core.messages import HumanMessage
+
+        j, store = journal_setup
+        j.on_chat_model_start({}, [[HumanMessage(content="First question")]], run_id=uuid4(), tags=["lead_agent"])
+        j.on_chat_model_start({}, [[HumanMessage(content="Second question")]], run_id=uuid4(), tags=["lead_agent"])
+        await j.flush()
+
+        assert j._first_human_msg == "First question"
+        events = await store.list_events("t1", "r1")
+        human_events = [e for e in events if e["event_type"] == "llm.human.input"]
+        assert len(human_events) == 1
+
+    @pytest.mark.anyio
+    async def test_empty_messages_no_crash(self, journal_setup):
+        """on_chat_model_start with empty messages does not crash."""
+        j, store = journal_setup
+        j.on_chat_model_start({}, [], run_id=uuid4(), tags=["lead_agent"])
+        await j.flush()
+
+        assert j._first_human_msg is None
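These tests rely on a journal_setup fixture that is not part of this diff. A hypothetical sketch of what it might provide, inferring only the (journal, store) return shape and the "t1"/"r1" ids from the assertions above; the store class and the RunJournal constructor arguments are assumptions:

import pytest

from deerflow.runtime.journal import RunJournal  # assumed import path, from the file path above

class InMemoryEventStore:
    """Stand-in event store; the real store's API is not shown in this diff."""

    def __init__(self) -> None:
        self._events: dict[tuple[str, str], list[dict]] = {}

    async def put_event(self, thread_id: str, run_id: str, event: dict) -> None:
        self._events.setdefault((thread_id, run_id), []).append(event)

    async def list_events(self, thread_id: str, run_id: str) -> list[dict]:
        return list(self._events.get((thread_id, run_id), []))

@pytest.fixture
async def journal_setup():
    store = InMemoryEventStore()
    # Assumed constructor arguments; RunJournal.__init__ is not shown in this diff.
    j = RunJournal(store=store, thread_id="t1", run_id="r1")
    return j, store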