# From bfbb3e1b8ddef87b02ceb53cc65e61ca4dc40384 Mon Sep 17 00:00:00 2001
# From: rayhpeng
# Date: Sat, 4 Apr 2026 09:00:12 +0800
# Subject: [PATCH] feat(converters): add LangChain-to-OpenAI message format
#  converters
#
# Pure functions langchain_to_openai_message, langchain_to_openai_completion,
# langchain_messages_to_openai, and _infer_finish_reason for converting
# LangChain BaseMessage objects to OpenAI Chat Completions format, used by
# RunJournal for event storage. 15 unit tests added.
#
# Co-Authored-By: Claude Opus 4.6 (1M context)
#
# Files created by the patch (both new, mode 100644):
#   backend/packages/harness/deerflow/runtime/converters.py   (132 lines)
#   backend/tests/test_converters.py                          (195 lines)
#
# NOTE: both files are reproduced below as one module, converters first and
# tests second. In the repository layout the tests obtain the converter
# functions via `from deerflow.runtime.converters import (...)`.
"""Pure functions to convert LangChain message objects to OpenAI Chat Completions format.

Used by RunJournal to build content dicts for event storage.
"""

from __future__ import annotations

import json
from typing import Any
from unittest.mock import MagicMock

import pytest

# ---------------------------------------------------------------------------
# backend/packages/harness/deerflow/runtime/converters.py
# ---------------------------------------------------------------------------

# LangChain ``message.type`` -> OpenAI ``role``. Types missing from this map
# are passed through unchanged as the role.
_ROLE_MAP = {
    "human": "user",
    "ai": "assistant",
    "system": "system",
    "tool": "tool",
}


def _to_openai_tool_call(tool_call: dict) -> dict:
    """Convert one LangChain tool-call dict to an OpenAI ``tool_calls`` entry.

    ``id`` and ``name`` fall back to ``""`` when missing or ``None``.
    ``args`` that is already a string is passed through untouched; anything
    else is JSON-encoded, with a missing/``None`` value encoded as ``"{}"``.
    """
    args = tool_call.get("args")
    if args is None:
        # json.dumps(None) would yield "null"; an empty object is the
        # meaningful "no arguments" value for a function call.
        args = {}
    return {
        "id": tool_call.get("id") or "",
        "type": "function",
        "function": {
            "name": tool_call.get("name") or "",
            "arguments": args if isinstance(args, str) else json.dumps(args),
        },
    }


def langchain_to_openai_message(message: Any) -> dict:
    """Convert a single LangChain BaseMessage to an OpenAI message dict.

    Handles:
    - HumanMessage → {"role": "user", "content": "..."}
    - AIMessage (text only) → {"role": "assistant", "content": "..."}
    - AIMessage (with tool_calls) → {"role": "assistant", "content": null, "tool_calls": [...]}
    - AIMessage (text + tool_calls) → both content and tool_calls present
    - AIMessage (list content / multimodal) → content preserved as list
    - SystemMessage → {"role": "system", "content": "..."}
    - ToolMessage → {"role": "tool", "tool_call_id": "...", "content": "..."}

    The message is read purely through ``getattr`` (``type``, ``content``,
    ``tool_calls``, ``tool_call_id``), so any object exposing those
    attributes works. A ``type`` not present in ``_ROLE_MAP`` is used
    verbatim as the role.
    """
    msg_type = getattr(message, "type", "")
    role = _ROLE_MAP.get(msg_type, msg_type)
    content = getattr(message, "content", "")

    if role == "tool":
        return {
            "role": "tool",
            "tool_call_id": getattr(message, "tool_call_id", ""),
            "content": content,
        }

    if role == "assistant":
        tool_calls = getattr(message, "tool_calls", None) or []
        if not tool_calls:
            return {"role": "assistant", "content": content}
        return {
            "role": "assistant",
            # If no text content, set content to null per OpenAI spec;
            # list (multimodal) content is always kept as-is.
            "content": content if (isinstance(content, list) or content) else None,
            "tool_calls": [_to_openai_tool_call(tc) for tc in tool_calls],
        }

    # user / system / unknown
    return {"role": role, "content": content}


def _infer_finish_reason(message: Any) -> str:
    """Infer OpenAI finish_reason from an AIMessage.

    Returns "tool_calls" if tool_calls present, else looks in
    response_metadata.finish_reason, else returns "stop". The value found in
    response_metadata is returned as-is (it is not mapped to OpenAI's
    vocabulary).
    """
    if getattr(message, "tool_calls", None):
        return "tool_calls"
    resp_meta = getattr(message, "response_metadata", None) or {}
    if isinstance(resp_meta, dict):
        finish = resp_meta.get("finish_reason")
        if finish:
            return finish
    return "stop"


def langchain_to_openai_completion(message: Any) -> dict:
    """Convert an AIMessage and its metadata to an OpenAI completion response dict.

    Returns:
        {
            "id": message.id,
            "model": message.response_metadata.get("model_name"),
            "choices": [{"index": 0, "message": <OpenAI message dict>, "finish_reason": <inferred finish reason>}],
            "usage": {"prompt_tokens": ..., "completion_tokens": ..., "total_tokens": ...} or None,
        }

    ``usage`` is ``None`` when the message has no ``usage_metadata``.
    ``total_tokens`` uses the value reported in ``usage_metadata`` when one
    is present and non-zero, otherwise the sum of input and output tokens.
    """
    resp_meta = getattr(message, "response_metadata", None) or {}
    model_name = resp_meta.get("model_name") if isinstance(resp_meta, dict) else None

    usage_metadata = getattr(message, "usage_metadata", None)
    usage: dict | None = None
    if usage_metadata is not None:
        input_tokens = usage_metadata.get("input_tokens", 0) or 0
        output_tokens = usage_metadata.get("output_tokens", 0) or 0
        usage = {
            "prompt_tokens": input_tokens,
            "completion_tokens": output_tokens,
            # Prefer the reported total; fall back to the sum when absent.
            "total_tokens": usage_metadata.get("total_tokens") or (input_tokens + output_tokens),
        }

    return {
        "id": getattr(message, "id", None),
        "model": model_name,
        "choices": [
            {
                "index": 0,
                "message": langchain_to_openai_message(message),
                "finish_reason": _infer_finish_reason(message),
            }
        ],
        "usage": usage,
    }


def langchain_messages_to_openai(messages: list) -> list[dict]:
    """Convert a list of LangChain BaseMessages to OpenAI message dicts."""
    return [langchain_to_openai_message(m) for m in messages]


# ---------------------------------------------------------------------------
# backend/tests/test_converters.py
# Tests for LangChain-to-OpenAI message format converters.
# ---------------------------------------------------------------------------


def _make_ai_message(content="", tool_calls=None, id="msg-123", usage_metadata=None, response_metadata=None):
    """Build a MagicMock standing in for an AIMessage.

    The ``id`` parameter name shadows the builtin but is kept because the
    tests pass it by keyword.
    """
    msg = MagicMock()
    msg.type = "ai"
    msg.content = content
    msg.tool_calls = tool_calls or []
    msg.id = id
    msg.usage_metadata = usage_metadata
    msg.response_metadata = response_metadata or {}
    return msg


def _make_human_message(content="Hello"):
    """Build a MagicMock standing in for a HumanMessage."""
    msg = MagicMock()
    msg.type = "human"
    msg.content = content
    return msg


def _make_system_message(content="You are an assistant."):
    """Build a MagicMock standing in for a SystemMessage."""
    msg = MagicMock()
    msg.type = "system"
    msg.content = content
    return msg


def _make_tool_message(content="result", tool_call_id="call-abc"):
    """Build a MagicMock standing in for a ToolMessage."""
    msg = MagicMock()
    msg.type = "tool"
    msg.content = content
    msg.tool_call_id = tool_call_id
    return msg


class TestLangchainToOpenaiMessage:
    def test_ai_message_text_only(self):
        msg = _make_ai_message(content="Hello world")
        result = langchain_to_openai_message(msg)
        assert result["role"] == "assistant"
        assert result["content"] == "Hello world"
        assert "tool_calls" not in result

    def test_ai_message_with_tool_calls(self):
        tool_calls = [
            {"id": "call-1", "name": "bash", "args": {"command": "ls"}},
        ]
        msg = _make_ai_message(content="", tool_calls=tool_calls)
        result = langchain_to_openai_message(msg)
        assert result["role"] == "assistant"
        assert result["content"] is None
        assert len(result["tool_calls"]) == 1
        tc = result["tool_calls"][0]
        assert tc["id"] == "call-1"
        assert tc["type"] == "function"
        assert tc["function"]["name"] == "bash"
        # arguments must be a JSON string
        args = json.loads(tc["function"]["arguments"])
        assert args == {"command": "ls"}

    def test_ai_message_text_and_tool_calls(self):
        tool_calls = [
            {"id": "call-2", "name": "read_file", "args": {"path": "/tmp/x"}},
        ]
        msg = _make_ai_message(content="Reading the file", tool_calls=tool_calls)
        result = langchain_to_openai_message(msg)
        assert result["role"] == "assistant"
        assert result["content"] == "Reading the file"
        assert len(result["tool_calls"]) == 1

    def test_ai_message_empty_content_no_tools(self):
        msg = _make_ai_message(content="")
        result = langchain_to_openai_message(msg)
        assert result["role"] == "assistant"
        assert result["content"] == ""
        assert "tool_calls" not in result

    def test_ai_message_list_content(self):
        # Multimodal content is preserved as-is
        list_content = [
            {"type": "text", "text": "Here is an image"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
        ]
        msg = _make_ai_message(content=list_content)
        result = langchain_to_openai_message(msg)
        assert result["role"] == "assistant"
        assert result["content"] == list_content

    def test_human_message(self):
        msg = _make_human_message("Tell me a joke")
        result = langchain_to_openai_message(msg)
        assert result["role"] == "user"
        assert result["content"] == "Tell me a joke"

    def test_tool_message(self):
        msg = _make_tool_message(content="file contents here", tool_call_id="call-xyz")
        result = langchain_to_openai_message(msg)
        assert result["role"] == "tool"
        assert result["tool_call_id"] == "call-xyz"
        assert result["content"] == "file contents here"

    def test_system_message(self):
        msg = _make_system_message("You are a helpful assistant.")
        result = langchain_to_openai_message(msg)
        assert result["role"] == "system"
        assert result["content"] == "You are a helpful assistant."


class TestLangchainToOpenaiCompletion:
    def test_basic_completion(self):
        # Use a real dict for usage_metadata
        usage_metadata = {"input_tokens": 10, "output_tokens": 20}
        msg = _make_ai_message(
            content="Hello",
            id="msg-abc",
            usage_metadata=usage_metadata,
            response_metadata={"model_name": "gpt-4o", "finish_reason": "stop"},
        )
        result = langchain_to_openai_completion(msg)
        assert result["id"] == "msg-abc"
        assert result["model"] == "gpt-4o"
        assert len(result["choices"]) == 1
        choice = result["choices"][0]
        assert choice["index"] == 0
        assert choice["finish_reason"] == "stop"
        assert choice["message"]["role"] == "assistant"
        assert choice["message"]["content"] == "Hello"
        assert result["usage"] is not None
        assert result["usage"]["prompt_tokens"] == 10
        assert result["usage"]["completion_tokens"] == 20
        assert result["usage"]["total_tokens"] == 30

    def test_completion_with_tool_calls(self):
        tool_calls = [{"id": "call-1", "name": "bash", "args": {}}]
        msg = _make_ai_message(
            content="",
            tool_calls=tool_calls,
            id="msg-tc",
            response_metadata={"model_name": "gpt-4o"},
        )
        result = langchain_to_openai_completion(msg)
        assert result["choices"][0]["finish_reason"] == "tool_calls"

    def test_completion_no_usage(self):
        msg = _make_ai_message(content="Hi", id="msg-nousage", usage_metadata=None)
        result = langchain_to_openai_completion(msg)
        assert result["usage"] is None

    def test_finish_reason_from_response_metadata(self):
        msg = _make_ai_message(
            content="Done",
            id="msg-fr",
            response_metadata={"model_name": "claude-3", "finish_reason": "end_turn"},
        )
        result = langchain_to_openai_completion(msg)
        assert result["choices"][0]["finish_reason"] == "end_turn"

    def test_finish_reason_default_stop(self):
        msg = _make_ai_message(content="Done", id="msg-defstop", response_metadata={})
        result = langchain_to_openai_completion(msg)
        assert result["choices"][0]["finish_reason"] == "stop"


class TestMessagesToOpenai:
    def test_convert_message_list(self):
        human = _make_human_message("Hi")
        ai = _make_ai_message(content="Hello!")
        tool_msg = _make_tool_message("result", "call-1")
        messages = [human, ai, tool_msg]
        result = langchain_messages_to_openai(messages)
        assert len(result) == 3
        assert result[0]["role"] == "user"
        assert result[1]["role"] == "assistant"
        assert result[2]["role"] == "tool"

    def test_empty_list(self):
        assert langchain_messages_to_openai([]) == []