mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-04-26 03:38:06 +00:00
feat(converters): add LangChain-to-OpenAI message format converters
Pure functions langchain_to_openai_message, langchain_to_openai_completion, langchain_messages_to_openai, and _infer_finish_reason for converting LangChain BaseMessage objects to OpenAI Chat Completions format, used by RunJournal for event storage. 15 unit tests added. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
74dc663c23
commit
bfbb3e1b8d
132
backend/packages/harness/deerflow/runtime/converters.py
Normal file
132
backend/packages/harness/deerflow/runtime/converters.py
Normal file
@ -0,0 +1,132 @@
|
||||
"""Pure functions to convert LangChain message objects to OpenAI Chat Completions format.
|
||||
|
||||
Used by RunJournal to build content dicts for event storage.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from typing import Any
|
||||
|
||||
# LangChain ``message.type`` -> OpenAI ``role``.
# The ``*MessageChunk`` keys cover streamed messages: their ``type`` is the
# chunk class name (e.g. "AIMessageChunk"), not the short form ("ai").
_ROLE_MAP = {
    "human": "user",
    "ai": "assistant",
    "system": "system",
    "tool": "tool",
    "HumanMessageChunk": "user",
    "AIMessageChunk": "assistant",
    "SystemMessageChunk": "system",
    "ToolMessageChunk": "tool",
}


def langchain_to_openai_message(message: Any) -> dict:
    """Convert a single LangChain BaseMessage to an OpenAI message dict.

    Only attribute access is used (``type``, ``content``, ``tool_calls``,
    ``tool_call_id``); missing attributes fall back to empty defaults.

    Handles:
    - HumanMessage → {"role": "user", "content": "..."}
    - AIMessage (text only) → {"role": "assistant", "content": "..."}
    - AIMessage (with tool_calls) → {"role": "assistant", "content": null, "tool_calls": [...]}
    - AIMessage (text + tool_calls) → both content and tool_calls present
    - AIMessage (list content / multimodal) → content preserved as list
    - SystemMessage → {"role": "system", "content": "..."}
    - ToolMessage → {"role": "tool", "tool_call_id": "...", "content": "..."}
    - The streamed ``*MessageChunk`` variants of the above → same as their
      non-chunk counterpart

    Args:
        message: Object exposing LangChain message attributes.

    Returns:
        A dict in OpenAI Chat Completions message format. A ``type`` that is
        not in the role map is passed through unchanged as ``role``.
    """
    msg_type = getattr(message, "type", "")
    role = _ROLE_MAP.get(msg_type, msg_type)
    content = getattr(message, "content", "")

    if role == "tool":
        return {
            "role": "tool",
            "tool_call_id": getattr(message, "tool_call_id", ""),
            "content": content,
        }

    if role == "assistant":
        tool_calls = getattr(message, "tool_calls", None) or []
        result: dict = {"role": "assistant"}

        if tool_calls:
            openai_tool_calls = []
            for tc in tool_calls:
                args = tc.get("args", {})
                openai_tool_calls.append(
                    {
                        "id": tc.get("id", ""),
                        "type": "function",
                        "function": {
                            "name": tc.get("name", ""),
                            # OpenAI carries arguments as a JSON string;
                            # values that are already strings pass through.
                            "arguments": args if isinstance(args, str) else json.dumps(args),
                        },
                    }
                )
            # If no text content, set content to null per OpenAI spec.
            # List (multimodal) content is always kept, even when empty.
            result["content"] = content if (isinstance(content, list) or content) else None
            result["tool_calls"] = openai_tool_calls
        else:
            result["content"] = content

        return result

    # user / system / unknown
    return {"role": role, "content": content}
|
||||
|
||||
|
||||
def _infer_finish_reason(message: Any) -> str:
|
||||
"""Infer OpenAI finish_reason from an AIMessage.
|
||||
|
||||
Returns "tool_calls" if tool_calls present, else looks in
|
||||
response_metadata.finish_reason, else returns "stop".
|
||||
"""
|
||||
tool_calls = getattr(message, "tool_calls", None) or []
|
||||
if tool_calls:
|
||||
return "tool_calls"
|
||||
resp_meta = getattr(message, "response_metadata", None) or {}
|
||||
if isinstance(resp_meta, dict):
|
||||
finish = resp_meta.get("finish_reason")
|
||||
if finish:
|
||||
return finish
|
||||
return "stop"
|
||||
|
||||
|
||||
def langchain_to_openai_completion(message: Any) -> dict:
    """Wrap an AIMessage and its metadata in an OpenAI completion response dict.

    Returns:
        {
            "id": message.id,
            "model": message.response_metadata.get("model_name"),
            "choices": [{"index": 0, "message": <openai_message>, "finish_reason": <inferred>}],
            "usage": {"prompt_tokens": ..., "completion_tokens": ..., "total_tokens": ...} or None,
        }

        ``model`` is None when ``response_metadata`` is not a dict, and
        ``usage`` is None when the message has no ``usage_metadata``.
    """
    metadata = getattr(message, "response_metadata", None) or {}
    model = metadata.get("model_name") if isinstance(metadata, dict) else None

    choice = {
        "index": 0,
        "message": langchain_to_openai_message(message),
        "finish_reason": _infer_finish_reason(message),
    }

    usage: dict | None = None
    token_counts = getattr(message, "usage_metadata", None)
    if token_counts is not None:
        # Missing or None counts are treated as 0; the total is recomputed
        # from the two counts rather than read from the metadata.
        prompt_tokens = token_counts.get("input_tokens", 0) or 0
        completion_tokens = token_counts.get("output_tokens", 0) or 0
        usage = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        }

    return {
        "id": getattr(message, "id", None),
        "model": model,
        "choices": [choice],
        "usage": usage,
    }
|
||||
|
||||
|
||||
def langchain_messages_to_openai(messages: list) -> list[dict]:
    """Convert each LangChain BaseMessage in *messages* to an OpenAI message dict.

    Order is preserved; an empty input yields an empty list.
    """
    converted: list[dict] = []
    for message in messages:
        converted.append(langchain_to_openai_message(message))
    return converted
|
||||
195
backend/tests/test_converters.py
Normal file
195
backend/tests/test_converters.py
Normal file
@ -0,0 +1,195 @@
|
||||
"""Tests for LangChain-to-OpenAI message format converters."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from deerflow.runtime.converters import (
|
||||
_infer_finish_reason,
|
||||
langchain_messages_to_openai,
|
||||
langchain_to_openai_completion,
|
||||
langchain_to_openai_message,
|
||||
)
|
||||
|
||||
|
||||
def _make_ai_message(content="", tool_calls=None, id="msg-123", usage_metadata=None, response_metadata=None):
|
||||
msg = MagicMock()
|
||||
msg.type = "ai"
|
||||
msg.content = content
|
||||
msg.tool_calls = tool_calls or []
|
||||
msg.id = id
|
||||
msg.usage_metadata = usage_metadata
|
||||
msg.response_metadata = response_metadata or {}
|
||||
return msg
|
||||
|
||||
|
||||
def _make_human_message(content="Hello"):
|
||||
msg = MagicMock()
|
||||
msg.type = "human"
|
||||
msg.content = content
|
||||
return msg
|
||||
|
||||
|
||||
def _make_system_message(content="You are an assistant."):
|
||||
msg = MagicMock()
|
||||
msg.type = "system"
|
||||
msg.content = content
|
||||
return msg
|
||||
|
||||
|
||||
def _make_tool_message(content="result", tool_call_id="call-abc"):
|
||||
msg = MagicMock()
|
||||
msg.type = "tool"
|
||||
msg.content = content
|
||||
msg.tool_call_id = tool_call_id
|
||||
return msg
|
||||
|
||||
|
||||
class TestLangchainToOpenaiMessage:
    """Tests for ``langchain_to_openai_message``, one per message shape."""

    def test_ai_message_text_only(self):
        """A text-only AI message keeps its text and has no ``tool_calls`` key."""
        converted = langchain_to_openai_message(_make_ai_message(content="Hello world"))
        assert converted["role"] == "assistant"
        assert converted["content"] == "Hello world"
        assert "tool_calls" not in converted

    def test_ai_message_with_tool_calls(self):
        """Tool calls without text give null content and JSON-string arguments."""
        message = _make_ai_message(
            content="",
            tool_calls=[{"id": "call-1", "name": "bash", "args": {"command": "ls"}}],
        )
        converted = langchain_to_openai_message(message)
        assert converted["role"] == "assistant"
        assert converted["content"] is None
        assert len(converted["tool_calls"]) == 1

        call = converted["tool_calls"][0]
        assert call["id"] == "call-1"
        assert call["type"] == "function"
        function = call["function"]
        assert function["name"] == "bash"
        # arguments must be a JSON string
        assert json.loads(function["arguments"]) == {"command": "ls"}

    def test_ai_message_text_and_tool_calls(self):
        """Text and tool calls are both present in the converted message."""
        message = _make_ai_message(
            content="Reading the file",
            tool_calls=[{"id": "call-2", "name": "read_file", "args": {"path": "/tmp/x"}}],
        )
        converted = langchain_to_openai_message(message)
        assert converted["role"] == "assistant"
        assert converted["content"] == "Reading the file"
        assert len(converted["tool_calls"]) == 1

    def test_ai_message_empty_content_no_tools(self):
        """Empty text with no tool calls stays an empty string, not null."""
        converted = langchain_to_openai_message(_make_ai_message(content=""))
        assert converted["role"] == "assistant"
        assert converted["content"] == ""
        assert "tool_calls" not in converted

    def test_ai_message_list_content(self):
        """Multimodal (list) content is preserved as-is."""
        parts = [
            {"type": "text", "text": "Here is an image"},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc"}},
        ]
        converted = langchain_to_openai_message(_make_ai_message(content=parts))
        assert converted["role"] == "assistant"
        assert converted["content"] == parts

    def test_human_message(self):
        """A human message maps to the ``user`` role."""
        converted = langchain_to_openai_message(_make_human_message("Tell me a joke"))
        assert converted["role"] == "user"
        assert converted["content"] == "Tell me a joke"

    def test_tool_message(self):
        """A tool message carries its ``tool_call_id`` and content."""
        message = _make_tool_message(content="file contents here", tool_call_id="call-xyz")
        converted = langchain_to_openai_message(message)
        assert converted["role"] == "tool"
        assert converted["tool_call_id"] == "call-xyz"
        assert converted["content"] == "file contents here"

    def test_system_message(self):
        """A system message maps to the ``system`` role."""
        converted = langchain_to_openai_message(_make_system_message("You are a helpful assistant."))
        assert converted["role"] == "system"
        assert converted["content"] == "You are a helpful assistant."
|
||||
|
||||
|
||||
class TestLangchainToOpenaiCompletion:
    """Tests for ``langchain_to_openai_completion``."""

    def test_basic_completion(self):
        """id, model, the single choice and token usage are all populated."""
        # A real dict is used so the token counts read back as plain ints.
        usage_metadata = {"input_tokens": 10, "output_tokens": 20}
        msg = _make_ai_message(
            content="Hello",
            id="msg-abc",
            usage_metadata=usage_metadata,
            response_metadata={"model_name": "gpt-4o", "finish_reason": "stop"},
        )
        result = langchain_to_openai_completion(msg)
        assert result["id"] == "msg-abc"
        assert result["model"] == "gpt-4o"
        assert len(result["choices"]) == 1
        choice = result["choices"][0]
        assert choice["index"] == 0
        assert choice["finish_reason"] == "stop"
        assert choice["message"]["role"] == "assistant"
        assert choice["message"]["content"] == "Hello"
        assert result["usage"] is not None
        assert result["usage"]["prompt_tokens"] == 10
        assert result["usage"]["completion_tokens"] == 20
        assert result["usage"]["total_tokens"] == 30

    def test_completion_with_tool_calls(self):
        """Tool calls make the finish reason ``tool_calls``."""
        tool_calls = [{"id": "call-1", "name": "bash", "args": {}}]
        msg = _make_ai_message(
            content="",
            tool_calls=tool_calls,
            id="msg-tc",
            response_metadata={"model_name": "gpt-4o"},
        )
        result = langchain_to_openai_completion(msg)
        assert result["choices"][0]["finish_reason"] == "tool_calls"

    def test_completion_no_usage(self):
        """``usage`` is None when the message has no usage metadata."""
        msg = _make_ai_message(content="Hi", id="msg-nousage", usage_metadata=None)
        result = langchain_to_openai_completion(msg)
        assert result["usage"] is None

    def test_finish_reason_from_response_metadata(self):
        """A finish reason in response metadata is passed through unchanged."""
        msg = _make_ai_message(
            content="Done",
            id="msg-fr",
            response_metadata={"model_name": "claude-3", "finish_reason": "end_turn"},
        )
        result = langchain_to_openai_completion(msg)
        assert result["choices"][0]["finish_reason"] == "end_turn"

    def test_finish_reason_default_stop(self):
        """With no tool calls and no reported reason, the default is ``stop``."""
        msg = _make_ai_message(content="Done", id="msg-defstop", response_metadata={})
        result = langchain_to_openai_completion(msg)
        assert result["choices"][0]["finish_reason"] == "stop"
|
||||
|
||||
|
||||
class TestMessagesToOpenai:
    """Tests for ``langchain_messages_to_openai``."""

    def test_convert_message_list(self):
        """Each message is converted and the input order is kept."""
        conversation = [
            _make_human_message("Hi"),
            _make_ai_message(content="Hello!"),
            _make_tool_message("result", "call-1"),
        ]
        converted = langchain_messages_to_openai(conversation)
        assert len(converted) == 3
        assert [entry["role"] for entry in converted] == ["user", "assistant", "tool"]

    def test_empty_list(self):
        """An empty input converts to an empty list."""
        converted = langchain_messages_to_openai([])
        assert converted == []
|
||||
Loading…
x
Reference in New Issue
Block a user