fix: use SystemMessage+HumanMessage for follow-up question generation (#1751)

* fix: use SystemMessage+HumanMessage for follow-up question generation (fixes #1697)

Some models (e.g. MiniMax-M2.7) require the system prompt and user
content to be passed as separate message objects rather than a single
combined string. Invoking with a plain string sends everything as a
HumanMessage, which causes these models to ignore the generation
instructions and fail to produce valid follow-up questions.

* test: verify model is invoked with SystemMessage and HumanMessage
This commit is contained in:
Octopus 2026-04-03 07:09:01 -05:00 committed by GitHub
parent 3d4f9a88fe
commit 83039fa22c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 30 additions and 5 deletions

View File

@ -2,6 +2,7 @@ import json
import logging
from fastapi import APIRouter
from langchain_core.messages import HumanMessage, SystemMessage
from pydantic import BaseModel, Field
from deerflow.models import create_chat_model
@ -106,7 +107,7 @@ async def generate_suggestions(thread_id: str, request: SuggestionsRequest) -> S
if not conversation:
return SuggestionsResponse(suggestions=[])
prompt = (
system_instruction = (
"You are generating follow-up questions to help the user continue the conversation.\n"
f"Based on the conversation below, produce EXACTLY {n} short questions the user might ask next.\n"
"Requirements:\n"
@ -114,14 +115,13 @@ async def generate_suggestions(thread_id: str, request: SuggestionsRequest) -> S
"- Questions must be written in the same language as the user.\n"
"- Keep each question concise (ideally <= 20 words / <= 40 Chinese characters).\n"
"- Do NOT include numbering, markdown, or any extra text.\n"
"- Output MUST be a JSON array of strings only.\n\n"
"Conversation:\n"
f"{conversation}\n"
"- Output MUST be a JSON array of strings only.\n"
)
user_content = f"Conversation Context:\n{conversation}\n\nGenerate {n} follow-up questions"
try:
model = create_chat_model(name=request.model_name, thinking_enabled=False)
response = model.invoke(prompt)
response = model.invoke([SystemMessage(content=system_instruction), HumanMessage(content=user_content)])
raw = _extract_response_text(response.content)
suggestions = _parse_json_string_list(raw) or []
cleaned = [s.replace("\n", " ").strip() for s in suggestions if s.strip()]

View File

@ -1,6 +1,8 @@
import asyncio
from unittest.mock import MagicMock
from langchain_core.messages import HumanMessage, SystemMessage
from app.gateway.routers import suggestions
@ -100,3 +102,26 @@ def test_generate_suggestions_returns_empty_on_model_error(monkeypatch):
result = asyncio.run(suggestions.generate_suggestions("t1", req))
assert result.suggestions == []
def test_generate_suggestions_invokes_model_with_system_and_human_messages(monkeypatch):
    """Regression test: the model must be invoked with a [SystemMessage, HumanMessage]
    pair rather than a single combined prompt string, so models that require a
    separate system prompt still see the generation instructions."""
    request = suggestions.SuggestionsRequest(
        messages=[
            suggestions.SuggestionMessage(role="user", content="What is Python?"),
            suggestions.SuggestionMessage(role="assistant", content="Python is a programming language."),
        ],
        n=2,
        model_name=None,
    )
    # Stub out the chat model so no real LLM call happens; capture what invoke() receives.
    stub_model = MagicMock()
    stub_model.invoke.return_value = MagicMock(content='["Q1", "Q2"]')
    monkeypatch.setattr(suggestions, "create_chat_model", lambda **kwargs: stub_model)

    asyncio.run(suggestions.generate_suggestions("t1", request))

    invoked_messages = stub_model.invoke.call_args[0][0]
    assert len(invoked_messages) == 2
    # System message carries the instructions; human message carries the conversation.
    assert isinstance(invoked_messages[0], SystemMessage)
    assert isinstance(invoked_messages[1], HumanMessage)
    assert "follow-up questions" in invoked_messages[0].content
    assert "What is Python?" in invoked_messages[1].content