NarratoAI/app/services/SDP/utils/step1_subtitle_analyzer_openai.py

163 lines
5.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
使用统一LLM服务分析字幕文件返回剧情梗概和爆点
"""
import traceback
import json
from loguru import logger
from .utils import load_srt, load_srt_from_content
# 导入新的提示词管理系统
from app.services.prompts import PromptManager
# 导入统一LLM服务
from app.services.llm.unified_service import UnifiedLLMService
# 导入安全的异步执行函数
from app.services.llm.migration_adapter import _run_async_safely
def analyze_subtitle(
model_name: str,
api_key: str = None,
base_url: str = None,
custom_clips: int = 5,
provider: str = None,
srt_path: str = None,
subtitle_content: str = None
) -> dict:
"""分析字幕内容,返回完整的分析结果
Args:
model_name (str): 大模型名称
api_key (str, optional): 大模型API密钥. Defaults to None.
base_url (str, optional): 大模型API基础URL. Defaults to None.
custom_clips (int): 需要提取的片段数量. Defaults to 5.
provider (str, optional): LLM服务提供商. Defaults to None.
srt_path (str, optional): SRT字幕文件路径与subtitle_content二选一
subtitle_content (str, optional): SRT字幕文本内容与srt_path二选一
Returns:
dict: 包含剧情梗概和结构化的时间段分析的字典
"""
try:
# 加载字幕文件或内容
if subtitle_content and subtitle_content.strip():
subtitles = load_srt_from_content(subtitle_content)
source_label = "字幕内容(直接传入)"
elif srt_path:
subtitles = load_srt(srt_path)
source_label = f"字幕文件: {srt_path}"
else:
raise ValueError("必须提供 srt_path 或 subtitle_content 参数")
# 检查字幕是否为空
if not subtitles:
error_msg = (
f"字幕来源 [{source_label}] 解析后无有效内容。\n"
f"请检查:\n"
f"1. 文件格式是否为标准 SRT\n"
f"2. 文件编码是否为 UTF-8、GBK 或 GB2312\n"
f"3. 文件内容是否为空"
)
logger.error(error_msg)
raise ValueError(error_msg)
logger.info(f"成功加载字幕来源 [{source_label}],共 {len(subtitles)} 条有效字幕")
subtitle_content = "\n".join([f"{sub['timestamp']}\n{sub['text']}" for sub in subtitles])
# 如果没有指定provider根据model_name推断
if not provider:
if "deepseek" in model_name.lower():
provider = "deepseek"
elif "gpt" in model_name.lower():
provider = "openai"
elif "gemini" in model_name.lower():
provider = "gemini"
else:
provider = "openai" # 默认使用openai
logger.info(f"使用LLM服务分析字幕提供商: {provider}, 模型: {model_name}")
# 使用新的提示词管理系统
subtitle_analysis_prompt = PromptManager.get_prompt(
category="short_drama_editing",
name="subtitle_analysis",
parameters={
"subtitle_content": subtitle_content,
"custom_clips": custom_clips
}
)
# 使用统一LLM服务生成文本
logger.info("开始分析字幕内容...")
response = _run_async_safely(
UnifiedLLMService.generate_text,
prompt=subtitle_analysis_prompt,
provider=provider,
model=model_name,
api_key=api_key,
base_url=base_url,
temperature=0.1, # 使用较低的温度以获得更稳定的结果
max_tokens=4000
)
# 解析JSON响应
from webui.tools.generate_short_summary import parse_and_fix_json
summary_data = parse_and_fix_json(response)
if not summary_data:
raise Exception("无法解析LLM返回的JSON数据")
logger.info(f"字幕分析完成,找到 {len(summary_data.get('plot_titles', []))} 个关键情节")
print(json.dumps(summary_data, indent=4, ensure_ascii=False))
# 构建爆点标题列表
plot_titles_text = ""
print(f"找到 {len(summary_data['plot_titles'])} 个片段")
for i, point in enumerate(summary_data['plot_titles'], 1):
plot_titles_text += f"{i}. {point}\n"
# 使用新的提示词管理系统
plot_extraction_prompt = PromptManager.get_prompt(
category="short_drama_editing",
name="plot_extraction",
parameters={
"subtitle_content": subtitle_content,
"plot_summary": summary_data['summary'],
"plot_titles": plot_titles_text
}
)
# 使用统一LLM服务进行爆点时间段分析
logger.info("开始分析爆点时间段...")
response = _run_async_safely(
UnifiedLLMService.generate_text,
prompt=plot_extraction_prompt,
provider=provider,
model=model_name,
api_key=api_key,
base_url=base_url,
temperature=0.1,
max_tokens=4000
)
# 解析JSON响应
plot_data = parse_and_fix_json(response)
if not plot_data:
raise Exception("无法解析爆点分析的JSON数据")
logger.info(f"爆点分析完成,找到 {len(plot_data.get('plot_points', []))} 个时间段")
# 合并结果
result = {
"summary": summary_data.get("summary", ""),
"plot_titles": summary_data.get("plot_titles", []),
"plot_points": plot_data.get("plot_points", [])
}
return result
except Exception as e:
logger.error(f"分析字幕时发生错误: {str(e)}")
raise Exception(f"分析字幕时发生错误:{str(e)}\n{traceback.format_exc()}")