NarratoAI/app/services/SDP/utils/step1_subtitle_analyzer_openai.py
linyq dbbb06eda8 feat(prompts): introduce a new prompt management system to improve narration-script generation
Updated generate_narration_script.py, short_drama_explanation.py, and step1_subtitle_analyzer_openai.py to integrate the new prompt management system, improving the efficiency and accuracy of narration-script generation and short-drama analysis. Using PromptManager simplifies prompt construction and makes the system more flexible and maintainable.
2025-07-07 17:13:54 +08:00


"""
使用OpenAI API分析字幕文件返回剧情梗概和爆点
"""
import traceback
from openai import OpenAI, BadRequestError
import os
import json
from .utils import load_srt
# 导入新的提示词管理系统
from app.services.prompts import PromptManager
def analyze_subtitle(
    srt_path: str,
    model_name: str,
    api_key: str = None,
    base_url: str = None,
    custom_clips: int = 5
) -> dict:
    """Analyze subtitle content and return the full analysis result.

    Args:
        srt_path (str): Path to the SRT subtitle file.
        model_name (str): Name of the LLM to use, e.g. "gpt-4o-2024-11-20".
        api_key (str, optional): API key for the LLM. Defaults to None.
        base_url (str, optional): Base URL of the LLM API. Defaults to None.
        custom_clips (int, optional): Number of highlight clips to extract. Defaults to 5.

    Returns:
        dict: A dictionary containing the plot summary and the structured time-segment analysis.
    """
    try:
        # Load the subtitle file
        subtitles = load_srt(srt_path)
        subtitle_content = "\n".join([f"{sub['timestamp']}\n{sub['text']}" for sub in subtitles])

        # Initialize the client
        if "deepseek" in model_name.lower():
            client = OpenAI(
                api_key=api_key or os.getenv('DeepSeek_API_KEY'),
                base_url="https://api.siliconflow.cn/v1"  # Use the third-party SiliconFlow API
            )
        else:
            client = OpenAI(
                api_key=api_key or os.getenv('OPENAI_API_KEY'),
                base_url=base_url
            )

        # Build the subtitle-analysis prompt with the new prompt management system
        subtitle_analysis_prompt = PromptManager.get_prompt(
            category="short_drama_editing",
            name="subtitle_analysis",
            parameters={
                "subtitle_content": subtitle_content,
                "custom_clips": custom_clips
            }
        )
        messages = [
            {
                "role": "system",
                "content": "你是一名短剧编剧和内容分析师,擅长从字幕中提取剧情要点和关键情节。"
            },
            {
                "role": "user",
                "content": subtitle_analysis_prompt
            }
        ]

        # DeepSeek R1 and V3 do not support response_format=json_object
        try:
            completion = client.chat.completions.create(
                model=model_name,
                messages=messages,
                response_format={"type": "json_object"}
            )
            summary_data = json.loads(completion.choices[0].message.content)
        except BadRequestError:
            completion = client.chat.completions.create(
                model=model_name,
                messages=messages
            )
            # Strip the leading ```json and trailing ``` from the completion string
            completion = completion.choices[0].message.content.replace("```json", "").replace("```", "")
            summary_data = json.loads(completion)
        except Exception as e:
            raise Exception(f"LLM parsing error: {str(e)}\n{traceback.format_exc()}")

        print(json.dumps(summary_data, indent=4, ensure_ascii=False))
        # Build the list of highlight titles
        plot_titles_text = ""
        print(f"Found {len(summary_data['plot_titles'])} clips")
        for i, point in enumerate(summary_data['plot_titles'], 1):
            plot_titles_text += f"{i}. {point}\n"

        # Build the plot-extraction prompt with the new prompt management system
        plot_extraction_prompt = PromptManager.get_prompt(
            category="short_drama_editing",
            name="plot_extraction",
            parameters={
                "subtitle_content": subtitle_content,
                "plot_summary": summary_data['summary'],
                "plot_titles": plot_titles_text
            }
        )

        messages = [
            {
                "role": "system",
                "content": "你是一名短剧编剧,非常擅长根据字幕分析视频中关键剧情出现的具体时间段。"
            },
            {
                "role": "user",
                "content": plot_extraction_prompt
            }
        ]
        # DeepSeek R1 and V3 do not support response_format=json_object
        try:
            completion = client.chat.completions.create(
                model=model_name,
                messages=messages,
                response_format={"type": "json_object"}
            )
            plot_points_data = json.loads(completion.choices[0].message.content)
        except BadRequestError:
            completion = client.chat.completions.create(
                model=model_name,
                messages=messages
            )
            # Strip the leading ```json and trailing ``` from the completion string
            completion = completion.choices[0].message.content.replace("```json", "").replace("```", "")
            plot_points_data = json.loads(completion)
        except Exception as e:
            raise Exception(f"LLM parsing error: {str(e)}\n{traceback.format_exc()}")

        print(json.dumps(plot_points_data, indent=4, ensure_ascii=False))

        # Merge the results
        return {
            "plot_summary": summary_data,
            "plot_points": plot_points_data["plot_points"]
        }
    except Exception as e:
        raise Exception(f"Error while analyzing subtitles: {str(e)}\n{traceback.format_exc()}")
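

# Illustrative usage sketch (not part of the original module): shows how
# analyze_subtitle might be invoked. The SRT path, model name, and API-key
# source below are assumptions for demonstration only.
# Run from the repo root with the package context intact, e.g.:
#   python -m app.services.SDP.utils.step1_subtitle_analyzer_openai
if __name__ == "__main__":
    result = analyze_subtitle(
        srt_path="resource/srt/episode_01.srt",  # hypothetical subtitle file path
        model_name="gpt-4o-2024-11-20",
        api_key=os.getenv("OPENAI_API_KEY"),      # or pass an explicit key / base_url
        custom_clips=5
    )
    # Print the merged plot summary and plot points
    print(json.dumps(result, indent=4, ensure_ascii=False))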