mirror of
https://github.com/linyqh/NarratoAI.git
synced 2026-06-16 20:32:06 +00:00
feat(tts,search,video): 新增OmniVoice TTS、联网搜索与多视频剪辑支持
新增OmniVoice语音合成引擎全流程支持,包含配置项、WebUI界面与服务实现 集成Tavily联网搜索能力,支持短剧剧情分析前自动检索剧情背景信息 新增多视频源剪辑支持,完善脚本校验规则并重构剪辑逻辑适配多视频路径 重构LLM剧情分析Prompt,优化输出格式适配多场景与联网检索结果 调整streamlit版本至1.56.0修复兼容性问题 新增相关单元测试与多语言翻译,更新配置示例文件
This commit is contained in:
parent
d147fe66e4
commit
342fc15f3b
@ -13,8 +13,11 @@ INDEXTTS_ENGINE = "indextts"
|
||||
INDEXTTS_DISPLAY_NAME = "IndexTTS-1.5"
|
||||
INDEXTTS2_ENGINE = "indextts2"
|
||||
INDEXTTS2_DISPLAY_NAME = "IndexTTS-2"
|
||||
OMNIVOICE_ENGINE = "omnivoice"
|
||||
OMNIVOICE_DISPLAY_NAME = "OmniVoice"
|
||||
INDEXTTS_VOICE_PREFIX = f"{INDEXTTS_ENGINE}:"
|
||||
INDEXTTS2_VOICE_PREFIX = f"{INDEXTTS2_ENGINE}:"
|
||||
OMNIVOICE_VOICE_PREFIX = f"{OMNIVOICE_ENGINE}:"
|
||||
|
||||
|
||||
def normalize_tts_engine_name(tts_engine: str) -> str:
|
||||
@ -131,6 +134,7 @@ def save_config():
|
||||
_cfg["fun_asr"] = fun_asr
|
||||
_cfg["indextts"] = indextts
|
||||
_cfg["indextts2"] = indextts2
|
||||
_cfg["omnivoice"] = omnivoice
|
||||
_cfg["doubaotts"] = doubaotts
|
||||
f.write(toml.dumps(_cfg))
|
||||
|
||||
@ -148,6 +152,7 @@ tts_qwen = _cfg.get("tts_qwen", {})
|
||||
fun_asr = _cfg.get("fun_asr", {})
|
||||
indextts = _cfg.get("indextts", {})
|
||||
indextts2 = _cfg.get("indextts2", {})
|
||||
omnivoice = _cfg.get("omnivoice", {})
|
||||
doubaotts = _cfg.get("doubaotts", {})
|
||||
|
||||
hostname = socket.gethostname()
|
||||
|
||||
@ -35,6 +35,9 @@ DEFAULT_LLM_APP_CONFIG = {
|
||||
"text_openai_model_name": DEFAULT_TEXT_OPENAI_MODEL_NAME,
|
||||
"text_openai_api_key": "",
|
||||
"text_openai_base_url": DEFAULT_OPENAI_COMPATIBLE_BASE_URL,
|
||||
"tavily_api_key": "",
|
||||
"tavily_search_depth": "basic",
|
||||
"tavily_max_results": 5,
|
||||
}
|
||||
DEFAULT_LLM_APP_CONFIG.update(DEFAULT_LLM_GENERATION_APP_CONFIG)
|
||||
|
||||
|
||||
@ -32,6 +32,82 @@ def parse_timestamp(timestamp: str) -> tuple:
|
||||
return start_time, end_time
|
||||
|
||||
|
||||
def _normalize_video_origin_paths(
|
||||
video_origin_path: str,
|
||||
video_origin_paths: Optional[List[str]] = None,
|
||||
) -> List[str]:
|
||||
paths = []
|
||||
if video_origin_paths:
|
||||
paths.extend(video_origin_paths)
|
||||
if video_origin_path:
|
||||
paths.insert(0, video_origin_path)
|
||||
|
||||
normalized_paths = []
|
||||
seen = set()
|
||||
for item in paths:
|
||||
if not isinstance(item, str):
|
||||
continue
|
||||
item = item.strip()
|
||||
if not item or item in seen:
|
||||
continue
|
||||
normalized_paths.append(item)
|
||||
seen.add(item)
|
||||
return normalized_paths
|
||||
|
||||
|
||||
def _coerce_video_id(value) -> Optional[int]:
|
||||
try:
|
||||
video_id = int(value)
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
return video_id if video_id > 0 else None
|
||||
|
||||
|
||||
def _match_video_id_by_name(video_name: str, video_origin_paths: List[str]) -> Optional[int]:
|
||||
video_name = str(video_name or "").strip()
|
||||
if not video_name:
|
||||
return None
|
||||
|
||||
expected_name = os.path.basename(video_name)
|
||||
for index, video_path in enumerate(video_origin_paths, start=1):
|
||||
if os.path.basename(video_path) == expected_name:
|
||||
return index
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_script_video_path(script_item: Dict, video_origin_paths: List[str]) -> str:
|
||||
explicit_path = (
|
||||
script_item.get("source_video_path")
|
||||
or script_item.get("video_origin_path")
|
||||
or script_item.get("origin_video_path")
|
||||
)
|
||||
if explicit_path and os.path.exists(explicit_path):
|
||||
return explicit_path
|
||||
|
||||
video_id = _coerce_video_id(script_item.get("video_id") or script_item.get("video_index"))
|
||||
matched_video_id = _match_video_id_by_name(
|
||||
script_item.get("video_name") or script_item.get("source_video"),
|
||||
video_origin_paths,
|
||||
)
|
||||
if matched_video_id:
|
||||
video_id = matched_video_id
|
||||
|
||||
if video_id is not None:
|
||||
if video_id <= len(video_origin_paths):
|
||||
return video_origin_paths[video_id - 1]
|
||||
logger.warning(
|
||||
f"片段 {script_item.get('_id')} 的 video_id={video_id} 超出视频数量 "
|
||||
f"{len(video_origin_paths)},默认使用第一个视频"
|
||||
)
|
||||
|
||||
return video_origin_paths[0]
|
||||
|
||||
|
||||
def _safe_output_id(value) -> str:
|
||||
safe_value = str(value if value is not None else "unknown")
|
||||
return "".join(char if char.isalnum() or char in ("-", "_") else "_" for char in safe_value)
|
||||
|
||||
|
||||
def calculate_end_time(start_time: str, duration: float, extra_seconds: float = 1.0) -> str:
|
||||
"""
|
||||
根据开始时间和持续时间计算结束时间
|
||||
@ -579,7 +655,7 @@ def _process_narration_only_segment(
|
||||
# 生成输出文件名
|
||||
safe_start_time = start_time.replace(':', '-').replace(',', '-')
|
||||
safe_end_time = calculated_end_time.replace(':', '-').replace(',', '-')
|
||||
output_filename = f"ost0_vid_{safe_start_time}@{safe_end_time}.mp4"
|
||||
output_filename = f"ost0_{_safe_output_id(_id)}_vid_{safe_start_time}@{safe_end_time}.mp4"
|
||||
output_path = os.path.join(output_dir, output_filename)
|
||||
|
||||
# 构建FFmpeg命令 - 移除音频
|
||||
@ -622,7 +698,7 @@ def _process_original_audio_segment(
|
||||
# 生成输出文件名
|
||||
safe_start_time = start_time.replace(':', '-').replace(',', '-')
|
||||
safe_end_time = end_time.replace(':', '-').replace(',', '-')
|
||||
output_filename = f"ost1_vid_{safe_start_time}@{safe_end_time}.mp4"
|
||||
output_filename = f"ost1_{_safe_output_id(_id)}_vid_{safe_start_time}@{safe_end_time}.mp4"
|
||||
output_path = os.path.join(output_dir, output_filename)
|
||||
|
||||
# 构建FFmpeg命令 - 保持原声
|
||||
@ -674,7 +750,7 @@ def _process_mixed_segment(
|
||||
# 生成输出文件名
|
||||
safe_start_time = start_time.replace(':', '-').replace(',', '-')
|
||||
safe_end_time = calculated_end_time.replace(':', '-').replace(',', '-')
|
||||
output_filename = f"ost2_vid_{safe_start_time}@{safe_end_time}.mp4"
|
||||
output_filename = f"ost2_{_safe_output_id(_id)}_vid_{safe_start_time}@{safe_end_time}.mp4"
|
||||
output_path = os.path.join(output_dir, output_filename)
|
||||
|
||||
# 构建FFmpeg命令 - 保持原声
|
||||
@ -782,28 +858,34 @@ def clip_video_unified(
|
||||
script_list: List[Dict],
|
||||
tts_results: List[Dict],
|
||||
output_dir: Optional[str] = None,
|
||||
task_id: Optional[str] = None
|
||||
task_id: Optional[str] = None,
|
||||
video_origin_paths: Optional[List[str]] = None
|
||||
) -> Dict[str, str]:
|
||||
"""
|
||||
基于OST类型的统一视频裁剪策略 - 消除双重裁剪问题
|
||||
|
||||
Args:
|
||||
video_origin_path: 原始视频的路径
|
||||
video_origin_path: 原始视频的路径;旧脚本或无 video_id 片段默认使用该视频
|
||||
script_list: 完整的脚本列表,包含所有片段信息
|
||||
tts_results: TTS结果列表,仅包含OST=0和OST=2的片段
|
||||
output_dir: 输出目录路径,默认为None时会自动生成
|
||||
task_id: 任务ID,用于生成唯一的输出目录,默认为None时会自动生成
|
||||
video_origin_paths: 多个原始视频路径,脚本片段可用 video_id/video_name 指定来源
|
||||
|
||||
Returns:
|
||||
Dict[str, str]: 片段ID到裁剪后视频路径的映射
|
||||
"""
|
||||
# 检查视频文件是否存在
|
||||
if not os.path.exists(video_origin_path):
|
||||
raise FileNotFoundError(f"视频文件不存在: {video_origin_path}")
|
||||
video_source_paths = _normalize_video_origin_paths(video_origin_path, video_origin_paths)
|
||||
if not video_source_paths:
|
||||
raise FileNotFoundError("视频文件不存在: 未提供原始视频路径")
|
||||
|
||||
missing_video_paths = [item for item in video_source_paths if not os.path.exists(item)]
|
||||
if missing_video_paths:
|
||||
raise FileNotFoundError(f"视频文件不存在: {', '.join(missing_video_paths)}")
|
||||
|
||||
# 如果未提供task_id,则根据输入生成一个唯一ID
|
||||
if task_id is None:
|
||||
content_for_hash = f"{video_origin_path}_{json.dumps(script_list)}"
|
||||
content_for_hash = f"{json.dumps(video_source_paths, ensure_ascii=False)}_{json.dumps(script_list, ensure_ascii=False)}"
|
||||
task_id = hashlib.md5(content_for_hash.encode()).hexdigest()
|
||||
|
||||
# 设置输出目录
|
||||
@ -840,29 +922,33 @@ def clip_video_unified(
|
||||
failed_clips = []
|
||||
success_count = 0
|
||||
|
||||
logger.info(f"📹 开始统一视频裁剪,总共{total_clips}个片段")
|
||||
logger.info(f"📹 开始统一视频裁剪,总共{total_clips}个片段,源视频{len(video_source_paths)}个")
|
||||
|
||||
for i, script_item in enumerate(script_list, 1):
|
||||
_id = script_item.get("_id")
|
||||
ost = script_item.get("OST", 0)
|
||||
timestamp = script_item["timestamp"]
|
||||
source_video_path = _resolve_script_video_path(script_item, video_source_paths)
|
||||
|
||||
logger.info(f"📹 [{i}/{total_clips}] 处理片段 ID:{_id}, OST:{ost}, 时间戳:{timestamp}")
|
||||
logger.info(
|
||||
f"📹 [{i}/{total_clips}] 处理片段 ID:{_id}, OST:{ost}, "
|
||||
f"视频:{os.path.basename(source_video_path)}, 时间戳:{timestamp}"
|
||||
)
|
||||
|
||||
try:
|
||||
if ost == 0: # 纯解说片段
|
||||
output_path = _process_narration_only_segment(
|
||||
video_origin_path, script_item, tts_map, output_dir,
|
||||
source_video_path, script_item, tts_map, output_dir,
|
||||
encoder_config, hwaccel_args
|
||||
)
|
||||
elif ost == 1: # 纯原声片段
|
||||
output_path = _process_original_audio_segment(
|
||||
video_origin_path, script_item, output_dir,
|
||||
source_video_path, script_item, output_dir,
|
||||
encoder_config, hwaccel_args
|
||||
)
|
||||
elif ost == 2: # 解说+原声混合片段
|
||||
output_path = _process_mixed_segment(
|
||||
video_origin_path, script_item, tts_map, output_dir,
|
||||
source_video_path, script_item, tts_map, output_dir,
|
||||
encoder_config, hwaccel_args
|
||||
)
|
||||
else:
|
||||
|
||||
@ -107,7 +107,7 @@ def _clamp_duration_to_media(
|
||||
|
||||
|
||||
def _normalize_indextts_reference_audio(params: VideoClipParams) -> None:
|
||||
"""Ensure IndexTTS engines use the configured reference audio instead of a stale UI voice."""
|
||||
"""Ensure local clone TTS engines use configured reference audio instead of a stale UI voice."""
|
||||
params.tts_engine = config.normalize_tts_engine_name(params.tts_engine)
|
||||
if params.tts_engine == config.INDEXTTS_ENGINE:
|
||||
tts_config = config.indextts
|
||||
@ -117,6 +117,12 @@ def _normalize_indextts_reference_audio(params: VideoClipParams) -> None:
|
||||
tts_config = config.indextts2
|
||||
voice_prefix = config.INDEXTTS2_VOICE_PREFIX
|
||||
display_name = "IndexTTS-2"
|
||||
elif params.tts_engine == config.OMNIVOICE_ENGINE:
|
||||
tts_config = config.omnivoice
|
||||
if tts_config.get("mode", "auto") != "voice_clone":
|
||||
return
|
||||
voice_prefix = config.OMNIVOICE_VOICE_PREFIX
|
||||
display_name = "OmniVoice"
|
||||
else:
|
||||
return
|
||||
|
||||
@ -199,6 +205,7 @@ def start_export_jianying_draft(task_id: str, params: VideoClipParams):
|
||||
logger.info("\n\n## 3. 统一视频裁剪(基于OST类型)")
|
||||
video_clip_result = clip_video.clip_video_unified(
|
||||
video_origin_path=params.video_origin_path,
|
||||
video_origin_paths=getattr(params, "video_origin_paths", []),
|
||||
script_list=list_script,
|
||||
tts_results=tts_results
|
||||
)
|
||||
|
||||
@ -12,6 +12,7 @@ from loguru import logger
|
||||
from .manager import LLMServiceManager
|
||||
from .validators import OutputValidator
|
||||
from .exceptions import LLMServiceError
|
||||
from app.services.prompts import PromptManager
|
||||
|
||||
# 提供商注册由 webui.py:main() 显式调用(见 LLM 提供商注册机制重构)
|
||||
# 这样更可靠,错误也更容易调试
|
||||
@ -181,12 +182,20 @@ class UnifiedLLMService:
|
||||
LLMServiceError: 服务调用失败时抛出
|
||||
"""
|
||||
try:
|
||||
# 构建分析提示词
|
||||
system_prompt = "你是一位专业的剧本分析师和剧情概括助手。请仔细分析字幕内容,提取关键剧情信息。"
|
||||
prompt = PromptManager.get_prompt(
|
||||
category="short_drama_narration",
|
||||
name="plot_analysis",
|
||||
parameters={"subtitle_content": subtitle_content},
|
||||
)
|
||||
prompt_object = PromptManager.get_prompt_object(
|
||||
category="short_drama_narration",
|
||||
name="plot_analysis",
|
||||
)
|
||||
system_prompt = prompt_object.get_system_prompt()
|
||||
|
||||
# 生成分析结果
|
||||
result = await UnifiedLLMService.generate_text(
|
||||
prompt=subtitle_content,
|
||||
prompt=prompt,
|
||||
system_prompt=system_prompt,
|
||||
provider=provider,
|
||||
temperature=temperature,
|
||||
|
||||
@ -113,6 +113,8 @@ class OutputValidator:
|
||||
"required": ["_id", "timestamp", "picture", "narration"],
|
||||
"properties": {
|
||||
"_id": {"type": "number"},
|
||||
"video_id": {"type": "number"},
|
||||
"video_name": {"type": "string"},
|
||||
"timestamp": {"type": "string"},
|
||||
"picture": {"type": "string"},
|
||||
"narration": {"type": "string"},
|
||||
@ -161,6 +163,16 @@ class OutputValidator:
|
||||
item_id = item.get("_id")
|
||||
if not isinstance(item_id, (int, float)) or item_id <= 0:
|
||||
raise ValidationError(f"第{index+1}项ID必须为正整数: {item_id}", "invalid_id")
|
||||
|
||||
video_id = item.get("video_id")
|
||||
if video_id not in (None, "") and (
|
||||
not isinstance(video_id, (int, float)) or video_id <= 0
|
||||
):
|
||||
raise ValidationError(f"第{index+1}项video_id必须为正整数: {video_id}", "invalid_video_id")
|
||||
|
||||
video_name = item.get("video_name")
|
||||
if video_name not in (None, "") and not isinstance(video_name, str):
|
||||
raise ValidationError(f"第{index+1}项video_name必须为字符串: {video_name}", "invalid_video_name")
|
||||
|
||||
@staticmethod
|
||||
def validate_subtitle_analysis(output: str) -> str:
|
||||
|
||||
@ -19,72 +19,79 @@ class PlotAnalysisPrompt(TextPrompt):
|
||||
metadata = PromptMetadata(
|
||||
name="plot_analysis",
|
||||
category="short_drama_narration",
|
||||
version="v1.0",
|
||||
description="分析短剧字幕内容,提供详细的剧情分析和分段解析",
|
||||
version="v1.1",
|
||||
description="结合字幕和可选联网检索上下文,输出适合短剧解说脚本生成的结构化剧情理解",
|
||||
model_type=ModelType.TEXT,
|
||||
output_format=OutputFormat.TEXT,
|
||||
tags=["短剧", "剧情分析", "字幕解析", "分段分析"],
|
||||
tags=["短剧", "剧情分析", "字幕解析", "分段分析", "联网检索", "解说脚本素材"],
|
||||
parameters=["subtitle_content"]
|
||||
)
|
||||
super().__init__(metadata)
|
||||
|
||||
self._system_prompt = "你是一位专业的剧本分析师和剧情概括助手。"
|
||||
self._system_prompt = "你是一位专业的短剧解说策划和剧本分析师。请输出克制、结构化、可直接供下游解说脚本生成使用的剧情理解材料。"
|
||||
|
||||
def get_template(self) -> str:
|
||||
return """# 角色
|
||||
你是一位专业的剧本分析师和剧情概括助手。
|
||||
你是一位专业的短剧解说策划和剧本分析师。你的输出不是给观众看的成片文案,而是给下游“短剧解说脚本生成器”使用的结构化剧情理解材料。
|
||||
|
||||
# 任务
|
||||
我将为你提供一部短剧的完整字幕文本。请你基于这些字幕,完成以下任务:
|
||||
1. **整体剧情分析**:简要概括整个短剧的核心剧情脉络、主要冲突和结局(如果有的话)。
|
||||
2. **分段剧情解析与时间戳定位**:
|
||||
* 将整个短剧划分为若干个关键的剧情段落(例如:开端、发展、转折、高潮、结局,或根据具体情节自然划分)。
|
||||
* 段落数应该与字幕长度成正比。
|
||||
* 对于每一个剧情段落:
|
||||
* **概括该段落的主要内容**:用简洁的语言描述这段剧情发生了什么。
|
||||
* **标注对应的时间戳范围**:明确指出该剧情段落对应的开始字幕时间戳和结束字幕时间戳。请直接从字幕中提取时间信息。
|
||||
# 输入说明
|
||||
下面的输入可能只包含一个视频的原始字幕,也可能包含多个视频文件的字幕;也可能同时包含 Tavily 联网检索结果和原始字幕。
|
||||
- 联网检索结果只能用于辅助识别短剧名称、人物关系、时代背景、公开剧情梗概。
|
||||
- 原始字幕是唯一可信的当前片段事实来源。
|
||||
- 如果联网检索结果与字幕冲突,必须以字幕为准。
|
||||
- 如果联网检索结果包含当前字幕尚未出现的后续剧情,只能放在“字幕未覆盖/需谨慎信息”中,不能写进当前剧情事实。
|
||||
- 多个视频字幕会以“视频 1: 文件名”“视频 2: 文件名”等标题分隔。时间戳均为对应视频内部时间,不是拼接后的累计时间。
|
||||
|
||||
# 输入格式
|
||||
字幕内容通常包含时间戳和对话,例如:
|
||||
```
|
||||
00:00:05,000 --> 00:00:10,000
|
||||
[角色A]: 你好吗?
|
||||
00:00:10,500 --> 00:00:15,000
|
||||
[角色B]: 我很好,谢谢。发生了一些有趣的事情。
|
||||
... (更多字幕内容) ...
|
||||
```
|
||||
我将把实际字幕粘贴在下方。
|
||||
# 核心任务
|
||||
请基于输入完成剧情理解,目标是帮助后续生成高质量短剧解说脚本:
|
||||
1. 识别短剧名称、当前字幕范围、视频来源、联网检索辅助信息和字幕事实边界。
|
||||
2. 统一人物称呼,避免同一人物出现多个名字写法。
|
||||
3. 用 100-180 字概括当前字幕覆盖的剧情,不提前剧透字幕未出现的内容。
|
||||
4. 按视频来源和字幕时间顺序拆分关键剧情段落,并为每段标注准确 video_id / video_name / 时间戳。
|
||||
5. 提炼解说创作可用的钩子、冲突、爽点/泪点/悬念点和建议保留原声片段。
|
||||
|
||||
# 输出格式要求
|
||||
请按照以下格式清晰地呈现分析结果:
|
||||
# 强制输出规则
|
||||
1. 禁止输出寒暄、解释身份或“好的,我将……”等聊天式开场。
|
||||
2. 禁止编造字幕中没有的具体事件、对白、关系进展或结局。
|
||||
3. 时间戳必须直接来自对应视频字幕;无法确定时写“字幕未明确”,不要猜测。
|
||||
4. 多视频场景下必须明确每段来自哪个视频文件,禁止把不同视频的同名时间戳混在一起。
|
||||
5. 人名必须统一:优先采用联网检索中的正式名称;如果字幕写法不同,在人物表中保留“字幕称呼”。
|
||||
6. 内容要简洁、客观、可复用,避免散文化长段落。
|
||||
7. 必须严格按照下面的 Markdown 格式输出,不要添加额外章节。
|
||||
|
||||
**一、整体剧情概括:**
|
||||
[此处填写对整个短剧剧情的概括]
|
||||
# 输出格式
|
||||
## 一、基础识别
|
||||
- 短剧名称:[如输入可判断则填写,否则写“未知”]
|
||||
- 当前字幕范围:[开始时间戳] --> [结束时间戳];无法确定则写“字幕未明确”
|
||||
- 视频来源:[列出视频编号、文件名和各自字幕时间范围;单视频也要写]
|
||||
- 联网检索确认:[仅写可辅助理解的公开信息;没有联网结果则写“未启用/未提供”]
|
||||
- 字幕内实际出现:[列出当前字幕真实出现的关键事实,2-4 条]
|
||||
- 字幕未覆盖/需谨慎信息:[列出联网结果提到但当前字幕未发生的内容;没有则写“无”]
|
||||
|
||||
**二、分段剧情解析:**
|
||||
## 二、人物与关系
|
||||
| 统一称呼 | 字幕称呼 | 身份/关系 | 当前剧情作用 | 确定性 |
|
||||
|---|---|---|---|---|
|
||||
| [人物名] | [字幕原文称呼] | [身份或关系] | [在当前片段中的作用] | 字幕明确/联网辅助/合理推断 |
|
||||
|
||||
**剧情段落 1:[段落主题/概括,例如:主角登场与背景介绍]**
|
||||
* **时间戳:** [开始时间戳] --> [结束时间戳]
|
||||
* **内容概要:** [对这段剧情的详细描述]
|
||||
## 三、整体剧情概括
|
||||
[100-180 字,只概括当前字幕覆盖的剧情。必须包含核心冲突、人物动机和当前悬念。]
|
||||
|
||||
**剧情段落 2:[段落主题/概括,例如:第一个冲突出现]**
|
||||
* **时间戳:** [开始时间戳] --> [结束时间戳]
|
||||
* **内容概要:** [对这段剧情的详细描述]
|
||||
## 四、分段剧情解析
|
||||
| 视频 | 时间戳 | 段落主题 | 剧情事件 | 情绪/冲突功能 |
|
||||
|---|---|---|---|---|
|
||||
| [video_id + video_name] | [开始] --> [结束] | [简短主题] | [当前段落发生了什么] | [铺垫/冲突升级/人物塑造/反转/悬念/情绪爆发等] |
|
||||
|
||||
... (根据实际剧情段落数量继续) ...
|
||||
## 五、解说创作重点
|
||||
- 开场钩子:[用一句话指出最适合开场抓人的冲突或疑问]
|
||||
- 核心冲突:[当前片段最主要的矛盾]
|
||||
- 爽点/泪点/情绪点:[列 1-3 条,没有则写“无明显”]
|
||||
- 悬念点:[当前片段留下的疑问或后续期待]
|
||||
- 建议保留原声片段:
|
||||
1. [video_id + video_name + 时间戳]:[保留理由;如果没有合适原声,写“无明显”]
|
||||
|
||||
**剧情段落 N:[段落主题/概括,例如:结局与反思]**
|
||||
* **时间戳:** [开始时间戳] --> [结束时间戳]
|
||||
* **内容概要:** [对这段剧情的详细描述]
|
||||
## 六、联网信息校验
|
||||
- 可用于辅助理解的信息:[联网结果中可帮助理解当前字幕的信息;没有则写“无”]
|
||||
- 与字幕不一致或字幕未覆盖的信息:[必须列出,不要混入当前剧情事实;没有则写“无”]
|
||||
|
||||
# 注意事项
|
||||
* 请确保时间戳的准确性,直接引用字幕中的时间。
|
||||
* 剧情段落的划分应合乎逻辑,能够反映剧情的起承转合。
|
||||
* 语言表达应简洁、准确、客观。
|
||||
|
||||
# 限制
|
||||
1. 严禁输出与分析结果无关的内容
|
||||
2. 时间戳必须严格按照字幕中的实际时间
|
||||
|
||||
# 请处理以下字幕:
|
||||
# 输入内容
|
||||
${subtitle_content}"""
|
||||
|
||||
@ -43,11 +43,14 @@ class ScriptGenerationPrompt(ParameterizedPrompt):
|
||||
${plot_analysis}
|
||||
</plot>
|
||||
|
||||
### 原始字幕(含精确时间戳)
|
||||
### 原始字幕(含视频编号和精确时间戳)
|
||||
<subtitles>
|
||||
${subtitle_content}
|
||||
</subtitles>
|
||||
|
||||
字幕可能来自多个视频文件。每个字幕分段标题会以“视频 1: 文件名”“视频 2: 文件名”等形式标识来源。
|
||||
生成脚本时必须把每个片段绑定到对应视频来源,时间戳表示该视频文件内部的局部时间,不是把多个视频拼接后的全局时间。
|
||||
|
||||
## 短剧解说创作核心要素
|
||||
|
||||
### 1. 黄金开场(3秒法则)
|
||||
@ -137,11 +140,18 @@ ${subtitle_content}
|
||||
|
||||
### 时间戳管理(绝对不能违反)
|
||||
- **时间戳绝对不能重叠**,确保剪辑后无重复画面
|
||||
- **时间段必须连续且不交叉**,严格按时间顺序排列
|
||||
- **每个时间戳都必须在原始字幕中找到对应范围**
|
||||
- **同一个 video_id 内的时间段必须连续且不交叉**,严格按该视频内时间顺序排列
|
||||
- **跨视频可以切换 video_id**,但每个时间戳都必须来自对应视频字幕分段
|
||||
- **每个时间戳都必须在对应视频的原始字幕中找到对应范围**
|
||||
- 可以拆分原时间片段,但必须保持时间连续性
|
||||
- 时间戳的格式必须与原始字幕中的格式完全一致
|
||||
|
||||
### 多视频来源规范(多集/多文件必须遵守)
|
||||
- **video_id**:必须填写,取字幕分段标题里的视频编号,例如“视频 3”就填 3
|
||||
- **video_name**:必须填写对应的视频文件名,例如“3_20260607002212.mp4”
|
||||
- **timestamp**:只填写对应 video_id 内部的时间范围,不要换算成多个视频拼接后的累计时间
|
||||
- 如果剧情跨多个视频推进,脚本可以按故事顺序在不同 video_id 之间切换,但不得把视频 2 的时间戳写到 video_id=1
|
||||
|
||||
### 时长控制(1/3原则)
|
||||
- **解说视频总长度 = 原视频长度的 1/3**
|
||||
- 精确控制节奏和密度,既不能过短也不能过长
|
||||
@ -159,6 +169,8 @@ ${subtitle_content}
|
||||
```json
|
||||
{
|
||||
"_id": 序号,
|
||||
"video_id": 视频编号,
|
||||
"video_name": "视频文件名",
|
||||
"timestamp": "开始时间-结束时间",
|
||||
"picture": "画面内容描述",
|
||||
"narration": "播放原片+序号",
|
||||
@ -242,6 +254,8 @@ ${subtitle_content}
|
||||
"items": [
|
||||
{
|
||||
"_id": 1,
|
||||
"video_id": 1,
|
||||
"video_name": "1.mp4",
|
||||
"timestamp": "00:00:01,000-00:00:05,500",
|
||||
"picture": "女主角林小雨慌张地道歉,男主角沈墨轩冷漠地看着她",
|
||||
"narration": "一个普通女孩的命运即将因为一杯咖啡彻底改变!她撞到的这个男人,竟然是...",
|
||||
@ -249,6 +263,8 @@ ${subtitle_content}
|
||||
},
|
||||
{
|
||||
"_id": 2,
|
||||
"video_id": 1,
|
||||
"video_name": "1.mp4",
|
||||
"timestamp": "00:00:05,500-00:00:08,000",
|
||||
"picture": "沈墨轩质问林小雨,语气冷厉威严",
|
||||
"narration": "播放原片2",
|
||||
@ -256,6 +272,8 @@ ${subtitle_content}
|
||||
},
|
||||
{
|
||||
"_id": 3,
|
||||
"video_id": 2,
|
||||
"video_name": "2.mp4",
|
||||
"timestamp": "00:00:08,000-00:00:12,000",
|
||||
"picture": "林小雨惊慌失措,沈墨轩眼中闪过一丝兴趣",
|
||||
"narration": "霸道总裁的经典开场!一杯咖啡引发的爱情故事就这样开始了...",
|
||||
@ -281,6 +299,7 @@ ${subtitle_content}
|
||||
- **原声片段标识**:OST=1表示原声,OST=0表示解说
|
||||
- **原声格式规范**:narration字段必须使用"播放原片+序号"格式
|
||||
- **关键情绪点**:必须保留原片原声,增强观众代入感
|
||||
- **视频来源**:每个片段必须包含 video_id 和 video_name,用于定位多个上传视频中的源文件
|
||||
- **时间戳精度**:精确到毫秒级别,确保与字幕完美匹配
|
||||
- **逻辑连贯性**:严格遵循剧情发展顺序
|
||||
|
||||
|
||||
@ -225,6 +225,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
|
||||
# 使用新的统一裁剪策略
|
||||
video_clip_result = clip_video.clip_video_unified(
|
||||
video_origin_path=params.video_origin_path,
|
||||
video_origin_paths=getattr(params, "video_origin_paths", []),
|
||||
script_list=list_script,
|
||||
tts_results=tts_results
|
||||
)
|
||||
@ -477,6 +478,7 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
|
||||
# 使用新的统一裁剪策略
|
||||
video_clip_result = clip_video.clip_video_unified(
|
||||
video_origin_path=params.video_origin_path,
|
||||
video_origin_paths=getattr(params, "video_origin_paths", []),
|
||||
script_list=list_script,
|
||||
tts_results=tts_results
|
||||
)
|
||||
|
||||
116
app/services/tavily_search.py
Normal file
116
app/services/tavily_search.py
Normal file
@ -0,0 +1,116 @@
|
||||
"""Tavily-powered web search helpers for plot analysis."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
import requests
|
||||
from loguru import logger
|
||||
|
||||
|
||||
TAVILY_API_BASE_URL = "https://api.tavily.com"
|
||||
DEFAULT_SEARCH_DEPTH = "basic"
|
||||
DEFAULT_MAX_RESULTS = 5
|
||||
DEFAULT_TIMEOUT = 20
|
||||
|
||||
|
||||
class TavilySearchError(RuntimeError):
    """Signal that a Tavily web search failed or could not be attempted."""
|
||||
|
||||
|
||||
def _trim_text(value: Any, max_chars: int) -> str:
|
||||
text = str(value or "").strip()
|
||||
if len(text) <= max_chars:
|
||||
return text
|
||||
return f"{text[:max_chars].rstrip()}..."
|
||||
|
||||
|
||||
def search_short_drama(
    short_name: str,
    api_key: str | None = None,
    *,
    search_depth: str = DEFAULT_SEARCH_DEPTH,
    max_results: int = DEFAULT_MAX_RESULTS,
    timeout: int = DEFAULT_TIMEOUT,
) -> dict[str, Any]:
    """Search the web for background on a short drama through the Tavily API.

    Args:
        short_name: Title of the short drama; must not be blank.
        api_key: Tavily API key. Falls back to the ``TAVILY_API_KEY``
            environment variable when omitted.
        search_depth: Value sent as ``search_depth``; a falsy value falls
            back to the module default.
        max_results: Requested result count, clamped to the range 1-10.
            Values that cannot be converted to an integer fall back to the
            module default.
        timeout: Request timeout in seconds.

    Returns:
        The decoded JSON object returned by the search endpoint.

    Raises:
        TavilySearchError: If the name or key is missing, the request fails,
            the server answers with HTTP >= 400, or the body is not a JSON
            object.
    """
    short_name = str(short_name or "").strip()
    if not short_name:
        raise TavilySearchError("短剧名称不能为空")

    api_key = (api_key or os.getenv("TAVILY_API_KEY") or "").strip()
    if not api_key:
        raise TavilySearchError("Tavily API Key 未配置")

    # Config values may arrive as strings; never let a bad value escape as a
    # bare ValueError.
    try:
        result_limit = int(max_results or DEFAULT_MAX_RESULTS)
    except (TypeError, ValueError, OverflowError):
        logger.warning(
            "Tavily max_results 无效: {},使用默认值 {}",
            max_results,
            DEFAULT_MAX_RESULTS,
        )
        result_limit = DEFAULT_MAX_RESULTS

    query = f"{short_name} 短剧 剧情 介绍 人物 结局"
    payload = {
        "query": query,
        "search_depth": search_depth or DEFAULT_SEARCH_DEPTH,
        "topic": "general",
        "max_results": max(1, min(result_limit, 10)),
        "include_answer": True,
        "include_raw_content": False,
        "include_images": False,
    }

    try:
        response = requests.post(
            f"{TAVILY_API_BASE_URL}/search",
            headers={
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            },
            json=payload,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        raise TavilySearchError(f"Tavily 请求失败: {exc}") from exc

    if response.status_code >= 400:
        message = _trim_text(response.text, 500)
        raise TavilySearchError(f"Tavily 请求失败: HTTP {response.status_code} {message}")

    try:
        data = response.json()
    except ValueError as exc:
        raise TavilySearchError("Tavily 返回内容不是有效 JSON") from exc

    # A JSON list or scalar would break the .get() calls below and in callers;
    # report it as a search failure instead of leaking AttributeError.
    if not isinstance(data, dict):
        raise TavilySearchError("Tavily 返回内容不是有效的 JSON 对象")

    logger.info(
        "Tavily 短剧检索完成: query={}, results={}",
        query,
        len(data.get("results") or []),
    )
    return data
|
||||
|
||||
|
||||
def format_search_context(search_data: dict[str, Any], *, max_chars: int = 6000) -> str:
    """Condense a Tavily response into a compact text block for LLM context.

    Args:
        search_data: Decoded Tavily response; a falsy value yields ``""``.
        max_chars: Upper bound applied to the final text via ``_trim_text``.

    Returns:
        A heading and the query line, then the trimmed answer (if any) and a
        numbered list of sources with title, URL and summary.
    """
    if not search_data:
        return ""

    sections = ["# Tavily 联网检索结果"]
    sections.append(f"检索 query: {search_data.get('query', '')}")

    answer = _trim_text(search_data.get("answer"), 1200)
    if answer:
        sections += ["", "## 综合回答", answer]

    results = search_data.get("results") or []
    if results:
        sections += ["", "## 搜索来源"]
        for index, result in enumerate(results, start=1):
            title = _trim_text(result.get("title"), 120)
            url = _trim_text(result.get("url"), 240)
            # Prefer the short snippet; fall back to the raw page content.
            content = _trim_text(result.get("content") or result.get("raw_content"), 700)
            sections.append(f"{index}. 标题: {title}")
            sections.append(f" 来源: {url}")
            sections.append(f" 摘要: {content}")

    joined = "\n".join(sections).strip()
    return _trim_text(joined, max_chars)
|
||||
@ -51,6 +51,23 @@ class JianyingTaskTests(unittest.TestCase):
|
||||
|
||||
self.assertEqual(f"indextts2:{ref_path}", params.voice_name)
|
||||
|
||||
def test_normalize_omnivoice_clone_uses_valid_param_reference(self):
    """In voice_clone mode an existing reference file on the params is kept."""
    with tempfile.NamedTemporaryFile(suffix=".wav") as reference_file:
        expected_voice = f"omnivoice:{reference_file.name}"
        params = VideoClipParams(tts_engine="omnivoice", voice_name=expected_voice)

        clone_mode = patch.dict(jianying_task.config.omnivoice, {"mode": "voice_clone"}, clear=False)
        with clone_mode:
            jianying_task._normalize_indextts_reference_audio(params)

        self.assertEqual(expected_voice, params.voice_name)
|
||||
|
||||
def test_normalize_omnivoice_auto_does_not_require_reference(self):
    """In auto mode the voice name is left alone even with no reference audio."""
    auto_voice = "omnivoice:auto"
    params = VideoClipParams(tts_engine="omnivoice", voice_name=auto_voice)

    auto_mode = patch.dict(
        jianying_task.config.omnivoice,
        {"mode": "auto", "reference_audio": ""},
        clear=False,
    )
    with auto_mode:
        jianying_task._normalize_indextts_reference_audio(params)

    self.assertEqual(auto_voice, params.voice_name)
|
||||
|
||||
def test_normalize_indextts_requires_existing_reference_audio(self):
|
||||
params = VideoClipParams(tts_engine="indextts", voice_name="zh-CN-YunjianNeural")
|
||||
|
||||
|
||||
84
app/services/test_multi_video_script_sources_unittest.py
Normal file
84
app/services/test_multi_video_script_sources_unittest.py
Normal file
@ -0,0 +1,84 @@
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from unittest import mock
|
||||
|
||||
from app.services import clip_video
|
||||
from app.utils import check_script
|
||||
|
||||
|
||||
class TestMultiVideoScriptSources(unittest.TestCase):
    """Checks that scripts may name their source video by id or file name."""

    def test_check_format_accepts_optional_video_source_fields(self):
        """A segment carrying video_id and video_name passes format checking."""
        segment = {
            "_id": 1,
            "video_id": 2,
            "video_name": "2.mp4",
            "timestamp": "00:00:00,000-00:00:03,000",
            "picture": "画面",
            "narration": "解说",
            "OST": 0,
        }
        serialized = json.dumps([segment], ensure_ascii=False)

        outcome = check_script.check_format(serialized)

        self.assertTrue(outcome["success"])

    def test_clip_video_unified_resolves_source_by_video_id_and_name(self):
        """Segments are cut from the video selected by video_id or video_name."""
        with tempfile.TemporaryDirectory() as workspace:
            video_1 = os.path.join(workspace, "1.mp4")
            video_2 = os.path.join(workspace, "2.mp4")
            for placeholder in (video_1, video_2):
                with open(placeholder, "wb") as handle:
                    handle.write(b"video")

            output_dir = os.path.join(workspace, "clips")
            used_sources = []

            def fake_process(source_video_path, script_item, output_dir_arg, *_args):
                # Record which source was chosen and emit a dummy clip file.
                used_sources.append(source_video_path)
                clip_path = os.path.join(output_dir_arg, f"{script_item['_id']}.mp4")
                with open(clip_path, "wb") as clip_file:
                    clip_file.write(b"clip")
                return clip_path

            by_video_id = {
                "_id": 1,
                "video_id": 2,
                "timestamp": "00:00:00,000-00:00:03,000",
                "picture": "视频2画面",
                "narration": "播放原片1",
                "OST": 1,
            }
            by_video_name = {
                "_id": 2,
                "video_name": "1.mp4",
                "timestamp": "00:00:03,000-00:00:06,000",
                "picture": "视频1画面",
                "narration": "播放原片2",
                "OST": 1,
            }

            hardware_patch = mock.patch.object(
                clip_video, "check_hardware_acceleration", return_value=None
            )
            segment_patch = mock.patch.object(
                clip_video, "_process_original_audio_segment", side_effect=fake_process
            )
            with hardware_patch, segment_patch:
                result = clip_video.clip_video_unified(
                    video_origin_path=video_1,
                    video_origin_paths=[video_1, video_2],
                    script_list=[by_video_id, by_video_name],
                    tts_results=[],
                    output_dir=output_dir,
                    task_id="multi-video-test",
                )

            self.assertEqual([video_2, video_1], used_sources)
            self.assertEqual({1, 2}, set(result.keys()))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@ -1,3 +1,5 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
@ -1298,6 +1300,10 @@ def tts(
|
||||
if tts_engine == config.INDEXTTS2_ENGINE:
|
||||
logger.info("分发到 IndexTTS-2")
|
||||
return indextts2_tts(text, voice_name, voice_file)
|
||||
|
||||
if tts_engine == config.OMNIVOICE_ENGINE:
|
||||
logger.info("分发到 OmniVoice")
|
||||
return omnivoice_tts(text, voice_name, voice_file, speed=voice_rate)
|
||||
|
||||
if tts_engine == "doubaotts":
|
||||
logger.info("分发到豆包语音 TTS")
|
||||
@ -1783,7 +1789,11 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
|
||||
voice_name = config.normalize_indextts_voice_prefix(parse_voice_name(voice_name))
|
||||
output_dir = utils.task_dir(task_id)
|
||||
tts_results = []
|
||||
audio_extension = ".wav" if tts_engine in (config.INDEXTTS_ENGINE, config.INDEXTTS2_ENGINE) else ".mp3"
|
||||
audio_extension = ".wav" if tts_engine in (
|
||||
config.INDEXTTS_ENGINE,
|
||||
config.INDEXTTS2_ENGINE,
|
||||
config.OMNIVOICE_ENGINE,
|
||||
) else ".mp3"
|
||||
|
||||
for item in list_script:
|
||||
if item['OST'] != 1:
|
||||
@ -1809,11 +1819,11 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
|
||||
f"或者使用其他 tts 引擎")
|
||||
continue
|
||||
else:
|
||||
# SoulVoice、Qwen3、IndexTTS、豆包语音 引擎不生成精确字幕文件
|
||||
# SoulVoice、Qwen3、IndexTTS、OmniVoice、豆包语音 引擎不生成精确字幕文件
|
||||
if (
|
||||
is_soulvoice_voice(voice_name)
|
||||
or is_qwen_engine(tts_engine)
|
||||
or tts_engine in (config.INDEXTTS_ENGINE, config.INDEXTTS2_ENGINE)
|
||||
or tts_engine in (config.INDEXTTS_ENGINE, config.INDEXTTS2_ENGINE, config.OMNIVOICE_ENGINE)
|
||||
or tts_engine == "doubaotts"
|
||||
):
|
||||
# 获取实际音频文件的时长
|
||||
@ -2256,6 +2266,17 @@ def parse_indextts2_voice(voice_name: str) -> str:
|
||||
return voice_name
|
||||
|
||||
|
||||
def parse_omnivoice_voice(voice_name: str) -> str:
    """Strip the OmniVoice engine prefix from a voice name.

    Accepts the ``omnivoice:<reference audio path or mode>`` format.

    Args:
        voice_name: Voice name as stored in the UI or config.

    Returns:
        The part after the prefix when the prefix is present; otherwise the
        input unchanged (including non-string inputs).
    """
    if not isinstance(voice_name, str):
        return voice_name

    prefix = config.OMNIVOICE_VOICE_PREFIX
    if voice_name.startswith(prefix):
        return voice_name[len(prefix):]
    return voice_name
|
||||
|
||||
|
||||
def indextts_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]:
|
||||
"""
|
||||
使用 IndexTTS-1.5 API 进行零样本语音克隆
|
||||
@ -2493,3 +2514,141 @@ def indextts2_tts(text: str, voice_name: str, voice_file: str) -> Union[SubMaker
|
||||
|
||||
logger.error("IndexTTS-2 TTS 生成失败,已达到最大重试次数")
|
||||
return None
|
||||
|
||||
|
||||
def _normalize_omnivoice_api_url(api_url: str) -> str:
|
||||
api_url = (api_url or "http://127.0.0.1:7866/tts").strip()
|
||||
if api_url.endswith("/tts"):
|
||||
return api_url
|
||||
if api_url.endswith("/tts/json"):
|
||||
return f"{api_url[:-len('/tts/json')]}/tts"
|
||||
return f"{api_url.rstrip('/')}/tts"
|
||||
|
||||
|
||||
def _download_omnivoice_audio(response: requests.Response, api_url: str, voice_file: str, proxies: dict) -> bool:
    """Save the audio produced by an OmniVoice request to ``voice_file``.

    A response whose content type is not JSON is treated as the audio itself.
    A JSON response is expected to carry an ``audio_url``, which is resolved
    against ``api_url`` and downloaded.

    Args:
        response: Response returned by the OmniVoice TTS endpoint.
        api_url: Endpoint URL, used as the base for a relative ``audio_url``.
        voice_file: Destination path for the audio file.
        proxies: Proxy mapping passed to the follow-up download request.

    Returns:
        True when a non-empty file was written; False when the JSON carries
        no audio URL or the download does not answer with HTTP 200.
    """
    content_type = response.headers.get("content-type", "").lower()
    audio_source = response

    if "application/json" in content_type:
        body = response.json()
        audio_url = body.get("audio_url") if isinstance(body, dict) else ""
        if not audio_url:
            logger.error(f"OmniVoice API 响应中没有音频下载地址: {body}")
            return False

        download = requests.get(urljoin(api_url, audio_url), proxies=proxies, timeout=180)
        if download.status_code != 200:
            logger.error(f"OmniVoice 音频下载失败: {download.status_code} - {download.text}")
            return False
        audio_source = download

    with open(voice_file, "wb") as audio_file:
        audio_file.write(audio_source.content)
    # An empty file counts as a failed synthesis.
    return os.path.getsize(voice_file) > 0
|
||||
|
||||
|
||||
def _optional_omnivoice_generation_data(voice_speed: float) -> dict:
    """Collect the optional generation parameters for an OmniVoice request.

    Args:
        voice_speed: Requested speed; a falsy value falls back to the
            configured ``speed`` (or 1.0 when that is missing).

    Returns:
        A dict that always contains ``speed``. ``num_step``,
        ``guidance_scale`` and ``duration`` are added when configured with a
        value other than None or "". ``denoise``, ``postprocess_output`` and
        ``preprocess_prompt`` are added as the strings "true"/"false" when
        present in the configuration.
    """
    settings = getattr(config, "omnivoice", {}) or {}
    payload = {"speed": voice_speed or settings.get("speed", 1.0)}

    # Numeric knobs are forwarded only when the user actually set them.
    payload.update(
        (name, settings.get(name))
        for name in ("num_step", "guidance_scale", "duration")
        if settings.get(name) not in (None, "")
    )

    # Boolean switches are sent as lowercase text.
    payload.update(
        (flag, str(bool(settings.get(flag))).lower())
        for flag in ("denoise", "postprocess_output", "preprocess_prompt")
        if flag in settings
    )
    return payload
|
||||
|
||||
|
||||
def omnivoice_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]:
    """Synthesize speech through the OmniVoice-Pack FastAPI service.

    Three modes are supported: automatic voice, instruction-driven voice
    design, and cloning from a reference audio file. The request is tried up
    to three times with a two-second pause between attempts.

    Args:
        text: Text to synthesize.
        voice_name: Voice identifier, optionally carrying the OmniVoice prefix;
            when it resolves to an existing file, clone mode is used.
        voice_file: Path the generated audio is written to.
        speed: Speech rate passed on to the service.

    Returns:
        A subtitle maker holding one event that spans the whole audio, or
        None when the reference audio is missing or every attempt failed.
    """
    settings = getattr(config, "omnivoice", {}) or {}
    api_url = _normalize_omnivoice_api_url(settings.get("api_url", "http://127.0.0.1:7866/tts"))
    mode = settings.get("mode", "auto")
    language = (settings.get("language", "zh") or "").strip()
    instruct = (settings.get("instruct", "") or "").strip()
    ref_text = (settings.get("ref_text", "") or "").strip()

    voice_target = parse_omnivoice_voice(voice_name)
    # A voice name that points at a real file switches to cloning mode.
    if mode != "voice_clone" and voice_target and os.path.isfile(voice_target):
        mode = "voice_clone"

    reference_audio_path = ""
    if mode == "voice_clone":
        if voice_target and os.path.isfile(voice_target):
            reference_audio_path = voice_target
        else:
            # Fall back to the reference audio stored in the configuration.
            reference_audio_path = parse_omnivoice_voice(settings.get("reference_audio", "") or "")

        if not reference_audio_path or not os.path.exists(reference_audio_path):
            logger.error(f"OmniVoice 参考音频文件不存在: {reference_audio_path}")
            return None
    elif mode != "voice_design":
        # The instruction text is only meaningful in voice-design mode.
        instruct = ""

    form_fields = {"text": text.strip(), "language": language}
    form_fields.update(_optional_omnivoice_generation_data(speed))
    if mode == "voice_design" and instruct:
        form_fields["instruct"] = instruct
    if mode == "voice_clone" and ref_text:
        form_fields["ref_text"] = ref_text

    proxies = _get_configured_proxies()
    max_attempts = 3
    for attempt_no in range(1, max_attempts + 1):
        # The reference file is reopened for every attempt so each upload
        # starts from the beginning of the file.
        ref_handle = None
        try:
            if reference_audio_path:
                ref_handle = open(reference_audio_path, "rb")

            logger.info(f"第 {attempt_no} 次调用 OmniVoice API: {api_url}, mode={mode}")
            upload = {"ref_audio": ref_handle} if ref_handle is not None else None
            response = requests.post(
                api_url,
                files=upload,
                data=form_fields,
                proxies=proxies,
                timeout=240,
            )

            saved = response.status_code == 200 and _download_omnivoice_audio(
                response, api_url, voice_file, proxies
            )
            if not saved:
                logger.error(f"OmniVoice API 调用失败: {response.status_code} - {response.text}")
            else:
                logger.info(f"OmniVoice 成功生成音频: {voice_file}, 大小: {os.path.getsize(voice_file)} 字节")
                sub_maker = new_sub_maker()
                duration = get_audio_duration_from_file(voice_file)
                if duration > 0:
                    duration_ms = int(duration * 1000)
                else:
                    # Unknown duration: estimate 200 ms per character, at least one second.
                    duration_ms = max(1000, int(len(text) * 200))
                # The subtitle event end is passed as milliseconds * 10000.
                add_subtitle_event(sub_maker, 0, duration_ms * 10000, text)
                return sub_maker
        except requests.exceptions.Timeout:
            logger.error(f"OmniVoice API 调用超时 (尝试 {attempt_no}/3)")
        except requests.exceptions.RequestException as net_err:
            logger.error(f"OmniVoice API 网络错误: {net_err!s} (尝试 {attempt_no}/3)")
        except Exception as err:
            logger.error(f"OmniVoice TTS 处理错误: {err!s} (尝试 {attempt_no}/3)")
        finally:
            if ref_handle is not None:
                try:
                    ref_handle.close()
                except Exception:
                    # Closing is best-effort; a failure here must not mask the result.
                    pass

        if attempt_no < max_attempts:
            time.sleep(2)

    logger.error("OmniVoice TTS 生成失败,已达到最大重试次数")
    return None
|
||||
|
||||
@ -57,6 +57,23 @@ def check_format(script_content: str) -> Dict[str, Any]:
|
||||
'details': f'当前值: {clip["_id"]} (类型: {type(clip["_id"]).__name__})'
|
||||
}
|
||||
|
||||
# 验证可选视频来源字段。旧脚本可以不包含,新脚本用于多视频定位。
|
||||
if 'video_id' in clip and clip['video_id'] not in ("", None):
|
||||
if not isinstance(clip['video_id'], int) or clip['video_id'] <= 0:
|
||||
return {
|
||||
'success': False,
|
||||
'message': f'第{i+1}个片段的video_id必须是正整数',
|
||||
'details': f'当前值: {clip["video_id"]} (类型: {type(clip["video_id"]).__name__})'
|
||||
}
|
||||
|
||||
if 'video_name' in clip and clip['video_name'] not in ("", None):
|
||||
if not isinstance(clip['video_name'], str):
|
||||
return {
|
||||
'success': False,
|
||||
'message': f'第{i+1}个片段的video_name必须是字符串',
|
||||
'details': f'当前值: {clip["video_name"]} (类型: {type(clip["video_name"]).__name__})'
|
||||
}
|
||||
|
||||
# 验证 timestamp 字段格式
|
||||
timestamp_pattern = r'^\d{2}:\d{2}:\d{2},\d{3}-\d{2}:\d{2}:\d{2},\d{3}$'
|
||||
if not isinstance(clip['timestamp'], str) or not re.match(timestamp_pattern, clip['timestamp']):
|
||||
|
||||
@ -49,6 +49,12 @@
|
||||
text_openai_max_tokens = 65536
|
||||
text_openai_thinking_level = "auto" # auto/off/low/medium/high
|
||||
|
||||
# ===== Tavily 联网搜索配置 =====
|
||||
# 用于短剧剧情理解前,按短剧名称检索公开剧情/人物/分集信息
|
||||
tavily_api_key = "" # 获取地址:https://app.tavily.com
|
||||
tavily_search_depth = "basic" # basic / advanced / fast / ultra-fast
|
||||
tavily_max_results = 5
|
||||
|
||||
# ===== API Keys 参考 =====
|
||||
# 主流 LLM Providers API Key 获取地址:
|
||||
#
|
||||
@ -171,6 +177,30 @@
|
||||
repetition_penalty = 10.0
|
||||
max_mel_tokens = 1500
|
||||
|
||||
[omnivoice]
|
||||
# OmniVoice-Pack 语音合成配置
|
||||
# 支持 OmniVoice-Pack FastAPI 接口:POST /tts
|
||||
api_url = "http://127.0.0.1:7866/tts"
|
||||
language = "zh"
|
||||
|
||||
# 生成模式:auto / voice_design / voice_clone
|
||||
mode = "auto"
|
||||
instruct = ""
|
||||
|
||||
# voice_clone 模式下使用,音色列表复用 IndexTTS-1.5 的资源目录
|
||||
reference_audio_source = "resource"
|
||||
reference_audio = ""
|
||||
ref_text = ""
|
||||
|
||||
# 高级生成参数
|
||||
num_step = 32
|
||||
guidance_scale = 2.0
|
||||
speed = 1.0
|
||||
duration = ""
|
||||
denoise = true
|
||||
postprocess_output = true
|
||||
preprocess_prompt = true
|
||||
|
||||
[doubaotts]
|
||||
# 豆包语音 TTS 配置
|
||||
# 申请流程:
|
||||
@ -189,7 +219,7 @@
|
||||
silence_duration = 0.125
|
||||
|
||||
[ui]
|
||||
# TTS引擎选择 (indextts, indextts2, edge_tts, qwen3_tts, tencent_tts, doubaotts, azure_speech)
|
||||
# TTS引擎选择 (indextts, indextts2, omnivoice, edge_tts, qwen3_tts, tencent_tts, doubaotts, azure_speech)
|
||||
tts_engine = "indextts"
|
||||
|
||||
# Edge TTS 配置
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
requests>=2.32.0
|
||||
moviepy==2.1.1
|
||||
edge-tts==7.2.7
|
||||
streamlit>=1.57.0
|
||||
streamlit==1.56.0
|
||||
watchdog==6.0.0
|
||||
loguru>=0.7.3
|
||||
tomli>=2.2.1
|
||||
|
||||
7
webui.py
7
webui.py
@ -243,6 +243,12 @@ def get_voice_name_for_tts_engine(tts_engine: str) -> str:
|
||||
if reference_audio:
|
||||
return f"{config.INDEXTTS_VOICE_PREFIX}{reference_audio}"
|
||||
return config.ui.get('voice_name', '')
|
||||
if tts_engine == config.OMNIVOICE_ENGINE:
|
||||
mode = config.omnivoice.get('mode', 'auto')
|
||||
reference_audio = config.omnivoice.get('reference_audio', '')
|
||||
if mode == 'voice_clone' and reference_audio:
|
||||
return f"{config.OMNIVOICE_VOICE_PREFIX}{reference_audio}"
|
||||
return f"{config.OMNIVOICE_VOICE_PREFIX}{mode}"
|
||||
if tts_engine == 'doubaotts':
|
||||
return config.ui.get('doubaotts_voice_type', 'BV700_streaming')
|
||||
if tts_engine == 'soulvoice':
|
||||
@ -263,6 +269,7 @@ def get_jianying_export_params(draft_name=None) -> VideoClipParams:
|
||||
return VideoClipParams(
|
||||
video_clip_json_path=st.session_state['video_clip_json_path'],
|
||||
video_origin_path=st.session_state['video_origin_path'],
|
||||
video_origin_paths=st.session_state.get('video_origin_paths', []),
|
||||
tts_engine=tts_engine,
|
||||
voice_name=voice_name,
|
||||
voice_rate=voice_rate,
|
||||
|
||||
@ -40,6 +40,11 @@ BGM_RESOURCE_DIR = "/Users/viccy/Downloads/tts-mp3-clone/bgms-safe"
|
||||
BGM_TRACKS_JSON = os.path.join(BGM_RESOURCE_DIR, "tracks.json")
|
||||
BGM_UPLOAD_SUBDIR = "uploaded_bgms"
|
||||
BGM_AUDIO_EXTENSIONS = (".mp3", ".wav", ".flac", ".m4a", ".aac", ".ogg")
|
||||
# Engines that get_tts_engine_deployment_label reports as "Local Deployment".
LOCAL_TTS_ENGINES = {config.INDEXTTS_ENGINE, config.INDEXTTS2_ENGINE, config.OMNIVOICE_ENGINE}
|
||||
|
||||
|
||||
def get_soulvoice_voices():
|
||||
@ -55,9 +60,10 @@ def get_soulvoice_voices():
|
||||
|
||||
def get_tts_engine_options(tr=lambda key: key):
|
||||
"""获取TTS引擎选项"""
|
||||
return {
|
||||
engine_options = {
|
||||
config.INDEXTTS_ENGINE: config.INDEXTTS_DISPLAY_NAME,
|
||||
config.INDEXTTS2_ENGINE: config.INDEXTTS2_DISPLAY_NAME,
|
||||
config.OMNIVOICE_ENGINE: config.OMNIVOICE_DISPLAY_NAME,
|
||||
"edge_tts": "Edge TTS",
|
||||
"qwen3_tts": tr("Tongyi Qwen3 TTS"),
|
||||
"tencent_tts": tr("Tencent Cloud TTS"),
|
||||
@ -65,6 +71,25 @@ def get_tts_engine_options(tr=lambda key: key):
|
||||
"azure_speech": "Azure Speech Services"
|
||||
}
|
||||
|
||||
return {
|
||||
engine: format_tts_engine_option(engine, display_name, tr)
|
||||
for engine, display_name in engine_options.items()
|
||||
}
|
||||
|
||||
|
||||
def get_tts_engine_deployment_label(tts_engine, tr=lambda key: key):
    """Return the translated deployment-type label for a TTS engine.

    Engines listed in LOCAL_TTS_ENGINES are labelled "Local Deployment";
    every other engine is labelled "Cloud Service". The label key is passed
    through ``tr`` before being returned.
    """
    label_key = "Local Deployment" if tts_engine in LOCAL_TTS_ENGINES else "Cloud Service"
    return tr(label_key)
|
||||
|
||||
|
||||
def format_tts_engine_option(tts_engine, display_name, tr=lambda key: key):
    """Build the drop-down text for a TTS engine.

    The result is the display name followed by the engine's translated
    deployment label in square brackets.
    """
    return "{} [{}]".format(display_name, get_tts_engine_deployment_label(tts_engine, tr))
|
||||
|
||||
|
||||
def get_tts_engine_descriptions(tr=lambda key: key):
|
||||
"""获取TTS引擎详细描述"""
|
||||
@ -105,6 +130,12 @@ def get_tts_engine_descriptions(tr=lambda key: key):
|
||||
"use_case": tr("IndexTTS2 use case"),
|
||||
"registration": None
|
||||
},
|
||||
config.OMNIVOICE_ENGINE: {
|
||||
"title": config.OMNIVOICE_DISPLAY_NAME,
|
||||
"features": tr("OmniVoice features"),
|
||||
"use_case": tr("OmniVoice use case"),
|
||||
"registration": None
|
||||
},
|
||||
"doubaotts": {
|
||||
"title": tr("Doubao TTS"),
|
||||
"features": tr("Doubao TTS features"),
|
||||
@ -546,6 +577,8 @@ def render_tts_settings(tr):
|
||||
render_indextts_tts_settings(tr)
|
||||
elif selected_engine == config.INDEXTTS2_ENGINE:
|
||||
render_indextts2_tts_settings(tr)
|
||||
elif selected_engine == config.OMNIVOICE_ENGINE:
|
||||
render_omnivoice_tts_settings(tr)
|
||||
elif selected_engine == "doubaotts":
|
||||
render_doubaotts_settings(tr)
|
||||
|
||||
@ -1274,6 +1307,148 @@ def render_indextts2_tts_settings(tr):
|
||||
st.session_state['voice_pitch'] = 1.0
|
||||
|
||||
|
||||
def render_omnivoice_tts_settings(tr):
    """Render the OmniVoice TTS settings panel and store the chosen values.

    Draws the API URL, language, generation mode, mode-specific inputs and the
    advanced parameters, then writes every value back into
    ``config.omnivoice``. It also updates ``config.ui["voice_name"]`` and the
    ``voice_rate`` / ``voice_pitch`` entries of the Streamlit session state.

    Saved numeric values are coerced and clamped to each widget's range before
    being used as defaults, so a hand-edited configuration with an
    out-of-range or non-numeric value does not break the panel.

    Args:
        tr: Translation callable mapping a message key to display text.
    """

    def _bounded(raw, fallback, lower, upper, cast=float):
        # Coerce a saved value to a number inside [lower, upper]; anything
        # that cannot be converted (None, "", text, NaN) becomes the fallback.
        try:
            number = cast(raw)
        except (TypeError, ValueError, OverflowError):
            number = fallback
        if number != number:  # NaN never compares equal to itself
            number = fallback
        return min(max(number, lower), upper)

    omnivoice_config = config.omnivoice

    api_url = st.text_input(
        tr("API URL"),
        value=omnivoice_config.get("api_url", "http://127.0.0.1:7866/tts"),
        help=tr("OmniVoice API URL Help"),
    )

    language = st.text_input(
        tr("OmniVoice Language Code"),
        value=omnivoice_config.get("language", "zh"),
        help=tr("OmniVoice Language Code Help"),
        placeholder="zh",
    )

    mode_options = [
        ("auto", tr("OmniVoice Mode Auto")),
        ("voice_design", tr("OmniVoice Mode Voice Design")),
        ("voice_clone", tr("OmniVoice Mode Voice Clone")),
    ]
    mode_values = [item[0] for item in mode_options]
    saved_mode = omnivoice_config.get("mode", "auto")
    if saved_mode not in mode_values:
        # Unknown modes in the saved config fall back to automatic mode.
        saved_mode = "auto"

    # The select box works on indices so the translated label can be shown
    # while the internal mode value is what gets stored.
    mode = mode_options[st.selectbox(
        tr("OmniVoice Generation Mode"),
        options=range(len(mode_options)),
        index=mode_values.index(saved_mode),
        format_func=lambda x: mode_options[x][1],
        help=tr("OmniVoice Generation Mode Help"),
    )][0]

    # Start from the saved values so fields hidden in the current mode are
    # written back unchanged.
    instruct = omnivoice_config.get("instruct", "")
    reference_audio_source = omnivoice_config.get("reference_audio_source", "resource")
    reference_audio = omnivoice_config.get("reference_audio", "")
    ref_text = omnivoice_config.get("ref_text", "")

    if mode == "voice_design":
        instruct = st.text_area(
            tr("OmniVoice Instruct"),
            value=instruct,
            help=tr("OmniVoice Instruct Help"),
            placeholder=tr("OmniVoice Instruct Placeholder"),
            height=80,
        )
    elif mode == "voice_clone":
        reference_audio_source, reference_audio = render_indextts_reference_audio_selector(
            tr,
            omnivoice_config,
            "omnivoice",
        )
        ref_text = st.text_area(
            tr("OmniVoice Reference Text"),
            value=ref_text,
            help=tr("OmniVoice Reference Text Help"),
            placeholder=tr("OmniVoice Reference Text Placeholder"),
            height=90,
        )

    with st.expander(tr("Advanced Parameters"), expanded=False):
        col1, col2 = st.columns(2)
        with col1:
            num_step = st.slider(
                "Num Step",
                min_value=4,
                max_value=64,
                value=_bounded(omnivoice_config.get("num_step", 32), 32, 4, 64, cast=int),
                step=1,
                help=tr("OmniVoice Num Step Help"),
            )
            guidance_scale = st.slider(
                "Guidance Scale",
                min_value=0.1,
                max_value=10.0,
                value=_bounded(omnivoice_config.get("guidance_scale", 2.0), 2.0, 0.1, 10.0),
                step=0.1,
                help=tr("OmniVoice Guidance Scale Help"),
            )
            voice_rate = st.slider(
                tr("Voice Rate"),
                min_value=0.5,
                max_value=2.0,
                value=_bounded(omnivoice_config.get("speed", 1.0), 1.0, 0.5, 2.0),
                step=0.1,
                help=tr("Voice Rate Help 0.5-2.0"),
            )
        with col2:
            # An unset duration (None or "") is shown as 0.
            duration = st.number_input(
                tr("OmniVoice Duration"),
                min_value=0.0,
                max_value=120.0,
                value=_bounded(omnivoice_config.get("duration", ""), 0.0, 0.0, 120.0),
                step=0.5,
                help=tr("OmniVoice Duration Help"),
            )
            denoise = st.checkbox(
                tr("OmniVoice Denoise"),
                value=bool(omnivoice_config.get("denoise", True)),
                help=tr("OmniVoice Denoise Help"),
            )
            postprocess_output = st.checkbox(
                tr("OmniVoice Postprocess Output"),
                value=bool(omnivoice_config.get("postprocess_output", True)),
                help=tr("OmniVoice Postprocess Output Help"),
            )
            preprocess_prompt = st.checkbox(
                tr("OmniVoice Preprocess Prompt"),
                value=bool(omnivoice_config.get("preprocess_prompt", True)),
                help=tr("OmniVoice Preprocess Prompt Help"),
            )

    with st.expander(tr("OmniVoice Usage Instructions Title"), expanded=False):
        st.markdown(tr("OmniVoice Usage Instructions"))

    config.omnivoice["api_url"] = api_url
    config.omnivoice["language"] = language
    config.omnivoice["mode"] = mode
    config.omnivoice["instruct"] = instruct
    config.omnivoice["reference_audio_source"] = reference_audio_source
    config.omnivoice["reference_audio"] = reference_audio
    config.omnivoice["ref_text"] = ref_text
    config.omnivoice["num_step"] = num_step
    config.omnivoice["guidance_scale"] = guidance_scale
    config.omnivoice["speed"] = voice_rate
    # A duration of 0 is stored as "" (unset).
    config.omnivoice["duration"] = duration if duration > 0 else ""
    config.omnivoice["denoise"] = denoise
    config.omnivoice["postprocess_output"] = postprocess_output
    config.omnivoice["preprocess_prompt"] = preprocess_prompt

    # The voice name carries the reference audio in clone mode, otherwise the mode.
    if mode == "voice_clone" and reference_audio:
        config.ui["voice_name"] = f"{config.OMNIVOICE_VOICE_PREFIX}{reference_audio}"
    else:
        config.ui["voice_name"] = f"{config.OMNIVOICE_VOICE_PREFIX}{mode}"
    st.session_state["voice_rate"] = voice_rate
    st.session_state["voice_pitch"] = 1.0
|
||||
|
||||
|
||||
def render_doubaotts_settings(tr):
|
||||
"""渲染豆包语音 TTS 设置"""
|
||||
# AK 输入
|
||||
@ -1567,6 +1742,15 @@ def render_voice_preview_new(tr, selected_engine):
|
||||
voice_name = f"{config.INDEXTTS2_VOICE_PREFIX}{reference_audio}"
|
||||
voice_rate = 1.0 # IndexTTS-2 使用自身生成参数
|
||||
voice_pitch = 1.0
|
||||
elif selected_engine == config.OMNIVOICE_ENGINE:
|
||||
mode = config.omnivoice.get("mode", "auto")
|
||||
reference_audio = config.omnivoice.get("reference_audio", "")
|
||||
if mode == "voice_clone" and reference_audio:
|
||||
voice_name = f"{config.OMNIVOICE_VOICE_PREFIX}{reference_audio}"
|
||||
else:
|
||||
voice_name = f"{config.OMNIVOICE_VOICE_PREFIX}{mode}"
|
||||
voice_rate = config.omnivoice.get("speed", 1.0)
|
||||
voice_pitch = 1.0
|
||||
elif selected_engine == "doubaotts":
|
||||
voice_type = config.ui.get("doubaotts_voice_type", "BV700_streaming")
|
||||
voice_name = voice_type
|
||||
@ -1579,7 +1763,11 @@ def render_voice_preview_new(tr, selected_engine):
|
||||
|
||||
with st.spinner(tr("Synthesizing Voice")):
|
||||
temp_dir = utils.storage_dir("temp", create=True)
|
||||
audio_format = "audio/wav" if selected_engine in (config.INDEXTTS_ENGINE, config.INDEXTTS2_ENGINE) else "audio/mp3"
|
||||
audio_format = "audio/wav" if selected_engine in (
|
||||
config.INDEXTTS_ENGINE,
|
||||
config.INDEXTTS2_ENGINE,
|
||||
config.OMNIVOICE_ENGINE,
|
||||
) else "audio/mp3"
|
||||
audio_extension = ".wav" if audio_format == "audio/wav" else ".mp3"
|
||||
audio_file = os.path.join(temp_dir, f"tmp-voice-{str(uuid4())}{audio_extension}")
|
||||
|
||||
|
||||
@ -260,6 +260,7 @@ def render_basic_settings(tr):
|
||||
with left_config_panel:
|
||||
render_language_settings(tr)
|
||||
render_proxy_settings(tr)
|
||||
render_tavily_search_settings(tr)
|
||||
|
||||
with middle_config_panel:
|
||||
render_vision_llm_settings(tr) # 视觉分析模型设置
|
||||
@ -345,6 +346,32 @@ def render_proxy_settings(tr):
|
||||
config.ui["jianying_draft_path"] = jianying_draft_path
|
||||
|
||||
|
||||
def render_tavily_search_settings(tr):
    """Render the Tavily API key input and save it when the value changes.

    Shows a link to the Tavily console and a password-style input for the key.
    When ``update_app_config_if_changed`` reports a change, the configuration
    is saved and the trimmed key is stored in the session state; a save
    failure is shown in the UI and logged.

    Args:
        tr: Translation callable mapping a message key to display text.
    """
    st.subheader(tr("Tavily Search Settings"))
    st.markdown(f"{tr('API Key URL')}: [https://app.tavily.com](https://app.tavily.com)")

    entered_key = st.text_input(
        tr("Tavily API Key"),
        value=config.app.get("tavily_api_key", ""),
        type="password",
        help=tr("Tavily API Key Help"),
        key="tavily_api_key_input",
    )

    cleaned_key = str(entered_key or "").strip()
    if not update_app_config_if_changed("tavily_api_key", cleaned_key):
        # Nothing changed, so there is nothing to save.
        return

    try:
        config.save_config()
        st.session_state["tavily_api_key"] = cleaned_key
        st.success(tr("Tavily config saved"))
    except Exception as save_error:
        st.error(f"{tr('Failed to save config')}: {save_error!s}")
        logger.error(f"保存 Tavily 配置失败: {save_error!s}")
|
||||
|
||||
|
||||
def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
|
||||
"""测试视觉模型连接
|
||||
|
||||
|
||||
@ -17,7 +17,7 @@ from webui.tools.generate_script_short import generate_script_short
|
||||
from webui.tools.generate_short_summary import analyze_short_drama_plot, generate_script_short_sunmmary
|
||||
|
||||
|
||||
SCRIPT_TABLE_BASE_COLUMNS = ["_id", "timestamp", "picture", "narration", "OST"]
|
||||
SCRIPT_TABLE_BASE_COLUMNS = ["_id", "video_id", "video_name", "timestamp", "picture", "narration", "OST"]
|
||||
VIDEO_UPLOAD_TYPES = ["mp4", "mov", "avi", "flv", "mkv", "mpeg4"]
|
||||
VIDEO_GLOB_PATTERNS = [f"*.{suffix}" for suffix in VIDEO_UPLOAD_TYPES]
|
||||
|
||||
@ -99,15 +99,24 @@ def _read_subtitle_file(path):
|
||||
return f.read()
|
||||
|
||||
|
||||
def _build_combined_subtitle_content(subtitle_paths):
|
||||
def _build_combined_subtitle_content(subtitle_paths, video_paths=None):
|
||||
sections = []
|
||||
subtitle_contents = {}
|
||||
for subtitle_path in subtitle_paths:
|
||||
video_paths = _normalize_video_paths(video_paths)
|
||||
for index, subtitle_path in enumerate(subtitle_paths, start=1):
|
||||
if not subtitle_path or not os.path.exists(subtitle_path):
|
||||
continue
|
||||
content = _read_subtitle_file(subtitle_path)
|
||||
subtitle_contents[subtitle_path] = content
|
||||
sections.append(f"# {os.path.basename(subtitle_path)}\n{content}".strip())
|
||||
video_path = video_paths[index - 1] if index <= len(video_paths) else ""
|
||||
if video_path:
|
||||
header = (
|
||||
f"# 视频 {index}: {os.path.basename(video_path)}\n"
|
||||
f"字幕文件: {os.path.basename(subtitle_path)}"
|
||||
)
|
||||
else:
|
||||
header = f"# 视频 {index}\n字幕文件: {os.path.basename(subtitle_path)}"
|
||||
sections.append(f"{header}\n{content}".strip())
|
||||
return "\n\n".join(sections), subtitle_contents
|
||||
|
||||
|
||||
@ -120,7 +129,10 @@ def _selected_subtitle_paths():
|
||||
|
||||
def _set_subtitle_state(subtitle_paths):
|
||||
subtitle_paths = _normalize_video_paths(subtitle_paths)
|
||||
subtitle_content, subtitle_contents = _build_combined_subtitle_content(subtitle_paths)
|
||||
subtitle_content, subtitle_contents = _build_combined_subtitle_content(
|
||||
subtitle_paths,
|
||||
_selected_video_paths(),
|
||||
)
|
||||
st.session_state['subtitle_path'] = subtitle_paths[0] if subtitle_paths else None
|
||||
st.session_state['subtitle_paths'] = subtitle_paths
|
||||
st.session_state['subtitle_content'] = subtitle_content if subtitle_content else None
|
||||
@ -128,6 +140,20 @@ def _set_subtitle_state(subtitle_paths):
|
||||
st.session_state['subtitle_file_processed'] = bool(subtitle_paths)
|
||||
|
||||
|
||||
def _short_drama_plot_analysis_signature(subtitle_paths, video_theme, web_search_enabled, video_paths=None):
    """Build a JSON signature of the inputs to a short-drama plot analysis.

    The signature covers the normalized subtitle and video paths, the web
    search switch and the drama name. The name only takes part while web
    search is enabled; otherwise it is recorded as an empty string.

    Returns:
        A JSON string with sorted keys and non-ASCII characters kept as-is.
    """
    theme = ""
    if web_search_enabled:
        theme = str(video_theme or "").strip()

    fingerprint = {
        "subtitle_paths": _normalize_video_paths(subtitle_paths),
        "video_paths": _normalize_video_paths(video_paths),
        "video_theme": theme,
        "web_search_enabled": bool(web_search_enabled),
    }
    return json.dumps(fingerprint, ensure_ascii=False, sort_keys=True)
|
||||
|
||||
|
||||
def render_script_panel(tr):
|
||||
"""渲染脚本配置面板"""
|
||||
with st.container(border=True):
|
||||
@ -525,16 +551,71 @@ def short_drama_summary(tr):
|
||||
render_fun_asr_transcription(tr)
|
||||
render_subtitle_preview(tr)
|
||||
|
||||
current_subtitle_path = st.session_state.get('subtitle_path', '')
|
||||
plot_analysis_source = st.session_state.get('short_drama_plot_analysis_subtitle_path')
|
||||
if plot_analysis_source and plot_analysis_source != current_subtitle_path:
|
||||
st.session_state['short_drama_plot_analysis'] = ""
|
||||
st.session_state['short_drama_plot_analysis_subtitle_path'] = ""
|
||||
current_subtitle_paths = _selected_subtitle_paths()
|
||||
current_subtitle_path = current_subtitle_paths[0] if current_subtitle_paths else ''
|
||||
|
||||
name_cols = st.columns([4, 1.2], vertical_alignment="bottom")
|
||||
st.markdown(
|
||||
"""
|
||||
<style>
|
||||
.st-key-short_drama_web_search_enabled [data-testid="stMarkdownContainer"] {
|
||||
display: none;
|
||||
}
|
||||
.st-key-short_drama_web_search_enabled [data-testid="stWidgetLabel"] {
|
||||
min-width: 0;
|
||||
transform: translateX(-1.2rem);
|
||||
}
|
||||
.st-key-short_drama_web_search_enabled label {
|
||||
align-items: center;
|
||||
gap: 0.45rem;
|
||||
}
|
||||
.st-key-short_drama_web_search_enabled label > div:first-child {
|
||||
width: 3rem !important;
|
||||
min-width: 3rem !important;
|
||||
height: 1.55rem !important;
|
||||
border-radius: 999px !important;
|
||||
border: 1px solid #d1d5db !important;
|
||||
background: #e5e7eb !important;
|
||||
box-shadow: inset 0 1px 2px rgba(15, 23, 42, 0.08) !important;
|
||||
transition: background 160ms ease, border-color 160ms ease, box-shadow 160ms ease !important;
|
||||
}
|
||||
.st-key-short_drama_web_search_enabled label:hover > div:first-child {
|
||||
background: #dbe3ef !important;
|
||||
border-color: #b8c2d3 !important;
|
||||
}
|
||||
.st-key-short_drama_web_search_enabled label:has(input[aria-checked="true"]) > div:first-child {
|
||||
border-color: transparent !important;
|
||||
background: linear-gradient(135deg, #2563eb, #14b8a6) !important;
|
||||
box-shadow: 0 6px 14px rgba(37, 99, 235, 0.22) !important;
|
||||
}
|
||||
.st-key-short_drama_web_search_enabled label > div:first-child > div {
|
||||
width: 1.05rem !important;
|
||||
height: 1.05rem !important;
|
||||
border-radius: 999px !important;
|
||||
background: #ffffff !important;
|
||||
box-shadow: 0 2px 6px rgba(15, 23, 42, 0.24) !important;
|
||||
}
|
||||
.st-key-short_drama_web_search_enabled button[aria-label^="Help for"] {
|
||||
color: #6b7280 !important;
|
||||
}
|
||||
.st-key-short_drama_web_search_enabled button[aria-label^="Help for"]:hover {
|
||||
color: #2563eb !important;
|
||||
}
|
||||
</style>
|
||||
""",
|
||||
unsafe_allow_html=True,
|
||||
)
|
||||
|
||||
name_cols = st.columns([3.4, 1.1, 2], vertical_alignment="bottom")
|
||||
with name_cols[0]:
|
||||
video_theme = st.text_input(tr("短剧名称"))
|
||||
with name_cols[1]:
|
||||
web_search_enabled = st.toggle(
|
||||
tr("联网搜索"),
|
||||
key="short_drama_web_search_enabled",
|
||||
help=tr("Enable Web Search Help"),
|
||||
disabled=not current_subtitle_path,
|
||||
)
|
||||
with name_cols[2]:
|
||||
analyze_plot_clicked = st.button(
|
||||
tr("剧情理解"),
|
||||
key="short_drama_plot_analysis_button",
|
||||
@ -543,17 +624,37 @@ def short_drama_summary(tr):
|
||||
)
|
||||
st.session_state['video_theme'] = video_theme
|
||||
|
||||
current_signature = _short_drama_plot_analysis_signature(
|
||||
current_subtitle_paths,
|
||||
video_theme,
|
||||
web_search_enabled,
|
||||
_selected_video_paths(),
|
||||
)
|
||||
saved_signature = st.session_state.get('short_drama_plot_analysis_signature')
|
||||
legacy_source = st.session_state.get('short_drama_plot_analysis_subtitle_path')
|
||||
if (
|
||||
(saved_signature and saved_signature != current_signature)
|
||||
or (legacy_source and legacy_source != current_subtitle_path)
|
||||
):
|
||||
st.session_state['short_drama_plot_analysis'] = ""
|
||||
st.session_state['short_drama_plot_analysis_subtitle_path'] = ""
|
||||
st.session_state['short_drama_plot_analysis_signature'] = ""
|
||||
|
||||
if analyze_plot_clicked:
|
||||
with st.spinner(tr("Analyzing plot...")):
|
||||
plot_analysis = analyze_short_drama_plot(
|
||||
current_subtitle_path,
|
||||
current_subtitle_paths,
|
||||
st.session_state.get('temperature', 0.7),
|
||||
tr,
|
||||
subtitle_content=st.session_state.get('subtitle_content', ''),
|
||||
short_name=video_theme,
|
||||
enable_web_search=web_search_enabled,
|
||||
video_paths=_selected_video_paths(),
|
||||
)
|
||||
if plot_analysis:
|
||||
st.session_state['short_drama_plot_analysis'] = plot_analysis
|
||||
st.session_state['short_drama_plot_analysis_subtitle_path'] = current_subtitle_path
|
||||
st.session_state['short_drama_plot_analysis_signature'] = current_signature
|
||||
st.success(tr("Plot analysis completed"))
|
||||
|
||||
if st.session_state.get('short_drama_plot_analysis'):
|
||||
@ -575,7 +676,10 @@ def render_subtitle_preview(tr):
|
||||
subtitle_contents = {}
|
||||
|
||||
if subtitle_paths and (not subtitle_content or not subtitle_contents):
|
||||
subtitle_content, subtitle_contents = _build_combined_subtitle_content(subtitle_paths)
|
||||
subtitle_content, subtitle_contents = _build_combined_subtitle_content(
|
||||
subtitle_paths,
|
||||
_selected_video_paths(),
|
||||
)
|
||||
st.session_state['subtitle_content'] = subtitle_content
|
||||
st.session_state['subtitle_contents'] = subtitle_contents
|
||||
|
||||
@ -724,7 +828,7 @@ def _normalize_script_table_value(column, value):
|
||||
if _is_blank_table_value(value):
|
||||
return ""
|
||||
|
||||
if column in {"_id", "OST"}:
|
||||
if column in {"_id", "video_id", "OST"}:
|
||||
try:
|
||||
return int(value)
|
||||
except (TypeError, ValueError):
|
||||
@ -783,6 +887,14 @@ def render_video_script_editor(tr):
|
||||
column_order=column_order,
|
||||
column_config={
|
||||
"_id": st.column_config.NumberColumn(tr("Script Column ID"), step=1, format="%d", width=52),
|
||||
"video_id": st.column_config.NumberColumn(
|
||||
tr("Script Column Video ID"),
|
||||
min_value=1,
|
||||
step=1,
|
||||
format="%d",
|
||||
width=80,
|
||||
),
|
||||
"video_name": st.column_config.TextColumn(tr("Script Column Video Name"), width=180),
|
||||
"timestamp": st.column_config.TextColumn(tr("Script Column Timestamp"), width=200),
|
||||
"picture": st.column_config.TextColumn(tr("Script Column Picture"), width=320),
|
||||
"narration": st.column_config.TextColumn(tr("Script Column Narration"), width=480),
|
||||
@ -1057,7 +1169,10 @@ def render_fun_asr_transcription(tr):
|
||||
st.error(tr("Fun-ASR failed without subtitle file"))
|
||||
return
|
||||
|
||||
subtitle_content, subtitle_contents = _build_combined_subtitle_content(generated_paths)
|
||||
subtitle_content, subtitle_contents = _build_combined_subtitle_content(
|
||||
generated_paths,
|
||||
media_paths,
|
||||
)
|
||||
if not subtitle_content.strip():
|
||||
clear_fun_asr_subtitle_state()
|
||||
st.error(tr("Fun-ASR failed without subtitle file"))
|
||||
@ -1112,20 +1227,35 @@ def render_script_buttons(tr, params):
|
||||
generate_script_short(tr, params, custom_clips)
|
||||
elif script_path == "summary":
|
||||
# 执行 短剧解说 脚本生成
|
||||
subtitle_path = st.session_state.get('subtitle_path')
|
||||
subtitle_paths = _selected_subtitle_paths()
|
||||
subtitle_path = subtitle_paths[0] if subtitle_paths else None
|
||||
video_theme = st.session_state.get('video_theme')
|
||||
temperature = st.session_state.get('temperature')
|
||||
web_search_enabled = bool(st.session_state.get('short_drama_web_search_enabled', False))
|
||||
current_signature = _short_drama_plot_analysis_signature(
|
||||
subtitle_paths,
|
||||
video_theme,
|
||||
web_search_enabled,
|
||||
_selected_video_paths(),
|
||||
)
|
||||
plot_analysis = ""
|
||||
if st.session_state.get('short_drama_plot_analysis_subtitle_path') == subtitle_path:
|
||||
if st.session_state.get('short_drama_plot_analysis_signature') == current_signature:
|
||||
plot_analysis = st.session_state.get('short_drama_plot_analysis', '')
|
||||
elif (
|
||||
not web_search_enabled
|
||||
and st.session_state.get('short_drama_plot_analysis_subtitle_path') == subtitle_path
|
||||
):
|
||||
plot_analysis = st.session_state.get('short_drama_plot_analysis', '')
|
||||
generate_script_short_sunmmary(
|
||||
params,
|
||||
subtitle_path,
|
||||
subtitle_paths,
|
||||
video_theme,
|
||||
temperature,
|
||||
tr,
|
||||
plot_analysis=plot_analysis,
|
||||
subtitle_content=st.session_state.get('subtitle_content', ''),
|
||||
enable_web_search=web_search_enabled,
|
||||
video_paths=_selected_video_paths(),
|
||||
)
|
||||
else:
|
||||
load_script(tr, script_path)
|
||||
@ -1172,6 +1302,8 @@ def save_script_with_validation(tr, video_clip_json_details):
|
||||
example_script = [
|
||||
{
|
||||
"_id": 1,
|
||||
"video_id": 1,
|
||||
"video_name": "1.mp4",
|
||||
"timestamp": "00:00:00,600-00:00:07,559",
|
||||
"picture": "工地上,蔡晓艳奋力救人,场面混乱",
|
||||
"narration": "灾后重建,工地上险象环生!泼辣女工蔡晓艳挺身而出,救人第一!",
|
||||
@ -1179,6 +1311,8 @@ def save_script_with_validation(tr, video_clip_json_details):
|
||||
},
|
||||
{
|
||||
"_id": 2,
|
||||
"video_id": 2,
|
||||
"video_name": "2.mp4",
|
||||
"timestamp": "00:00:08,240-00:00:12,359",
|
||||
"picture": "领导视察,蔡晓艳不屑一顾",
|
||||
"narration": "播放原片4",
|
||||
|
||||
@ -604,7 +604,7 @@ def render_font_settings(tr):
|
||||
|
||||
def is_disabled_subtitle_settings(tts_engine:str)->bool:
|
||||
"""是否禁用字幕设置"""
|
||||
return tts_engine=="soulvoice" or tts_engine=="qwen3_tts"
|
||||
return tts_engine=="soulvoice" or tts_engine=="qwen3_tts" or tts_engine==config.OMNIVOICE_ENGINE
|
||||
|
||||
def render_position_settings(tr):
|
||||
"""渲染位置设置"""
|
||||
|
||||
@ -15,6 +15,8 @@
|
||||
"Video script table help": "Edit the full script JSON as a table. You can add or delete rows; saving will validate and write the script file again.",
|
||||
"Raw JSON Preview": "Raw JSON Preview",
|
||||
"Script Column ID": "ID",
|
||||
"Script Column Video ID": "Video",
|
||||
"Script Column Video Name": "Video Name",
|
||||
"Script Column Timestamp": "Timestamp",
|
||||
"Script Column Picture": "Picture",
|
||||
"Script Column Narration": "Narration",
|
||||
@ -286,7 +288,11 @@
|
||||
"IndexTTS download link": "Download link: https://pan.quark.cn/s/0767c9bcefd5",
|
||||
"IndexTTS2 features": "A locally or privately deployed IndexTTS-2 voice-cloning engine with emotion control and fuller generation parameters.",
|
||||
"IndexTTS2 use case": "Best for fixed voices, emotional narration, and local speech synthesis workflows that need finer sampling controls. Start the IndexTTS-2 API service before use.",
|
||||
"OmniVoice features": "A locally or privately deployed OmniVoice-Pack multilingual TTS engine with automatic voice generation, voice design, and reference-audio cloning.",
|
||||
"OmniVoice use case": "Best for local controllable multilingual narration, voice design, or reference-audio cloning. Start the OmniVoice-Pack API service before use.",
|
||||
"Doubao TTS features": "Volcengine Doubao speech synthesis with multiple voices and emotions, plus fast access in mainland China.",
|
||||
"Local Deployment": "Local Deployment",
|
||||
"Cloud Service": "Cloud Service",
|
||||
"Select TTS Engine": "Select TTS Engine",
|
||||
"Select TTS Engine Help": "Choose the text-to-speech engine you want to use.",
|
||||
"TTS Engine Details": "📋 {engine} Details",
|
||||
@ -413,6 +419,16 @@
|
||||
"Subtitle calibration succeeded for multiple files": "Subtitle calibration succeeded for {count} files: {files}",
|
||||
"Subtitle calibration failed": "Subtitle calibration failed",
|
||||
"Transcribed subtitles storage hint": "Previously transcribed subtitles are saved in {path}; drag a file from that folder to upload",
|
||||
"Tavily Search Settings": "Tavily Web Search",
|
||||
"Tavily API Key": "Tavily API Key",
|
||||
"Tavily API Key Help": "Used for web search before short drama plot analysis. When Web Search is enabled, the app searches plot, character, and episode context by drama name, then combines it with subtitles.",
|
||||
"Tavily config saved": "Tavily configuration saved",
|
||||
"联网搜索": "Web Search",
|
||||
"Enable Web Search Help": "When enabled, plot analysis searches the web with Tavily by short drama name before combining those results with subtitles.",
|
||||
"Please configure Tavily API Key in Basic Settings": "Please configure the Tavily API Key in Basic Settings first",
|
||||
"Please enter short drama name before web search": "Please enter the short drama name before enabling web search",
|
||||
"Searching short drama with Tavily...": "Searching short drama context with Tavily...",
|
||||
"Tavily search failed": "Tavily search failed",
|
||||
"剧情理解": "Plot Analysis",
|
||||
"剧情理解结果": "Plot Analysis Result",
|
||||
"Analyzing plot...": "Analyzing plot...",
|
||||
@ -443,6 +459,30 @@
|
||||
"API URL": "API URL",
|
||||
"IndexTTS API URL Help": "IndexTTS-1.5 API service URL",
|
||||
"IndexTTS2 API URL Help": "IndexTTS-2 API service URL. You can enter the service root or the full /tts endpoint.",
|
||||
"OmniVoice API URL Help": "OmniVoice-Pack API service URL. You can enter the service root or the full /tts endpoint.",
|
||||
"OmniVoice Language Code": "Synthesis Language",
|
||||
"OmniVoice Language Code Help": "The language parameter sent to OmniVoice-Pack, such as zh or en.",
|
||||
"OmniVoice Generation Mode": "Generation Mode",
|
||||
"OmniVoice Generation Mode Help": "Automatic voice needs no extra fields; voice design uses an instruction; reference-audio cloning needs reference audio and matching text.",
|
||||
"OmniVoice Mode Auto": "Automatic Voice",
|
||||
"OmniVoice Mode Voice Design": "Voice Design",
|
||||
"OmniVoice Mode Voice Clone": "Reference Audio Clone",
|
||||
"OmniVoice Instruct": "Voice Instruction",
|
||||
"OmniVoice Instruct Help": "Describe the desired voice, such as gender, pitch, accent, or style.",
|
||||
"OmniVoice Instruct Placeholder": "e.g. female, low pitch, british accent",
|
||||
"OmniVoice Reference Text": "Reference Audio Text",
|
||||
"OmniVoice Reference Text Help": "The exact transcript of the reference audio. Required when the deployed service has ASR disabled.",
|
||||
"OmniVoice Reference Text Placeholder": "Enter the text spoken in the reference audio",
|
||||
"OmniVoice Num Step Help": "Diffusion generation steps. Higher values usually improve quality but slow generation.",
|
||||
"OmniVoice Guidance Scale Help": "Controls how strongly text conditions guide generation.",
|
||||
"OmniVoice Duration": "Target Duration (seconds)",
|
||||
"OmniVoice Duration Help": "0 lets the model decide the duration automatically.",
|
||||
"OmniVoice Denoise": "Enable Denoise",
|
||||
"OmniVoice Denoise Help": "Ask OmniVoice-Pack to denoise the generated output.",
|
||||
"OmniVoice Postprocess Output": "Postprocess Output",
|
||||
"OmniVoice Postprocess Output Help": "Enable OmniVoice-Pack output post-processing.",
|
||||
"OmniVoice Preprocess Prompt": "Preprocess Text",
|
||||
"OmniVoice Preprocess Prompt Help": "Enable OmniVoice-Pack text preprocessing.",
|
||||
"Reference Audio Source": "Reference Audio Source",
|
||||
"Reference Audio Source Help": "Choose a reference audio from the resource directory or upload a new one.",
|
||||
"Select from Resource Directory": "Select from Resource Directory",
|
||||
@ -502,6 +542,8 @@
|
||||
"Max Mel Tokens Help": "Controls the maximum mel tokens generated in one request. Higher values can produce longer audio.",
|
||||
"IndexTTS2 Usage Instructions Title": "💡 IndexTTS-2 Usage Instructions",
|
||||
"IndexTTS2 Usage Instructions": "**IndexTTS-2 voice cloning**\n\n1. **Choose a voice**: reuse IndexTTS-1.5 resource audio or upload a reference audio file\n2. **Set API URL**: for example http://192.168.3.6:7863/tts, or enter the service root\n3. **Tune emotion**: speaker is the default; switch to audio, vector, or text when needed\n4. **Tune generation**: temperature, top_p, top_k, num_beams, repetition_penalty, and max_mel_tokens are sent directly to the IndexTTS-2 API\n\n**Notes**:\n- Reference audio quality directly affects cloning quality\n- The first request may load the model and take longer\n- CPU deployments are much slower than GPU deployments",
|
||||
"OmniVoice Usage Instructions Title": "OmniVoice Usage Instructions",
|
||||
"OmniVoice Usage Instructions": "**OmniVoice-Pack speech synthesis**\n\n1. **Automatic voice**: set the API URL and language, then synthesize directly.\n2. **Voice design**: fill instruct with the desired gender, pitch, accent, or style.\n3. **Reference-audio clone**: upload or choose reference audio and fill its matching transcript.\n\n**Notes**:\n- The default service URL is http://127.0.0.1:7866/tts\n- Reference-audio cloning requires reference text when the service has no ASR model loaded\n- OmniVoice returns WAV audio, and NarratoAI estimates subtitle segment timing from the audio duration",
|
||||
"Volcengine Access Key Help": "Volcengine Access Key",
|
||||
"Volcengine Secret Key Help": "Volcengine Secret Key",
|
||||
"Doubao AppID Help": "Doubao TTS application AppID",
|
||||
|
||||
@ -159,6 +159,8 @@
|
||||
"Video script table help": "在表格中编辑完整脚本 JSON。可新增、删除行;保存时会重新校验并写入脚本文件。",
|
||||
"Raw JSON Preview": "原始 JSON 预览",
|
||||
"Script Column ID": "序号",
|
||||
"Script Column Video ID": "视频",
|
||||
"Script Column Video Name": "视频文件",
|
||||
"Script Column Timestamp": "时间戳",
|
||||
"Script Column Picture": "画面描述",
|
||||
"Script Column Narration": "解说台词",
|
||||
@ -267,7 +269,11 @@
|
||||
"IndexTTS download link": "下载地址:https://pan.quark.cn/s/0767c9bcefd5",
|
||||
"IndexTTS2 features": "本地/私有部署的 IndexTTS-2 语音克隆引擎,支持情感控制和更完整的生成参数。",
|
||||
"IndexTTS2 use case": "适合需要固定音色、情绪化旁白或更细致采样控制的本地语音合成场景。使用前请先启动 IndexTTS-2 API 服务。",
|
||||
"OmniVoice features": "本地/私有部署的 OmniVoice-Pack 多语种语音合成引擎,支持自动音色、指令音色和参考音频克隆。",
|
||||
"OmniVoice use case": "适合需要本地可控、多语言旁白、音色设计或参考音频克隆的场景。使用前请先启动 OmniVoice-Pack API 服务。",
|
||||
"Doubao TTS features": "火山引擎豆包语音合成,支持多种音色和情感,国内访问速度快",
|
||||
"Local Deployment": "本地部署",
|
||||
"Cloud Service": "云端服务",
|
||||
"Select TTS Engine": "选择 TTS 引擎",
|
||||
"Select TTS Engine Help": "选择您要使用的文本转语音引擎",
|
||||
"TTS Engine Details": "📋 {engine} 详细说明",
|
||||
@ -395,6 +401,16 @@
|
||||
"Subtitle calibration succeeded for multiple files": "字幕校准成功,共 {count} 个文件: {files}",
|
||||
"Subtitle calibration failed": "字幕校准失败",
|
||||
"Transcribed subtitles storage hint": "之前转录生成的字幕保存在 {path},可从该目录拖入上传",
|
||||
"Tavily Search Settings": "Tavily 联网搜索",
|
||||
"Tavily API Key": "Tavily API Key",
|
||||
"Tavily API Key Help": "用于短剧剧情理解前的联网检索。开启“联网搜索”后,会先按短剧名称检索剧情、人物和分集信息,再结合字幕分析。",
|
||||
"Tavily config saved": "Tavily 配置已保存",
|
||||
"联网搜索": "联网搜索",
|
||||
"Enable Web Search Help": "开启后,剧情理解会先使用 Tavily 按短剧名称联网检索,再结合检索结果和字幕分析剧情。",
|
||||
"Please configure Tavily API Key in Basic Settings": "请先在基础设置中配置 Tavily API Key",
|
||||
"Please enter short drama name before web search": "开启联网搜索前,请先填写短剧名称",
|
||||
"Searching short drama with Tavily...": "正在使用 Tavily 检索短剧信息...",
|
||||
"Tavily search failed": "Tavily 检索失败",
|
||||
"剧情理解": "剧情理解",
|
||||
"剧情理解结果": "剧情理解结果",
|
||||
"Analyzing plot...": "正在理解剧情...",
|
||||
@ -425,6 +441,30 @@
|
||||
"API URL": "API 地址",
|
||||
"IndexTTS API URL Help": "IndexTTS-1.5 API 服务地址",
|
||||
"IndexTTS2 API URL Help": "IndexTTS-2 API 服务地址,可填写服务根地址或完整 /tts 地址",
|
||||
"OmniVoice API URL Help": "OmniVoice-Pack API 服务地址,可填写服务根地址或完整 /tts 地址",
|
||||
"OmniVoice Language Code": "合成语言",
|
||||
"OmniVoice Language Code Help": "传给 OmniVoice-Pack 的 language 参数,例如 zh、en。",
|
||||
"OmniVoice Generation Mode": "生成模式",
|
||||
"OmniVoice Generation Mode Help": "自动音色无需额外参数;指令音色使用描述词;参考音频克隆需要参考音频和对应文本。",
|
||||
"OmniVoice Mode Auto": "自动音色",
|
||||
"OmniVoice Mode Voice Design": "指令音色",
|
||||
"OmniVoice Mode Voice Clone": "参考音频克隆",
|
||||
"OmniVoice Instruct": "音色指令",
|
||||
"OmniVoice Instruct Help": "描述希望生成的音色,例如性别、音高、口音或风格。",
|
||||
"OmniVoice Instruct Placeholder": "例如:female, low pitch, british accent",
|
||||
"OmniVoice Reference Text": "参考音频文本",
|
||||
"OmniVoice Reference Text Help": "参考音频对应的逐字文本;当前部署未启用 ASR 时必须填写。",
|
||||
"OmniVoice Reference Text Placeholder": "请输入参考音频中实际朗读的内容",
|
||||
"OmniVoice Num Step Help": "扩散生成步数,值越大通常质量更高但速度更慢。",
|
||||
"OmniVoice Guidance Scale Help": "控制文本条件的引导强度。",
|
||||
"OmniVoice Duration": "目标时长(秒)",
|
||||
"OmniVoice Duration Help": "0 表示由模型自动决定时长。",
|
||||
"OmniVoice Denoise": "启用降噪",
|
||||
"OmniVoice Denoise Help": "让 OmniVoice-Pack 对生成结果执行降噪处理。",
|
||||
"OmniVoice Postprocess Output": "后处理输出",
|
||||
"OmniVoice Postprocess Output Help": "启用 OmniVoice-Pack 的输出后处理。",
|
||||
"OmniVoice Preprocess Prompt": "预处理文本",
|
||||
"OmniVoice Preprocess Prompt Help": "启用 OmniVoice-Pack 的文本预处理。",
|
||||
"Reference Audio Source": "参考音频来源",
|
||||
"Reference Audio Source Help": "选择从资源目录选择参考音频,或上传新的参考音频",
|
||||
"Select from Resource Directory": "从资源目录选择",
|
||||
@ -484,6 +524,8 @@
|
||||
"Max Mel Tokens Help": "控制单次生成的最大 mel token 数,值越大可生成更长音频",
|
||||
"IndexTTS2 Usage Instructions Title": "💡 IndexTTS-2 使用说明",
|
||||
"IndexTTS2 Usage Instructions": "**IndexTTS-2 语音克隆**\n\n1. **选择音色**:复用 IndexTTS-1.5 的资源音频或上传参考音频\n2. **设置 API 地址**:例如 http://192.168.3.6:7863/tts,也可以填写服务根地址\n3. **调整情感参数**:默认使用 speaker,可按需切换到 audio、vector 或 text\n4. **调整生成参数**:temperature、top_p、top_k、num_beams、repetition_penalty 和 max_mel_tokens 会直接传给 IndexTTS-2 接口\n\n**注意事项**:\n- 参考音频质量会直接影响克隆效果\n- 首次请求可能需要加载模型,耗时更长\n- CPU 部署生成速度会明显慢于 GPU",
|
||||
"OmniVoice Usage Instructions Title": "OmniVoice 使用说明",
|
||||
"OmniVoice Usage Instructions": "**OmniVoice-Pack 语音合成**\n\n1. **自动音色**:只需要设置 API 地址和语言,可直接合成。\n2. **指令音色**:填写 instruct 描述想要的性别、音高、口音或风格。\n3. **参考音频克隆**:上传或选择参考音频,并填写该音频对应文本。\n\n**注意事项**:\n- 当前默认服务地址为 http://127.0.0.1:7866/tts\n- 参考音频克隆在服务未加载 ASR 模型时必须填写参考文本\n- OmniVoice 返回 WAV 音频,系统会按音频时长估算字幕段落",
|
||||
"Volcengine Access Key Help": "火山引擎 Access Key",
|
||||
"Volcengine Secret Key Help": "火山引擎 Secret Key",
|
||||
"Doubao AppID Help": "豆包语音应用 AppID",
|
||||
|
||||
@ -17,12 +17,101 @@ from loguru import logger
|
||||
from app.config import config
|
||||
from app.services.SDE.short_drama_explanation import analyze_subtitle, generate_narration_script
|
||||
from app.services.subtitle_text import read_subtitle_text
|
||||
from app.services.tavily_search import TavilySearchError, format_search_context, search_short_drama
|
||||
# 导入新的LLM服务模块 - 确保提供商被注册
|
||||
import app.services.llm # 这会触发提供商注册
|
||||
from app.services.llm.migration_adapter import SubtitleAnalyzerAdapter
|
||||
import re
|
||||
|
||||
|
||||
def _normalize_paths(paths):
    """Return the unique, non-empty path strings from ``paths`` in first-seen order.

    Args:
        paths: A single path (``str`` or ``os.PathLike``), an iterable of
            paths, or a falsy value such as ``None`` / ``""`` / ``[]``.

    Returns:
        A list of whitespace-stripped path strings. Empty strings, duplicates
        and entries that are neither ``str`` nor ``os.PathLike`` are dropped.
    """
    # A lone path is wrapped so it is not iterated character by character
    # (str) or rejected as non-iterable (PathLike).
    if isinstance(paths, (str, os.PathLike)):
        paths = [paths]
    if not paths:
        return []

    normalized_paths = []
    seen = set()
    for path in paths:
        if isinstance(path, os.PathLike):
            path = os.fspath(path)
        # os.fspath may yield bytes; only text paths are kept.
        if not isinstance(path, str):
            continue
        path = path.strip()
        if not path or path in seen:
            continue
        normalized_paths.append(path)
        seen.add(path)
    return normalized_paths
|
||||
|
||||
|
||||
def _build_combined_subtitle_content(subtitle_paths, video_paths=None):
    """Join the text of several subtitle files into one labelled document.

    Each existing subtitle file contributes a section made of a header
    (video number, optionally the video file name, and the subtitle file
    name) followed by the text read via ``read_subtitle_text``.

    Args:
        subtitle_paths: One subtitle path or an iterable of them.
        video_paths: Optional video path(s); the N-th video labels the N-th
            subtitle when available.

    Returns:
        The sections joined by blank lines, or ``""`` when no subtitle file
        exists.
    """
    video_paths = _normalize_paths(video_paths)
    sections = []
    for index, subtitle_path in enumerate(_normalize_paths(subtitle_paths), start=1):
        # Missing files are skipped, but the index keeps advancing so later
        # subtitles still line up with the video at the same position.
        if not os.path.exists(subtitle_path):
            continue

        subtitle_name = os.path.basename(subtitle_path)
        video_path = video_paths[index - 1] if index <= len(video_paths) else ""
        if video_path:
            header = (
                f"# 视频 {index}: {os.path.basename(video_path)}\n"
                f"字幕文件: {subtitle_name}"
            )
        else:
            header = f"# 视频 {index}\n字幕文件: {subtitle_name}"
        sections.append(f"{header}\n{read_subtitle_text(subtitle_path).text}".strip())

    return "\n\n".join(sections)
|
||||
|
||||
|
||||
def _coerce_video_id(value):
    """Convert ``value`` to a positive integer video id.

    Args:
        value: Any object; typically an int or numeric string.

    Returns:
        The integer when ``int(value)`` succeeds and is greater than zero,
        otherwise ``None``.
    """
    try:
        video_id = int(value)
    except (TypeError, ValueError, OverflowError):
        # OverflowError covers float infinity, which int() cannot convert.
        return None
    return video_id if video_id > 0 else None
|
||||
|
||||
|
||||
def _match_video_id_by_name(video_name, video_paths):
    """Find the 1-based position of the video whose file name equals ``video_name``.

    Args:
        video_name: A file name or path; only its base name is compared.
            Falsy values are treated as an empty name.
        video_paths: Iterable of video paths to search, in order.

    Returns:
        The 1-based index of the first path with a matching base name, or
        ``None`` when the name is empty or nothing matches.
    """
    video_name = str(video_name or "").strip()
    if not video_name:
        return None

    # The target base name does not change per iteration, so compute it once.
    target_name = os.path.basename(video_name)
    for index, video_path in enumerate(video_paths, start=1):
        if os.path.basename(video_path) == target_name:
            return index
    return None
|
||||
|
||||
|
||||
def _normalize_narration_items_video_sources(items, video_paths):
    """Give every narration item a valid ``video_id`` / ``video_name`` pair.

    Args:
        items: Iterable of narration entries; dict entries are normalized,
            anything else is passed through unchanged.
        video_paths: The selected video path(s).

    Returns:
        ``items`` itself when no video paths are available; otherwise a new
        list in which each dict entry is a shallow copy with ``video_id``
        (1-based) and ``video_name`` (base name of that video) set.
    """
    video_paths = _normalize_paths(video_paths)
    if not video_paths:
        return items

    normalized_items = []
    for item in items:
        if not isinstance(item, dict):
            normalized_items.append(item)
            continue

        # Work on a copy so the caller's dicts are not mutated.
        item_copy = item.copy()
        video_id = _coerce_video_id(item_copy.get("video_id") or item_copy.get("video_index"))
        # A file name matching one of the selected videos overrides the
        # numeric id carried by the item.
        matched_video_id = _match_video_id_by_name(
            item_copy.get("video_name") or item_copy.get("source_video"),
            video_paths,
        )
        if matched_video_id:
            video_id = matched_video_id
        # Missing or out-of-range ids fall back to the first video.
        if video_id is None or video_id > len(video_paths):
            logger.warning(f"片段 {item_copy.get('_id')} 未提供有效 video_id,默认使用视频 1")
            video_id = 1

        item_copy["video_id"] = video_id
        item_copy["video_name"] = os.path.basename(video_paths[video_id - 1])
        normalized_items.append(item_copy)

    return normalized_items
|
||||
|
||||
|
||||
def parse_and_fix_json(json_string):
|
||||
"""
|
||||
解析并修复JSON字符串
|
||||
@ -135,12 +224,83 @@ def parse_and_fix_json(json_string):
|
||||
return None
|
||||
|
||||
|
||||
def analyze_short_drama_plot(subtitle_path, temperature, tr=lambda key: key, subtitle_content=None):
|
||||
def _get_tavily_api_key() -> str:
    """Return the configured Tavily API key, or ``""`` when none is set.

    The Streamlit session value ``tavily_api_key`` takes precedence over the
    ``tavily_api_key`` entry in ``config.app``.

    Returns:
        The key with surrounding whitespace removed.
    """
    api_key = (
        st.session_state.get("tavily_api_key")
        or config.app.get("tavily_api_key")
        or ""
    )
    # Coerce to str so a non-string config value cannot break .strip().
    return str(api_key).strip()
|
||||
|
||||
|
||||
def _build_tavily_context(short_name: str, tr=lambda key: key) -> str | None:
    """Search Tavily for a short drama and return the formatted search context.

    Args:
        short_name: Name of the short drama to search for.
        tr: Callable mapping a message key to the text shown to the user;
            defaults to returning the key unchanged.

    Returns:
        The result of ``format_search_context`` on the search data, or
        ``None`` when the name is empty, no API key is configured, or the
        search raises. In each failure case an error is shown via
        ``st.error``.
    """
    short_name = str(short_name or "").strip()
    if not short_name:
        st.error(tr("Please enter short drama name before web search"))
        return None

    api_key = _get_tavily_api_key()
    if not api_key:
        st.error(tr("Please configure Tavily API Key in Basic Settings"))
        return None

    try:
        search_data = search_short_drama(
            short_name,
            api_key,
            search_depth=config.app.get("tavily_search_depth", "basic"),
            max_results=config.app.get("tavily_max_results", 5),
        )
        return format_search_context(search_data)
    except TavilySearchError as e:
        # Search-specific failure: the message alone is logged.
        logger.error(f"Tavily 短剧检索失败: {str(e)}")
        st.error(f"{tr('Tavily search failed')}: {str(e)}")
        return None
    except Exception as e:
        # Any other failure: log the full traceback for diagnosis.
        logger.error(f"Tavily 短剧检索异常: {traceback.format_exc()}")
        st.error(f"{tr('Tavily search failed')}: {str(e)}")
        return None
|
||||
|
||||
|
||||
def _build_plot_analysis_input(
|
||||
subtitle_content: str,
|
||||
short_name: str = "",
|
||||
enable_web_search: bool = False,
|
||||
tr=lambda key: key,
|
||||
) -> str | None:
|
||||
subtitle_content = str(subtitle_content or "").strip()
|
||||
if not enable_web_search:
|
||||
return subtitle_content
|
||||
|
||||
tavily_context = _build_tavily_context(short_name, tr)
|
||||
if tavily_context is None:
|
||||
return None
|
||||
|
||||
return f"""# 分析补充说明
|
||||
请先参考 Tavily 联网检索结果理解短剧名称、人物关系、剧情背景和公开剧情梗概,再结合原始字幕完成剧情理解。
|
||||
如果联网检索结果与字幕内容冲突,请以字幕内容为准;时间戳必须只从字幕内容中提取。
|
||||
|
||||
{tavily_context}
|
||||
|
||||
# 原始字幕
|
||||
{subtitle_content}"""
|
||||
|
||||
|
||||
def analyze_short_drama_plot(
|
||||
subtitle_path,
|
||||
temperature,
|
||||
tr=lambda key: key,
|
||||
subtitle_content=None,
|
||||
short_name: str = "",
|
||||
enable_web_search: bool = False,
|
||||
video_paths=None,
|
||||
):
|
||||
"""仅执行短剧字幕剧情理解,返回可编辑的剧情分析文本。"""
|
||||
if not subtitle_path:
|
||||
subtitle_paths = _normalize_paths(subtitle_path)
|
||||
if not subtitle_paths:
|
||||
st.error(tr("Please generate or upload subtitles first"))
|
||||
return None
|
||||
if not os.path.exists(subtitle_path):
|
||||
missing_subtitle_paths = [path for path in subtitle_paths if not os.path.exists(path)]
|
||||
if missing_subtitle_paths:
|
||||
st.error(tr("Subtitle file does not exist"))
|
||||
return None
|
||||
|
||||
@ -149,19 +309,31 @@ def analyze_short_drama_plot(subtitle_path, temperature, tr=lambda key: key, sub
|
||||
text_model = config.app.get(f'text_{text_provider}_model_name')
|
||||
text_base_url = config.app.get(f'text_{text_provider}_base_url')
|
||||
|
||||
subtitle_content = str(subtitle_content or "").strip() or read_subtitle_text(subtitle_path).text
|
||||
subtitle_content = str(subtitle_content or "").strip() or _build_combined_subtitle_content(
|
||||
subtitle_paths,
|
||||
video_paths,
|
||||
)
|
||||
if not subtitle_content:
|
||||
st.error(tr("Subtitle file is empty or unreadable"))
|
||||
return None
|
||||
|
||||
plot_analysis_input = _build_plot_analysis_input(
|
||||
subtitle_content,
|
||||
short_name=short_name,
|
||||
enable_web_search=enable_web_search,
|
||||
tr=tr,
|
||||
)
|
||||
if plot_analysis_input is None:
|
||||
return None
|
||||
|
||||
try:
|
||||
logger.info("使用新的LLM服务架构进行字幕分析")
|
||||
analyzer = SubtitleAnalyzerAdapter(text_api_key, text_model, text_base_url, text_provider)
|
||||
analysis_result = analyzer.analyze_subtitle(subtitle_content)
|
||||
analysis_result = analyzer.analyze_subtitle(plot_analysis_input)
|
||||
except Exception as e:
|
||||
logger.warning(f"使用新LLM服务失败,回退到旧实现: {str(e)}")
|
||||
analysis_result = analyze_subtitle(
|
||||
subtitle_content=subtitle_content,
|
||||
subtitle_content=plot_analysis_input,
|
||||
api_key=text_api_key,
|
||||
model=text_model,
|
||||
base_url=text_base_url,
|
||||
@ -186,6 +358,8 @@ def generate_script_short_sunmmary(
|
||||
tr=lambda key: key,
|
||||
plot_analysis=None,
|
||||
subtitle_content=None,
|
||||
enable_web_search: bool = False,
|
||||
video_paths=None,
|
||||
):
|
||||
"""
|
||||
生成 短剧解说 视频脚本
|
||||
@ -204,7 +378,12 @@ def generate_script_short_sunmmary(
|
||||
|
||||
try:
|
||||
with st.spinner(tr("Generating script...")):
|
||||
if not params.video_origin_path:
|
||||
selected_video_paths = _normalize_paths(
|
||||
video_paths
|
||||
or getattr(params, "video_origin_paths", [])
|
||||
or getattr(params, "video_origin_path", "")
|
||||
)
|
||||
if not selected_video_paths:
|
||||
st.error(tr("Please select video file first"))
|
||||
return
|
||||
"""
|
||||
@ -212,7 +391,9 @@ def generate_script_short_sunmmary(
|
||||
"""
|
||||
update_progress(30, tr("Parsing subtitles..."))
|
||||
# 判断字幕文件是否存在
|
||||
if not os.path.exists(subtitle_path):
|
||||
subtitle_paths = _normalize_paths(subtitle_path)
|
||||
missing_subtitle_paths = [path for path in subtitle_paths if not os.path.exists(path)]
|
||||
if not subtitle_paths or missing_subtitle_paths:
|
||||
st.error(tr("Subtitle file does not exist"))
|
||||
return
|
||||
|
||||
@ -225,7 +406,10 @@ def generate_script_short_sunmmary(
|
||||
text_base_url = config.app.get(f'text_{text_provider}_base_url')
|
||||
|
||||
# 读取字幕文件内容(无论使用哪种实现都需要)
|
||||
subtitle_content = str(subtitle_content or "").strip() or read_subtitle_text(subtitle_path).text
|
||||
subtitle_content = str(subtitle_content or "").strip() or _build_combined_subtitle_content(
|
||||
subtitle_paths,
|
||||
selected_video_paths,
|
||||
)
|
||||
if not subtitle_content:
|
||||
st.error(tr("Subtitle file is empty or unreadable"))
|
||||
return
|
||||
@ -238,16 +422,27 @@ def generate_script_short_sunmmary(
|
||||
"analysis": str(plot_analysis).strip(),
|
||||
}
|
||||
else:
|
||||
plot_analysis_input = subtitle_content
|
||||
if enable_web_search:
|
||||
update_progress(40, tr("Searching short drama with Tavily..."))
|
||||
plot_analysis_input = _build_plot_analysis_input(
|
||||
subtitle_content,
|
||||
short_name=video_theme,
|
||||
enable_web_search=True,
|
||||
tr=tr,
|
||||
)
|
||||
if plot_analysis_input is None:
|
||||
return
|
||||
try:
|
||||
# 优先使用新的LLM服务架构
|
||||
logger.info("使用新的LLM服务架构进行字幕分析")
|
||||
analysis_result = analyzer.analyze_subtitle(subtitle_content)
|
||||
analysis_result = analyzer.analyze_subtitle(plot_analysis_input)
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"使用新LLM服务失败,回退到旧实现: {str(e)}")
|
||||
# 回退到旧的实现
|
||||
analysis_result = analyze_subtitle(
|
||||
subtitle_content=subtitle_content,
|
||||
subtitle_content=plot_analysis_input,
|
||||
api_key=text_api_key,
|
||||
model=text_model,
|
||||
base_url=text_base_url,
|
||||
@ -320,7 +515,11 @@ def generate_script_short_sunmmary(
|
||||
logger.error(f"JSON结构错误,缺少items字段: {narration_dict}")
|
||||
st.stop()
|
||||
|
||||
script = json.dumps(narration_dict['items'], ensure_ascii=False, indent=2)
|
||||
narration_items = _normalize_narration_items_video_sources(
|
||||
narration_dict['items'],
|
||||
selected_video_paths,
|
||||
)
|
||||
script = json.dumps(narration_items, ensure_ascii=False, indent=2)
|
||||
|
||||
if script is None:
|
||||
st.error(tr("Script generation failed check logs"))
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user