viccy 7d4bd45f69 feat: 新增视频生成进度追踪与WebUI展示,优化剪映导出功能
- 添加FFmpeg视频合并进度回调支持,实现实时进度上报
- 为进度回调添加参数校验与异常捕获,避免回调失败影响主流程
- 重构任务状态更新逻辑,统一封装任务更新函数减少重复代码
- 重新划分视频生成的6个标准步骤,更新各阶段的状态上报逻辑
- 更新WebUI界面,支持展示当前生成步骤、总步骤与FFmpeg实时进度
- 优化剪映草稿导出功能,不再预先裁剪原视频,直接引用源素材时间戳
- 新增剪映草稿字幕生成功能,并补充对应的单元测试用例
2026-06-08 16:02:20 +08:00

1017 lines
39 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import math
import json
import os.path
import re
import traceback
from os import path
from loguru import logger
from app.config import config
from app.config.audio_config import AudioConfig, get_recommended_volumes_for_content
from app.models import const
from app.models.schema import VideoClipParams
from app.services import (
voice,
audio_merger,
subtitle_merger,
clip_video,
merger_video,
update_script,
generate_video,
script_subtitle,
)
from app.services import state as sm
from app.utils import utils
# Number of steps in the video generation pipeline. Reported as "step_total" in
# every task update and used as the final "step_current" value on completion.
VIDEO_GENERATION_TOTAL_STEPS = 6
def _update_video_generation_task(
    task_id: str,
    progress: int,
    message: str,
    step_current: int = 0,
    ffmpeg_progress: float | None = None,
    state: int = const.TASK_STATE_PROCESSING,
    **kwargs,
) -> None:
    """Publish a progress snapshot for a video generation task.

    Args:
        task_id: Identifier of the task being updated.
        progress: Overall progress value forwarded to the state store.
        message: Status text stored with the task.
        step_current: Index of the step currently running.
        ffmpeg_progress: Optional FFmpeg progress; when given it is clamped to
            the range 0..100 and rounded to one decimal place.
        state: Task state constant forwarded to the state store.
        **kwargs: Extra task fields; they are merged over the standard fields.
    """
    payload = dict(
        message=message,
        step_current=step_current,
        step_total=VIDEO_GENERATION_TOTAL_STEPS,
    )
    # Caller-supplied fields take precedence over the standard ones.
    payload.update(kwargs)

    if ffmpeg_progress is not None:
        capped = min(100.0, float(ffmpeg_progress))
        bounded = max(0.0, capped)
        payload["ffmpeg_progress"] = round(bounded, 1)

    sm.state.update_task(task_id, state=state, progress=progress, **payload)
def _is_auto_transcription_enabled(params: VideoClipParams) -> bool:
return bool(
getattr(params, "subtitle_enabled", True)
and getattr(params, "subtitle_auto_transcribe_enabled", False)
)
def _get_auto_transcription_backend(params: VideoClipParams) -> str:
backend = str(getattr(params, "subtitle_auto_transcribe_backend", "") or "").strip().lower()
if backend not in {"local", "firered", "bailian"}:
backend = "local"
return backend
def _get_original_subtitle_paths(params: VideoClipParams) -> list[str]:
subtitle_paths = getattr(params, "original_subtitle_paths", []) or []
if isinstance(subtitle_paths, str):
subtitle_paths = [subtitle_paths]
normalized_paths = []
seen = set()
for subtitle_path in subtitle_paths:
if not isinstance(subtitle_path, str):
continue
subtitle_path = subtitle_path.strip()
if subtitle_path and subtitle_path not in seen:
normalized_paths.append(subtitle_path)
seen.add(subtitle_path)
single_subtitle_path = str(getattr(params, "original_subtitle_path", "") or "").strip()
if single_subtitle_path and single_subtitle_path not in seen:
normalized_paths.insert(0, single_subtitle_path)
if not normalized_paths:
normalized_paths = _find_original_subtitle_paths_for_videos(_get_video_origin_paths(params))
return normalized_paths
def _get_video_origin_paths(params: VideoClipParams) -> list[str]:
video_paths = getattr(params, "video_origin_paths", []) or []
if isinstance(video_paths, str):
video_paths = [video_paths]
normalized_paths = []
seen = set()
for video_path in video_paths:
if not isinstance(video_path, str):
continue
video_path = video_path.strip()
if video_path and video_path not in seen:
normalized_paths.append(video_path)
seen.add(video_path)
single_video_path = str(getattr(params, "video_origin_path", "") or "").strip()
if single_video_path and single_video_path not in seen:
normalized_paths.insert(0, single_video_path)
return normalized_paths
def _video_stem_candidates(video_path: str) -> list[str]:
stem = path.splitext(path.basename(str(video_path or "").strip()))[0]
if not stem:
return []
candidates = [stem]
timestamp_stripped = re.sub(r"_[0-9]{14}$", "", stem)
if timestamp_stripped and timestamp_stripped not in candidates:
candidates.append(timestamp_stripped)
return candidates
def _find_original_subtitle_paths_for_videos(video_paths: list[str]) -> list[str]:
    """Match ``.srt`` files in the subtitle directory to videos by file name.

    For each video, a subtitle matches when its stem equals one of the video's
    stem candidates or starts with ``<candidate>_``. Among the matches the most
    recently modified file is chosen. Each subtitle is returned at most once,
    in the order of the videos that selected it.
    """
    subtitle_root = utils.subtitle_dir()
    if not path.isdir(subtitle_root):
        return []

    srt_files = [
        path.join(subtitle_root, name)
        for name in os.listdir(subtitle_root)
        if name.lower().endswith(".srt")
    ]
    if not srt_files:
        return []

    def _belongs_to(srt_path: str, stems: list[str]) -> bool:
        # True when the subtitle stem equals a candidate or extends it with "_".
        srt_stem = path.splitext(path.basename(srt_path))[0]
        return any(
            srt_stem == stem or srt_stem.startswith(f"{stem}_")
            for stem in stems
        )

    chosen = []
    for video_path in video_paths:
        stems = _video_stem_candidates(video_path)
        if not stems:
            continue

        related = [srt for srt in srt_files if _belongs_to(srt, stems)]
        if not related:
            continue

        # Newest modification time wins; ties keep the earliest listed file.
        newest = max(related, key=path.getmtime)
        if newest not in chosen:
            chosen.append(newest)

    if chosen:
        logger.info(f"未从参数获取原片字幕,已按视频文件名自动匹配: {chosen}")
    return chosen
def _create_programmatic_subtitle_file(
task_id: str,
list_script: list[dict],
params: VideoClipParams,
) -> str:
if not getattr(params, "subtitle_enabled", True):
return ""
original_subtitle_paths = _get_original_subtitle_paths(params)
logger.info(f"程序化字幕使用原片字幕路径: {original_subtitle_paths or '未提供'}")
return script_subtitle.create_script_subtitle_file(
task_id=task_id,
list_script=list_script,
original_subtitle_paths=original_subtitle_paths,
video_origin_paths=_get_video_origin_paths(params),
)
def _build_subtitle_mask_options(params: VideoClipParams, enabled=None) -> dict:
mask_configured = bool(
getattr(params, "subtitle_enabled", True)
and getattr(params, "subtitle_mask_enabled", False)
)
mask_enabled = mask_configured if enabled is None else mask_configured and enabled
return {
'subtitle_mask_enabled': mask_enabled,
'subtitle_mask_landscape_x_percent': getattr(params, "subtitle_mask_landscape_x_percent", 10.0),
'subtitle_mask_landscape_y_percent': getattr(params, "subtitle_mask_landscape_y_percent", 78.0),
'subtitle_mask_landscape_width_percent': getattr(params, "subtitle_mask_landscape_width_percent", 80.0),
'subtitle_mask_landscape_height_percent': getattr(params, "subtitle_mask_landscape_height_percent", 14.0),
'subtitle_mask_landscape_blur_radius': getattr(params, "subtitle_mask_landscape_blur_radius", 18),
'subtitle_mask_landscape_opacity_percent': getattr(params, "subtitle_mask_landscape_opacity_percent", 82),
'subtitle_mask_portrait_x_percent': getattr(params, "subtitle_mask_portrait_x_percent", 8.0),
'subtitle_mask_portrait_y_percent': getattr(params, "subtitle_mask_portrait_y_percent", 79.0),
'subtitle_mask_portrait_width_percent': getattr(params, "subtitle_mask_portrait_width_percent", 84.0),
'subtitle_mask_portrait_height_percent': getattr(params, "subtitle_mask_portrait_height_percent", 16.0),
'subtitle_mask_portrait_blur_radius': getattr(params, "subtitle_mask_portrait_blur_radius", 26),
'subtitle_mask_portrait_opacity_percent': getattr(params, "subtitle_mask_portrait_opacity_percent", 84),
'subtitle_position_landscape_y_percent': getattr(params, "subtitle_position_landscape_y_percent", 85.0),
'subtitle_position_portrait_y_percent': getattr(params, "subtitle_position_portrait_y_percent", 82.0),
}
def _transcribe_final_video(task_id: str, video_path: str, params: VideoClipParams) -> str:
    """Transcribe the fully merged video into an SRT file.

    The backend is chosen from ``params`` ("local", "firered", otherwise the
    API-key based service). Endpoint / key settings are read from ``params``
    first and from ``config.fun_asr`` second.

    Returns:
        Path of the generated subtitle file.

    Raises:
        FileNotFoundError: ``video_path`` is empty or does not exist.
        ValueError: The required API address or API key is blank.
        RuntimeError: The backend did not produce a subtitle file.
    """
    from app.services import fun_asr_subtitle

    if not (video_path and path.exists(video_path)):
        raise FileNotFoundError(f"自动转录视频不存在: {video_path}")

    backend = _get_auto_transcription_backend(params)
    subtitle_file = path.join(utils.task_dir(task_id), "auto_transcribed_final.srt")
    logger.info(f"开始自动转录最终视频: {video_path}, backend={backend}")

    if backend == "local":
        endpoint = getattr(
            params, "subtitle_auto_transcribe_api_url", ""
        ) or config.fun_asr.get("api_url", fun_asr_subtitle.LOCAL_FUN_ASR_API_URL)
        endpoint = str(endpoint).strip()
        if not endpoint:
            raise ValueError("请先输入本地 FunASR-Pack API 地址")
        hotword = str(getattr(params, "subtitle_auto_transcribe_hotword", "") or "").strip()
        speaker_split = bool(getattr(params, "subtitle_auto_transcribe_enable_spk", False))
        result_path = fun_asr_subtitle.create_with_local_fun_asr(
            local_file=video_path,
            subtitle_file=subtitle_file,
            api_url=endpoint,
            hotword=hotword,
            enable_spk=speaker_split,
        )
    elif backend == "firered":
        endpoint = getattr(
            params, "subtitle_auto_transcribe_firered_api_url", ""
        ) or config.fun_asr.get("firered_api_url", fun_asr_subtitle.LOCAL_FIRERED_ASR_API_URL)
        endpoint = str(endpoint).strip()
        if not endpoint:
            raise ValueError("请先输入本地ASR API 地址")
        result_path = fun_asr_subtitle.create_with_local_firered_asr(
            local_file=video_path,
            subtitle_file=subtitle_file,
            api_url=endpoint,
        )
    else:
        # Any remaining backend value ("bailian") uses the API-key service.
        credential = getattr(
            params, "subtitle_auto_transcribe_api_key", ""
        ) or config.fun_asr.get("api_key", "")
        credential = str(credential).strip()
        if not credential:
            raise ValueError("请先输入阿里百炼 API Key")
        result_path = fun_asr_subtitle.create_with_fun_asr(
            local_file=video_path,
            subtitle_file=subtitle_file,
            api_key=credential,
        )

    produced = bool(result_path) and path.exists(result_path)
    if not produced:
        raise RuntimeError("自动转录失败:未生成字幕文件")

    logger.info(f"自动转录字幕生成成功: {result_path}")
    return result_path
def _merge_auto_transcribed_subtitles(
    source_video_path: str,
    output_video_path: str,
    subtitle_path: str,
    params: VideoClipParams,
) -> str:
    """Render ``subtitle_path`` onto an already mixed video.

    No voice-over or BGM file is supplied; the source video's own audio is
    kept at volume 1.0 and BGM volume is 0.0. Subtitle styling comes from
    ``params``, and the subtitle mask is applied when it is configured there.

    Returns:
        Whatever ``generate_video.merge_materials`` returns.
    """
    render_options = dict(
        voice_volume=1.0,
        bgm_volume=0.0,
        original_audio_volume=1.0,
        keep_original_audio=True,
        subtitle_enabled=True,
        subtitle_font=params.font_name,
        subtitle_font_size=params.font_size,
        subtitle_color=params.text_fore_color,
        subtitle_bg_color=None,
        subtitle_position=params.subtitle_position,
        custom_position=params.custom_position,
        threads=params.n_threads,
    )
    render_options.update(_build_subtitle_mask_options(params, enabled=True))

    return generate_video.merge_materials(
        video_path=source_video_path,
        audio_path="",
        subtitle_path=subtitle_path,
        bgm_path="",
        output_path=output_video_path,
        options=render_options,
    )
def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict = None):
    """
    Background task: generate a narrated video using unified, OST-based clipping.

    Clip strategy per script segment (as described by the original author):
    - OST=0: clip to the TTS audio duration, original sound removed
    - OST=1: clip exactly by the script timestamp, original sound kept
    - OST=2: clip to the TTS audio duration, original sound kept

    Pipeline: load the script JSON -> generate TTS -> clip videos -> merge
    audio and subtitles -> concatenate clips -> final mux with subtitles, BGM
    and voice-over. When auto transcription is enabled, the muxed video is
    transcribed and the transcribed subtitles are rendered in a second pass.

    Args:
        task_id: Task identifier; also selects the task output directory.
        params: Video parameters.
        subclip_path_videos: Optional mapping of script segment ``_id`` to a
            pre-clipped video path, used only when the unified clip for that
            segment is missing.

    Returns:
        dict with "videos" and "combined_videos" path lists, plus "subtitles"
        when an auto-transcribed subtitle file was produced.

    Raises:
        ValueError: The script file does not exist or cannot be read/parsed.
    """
    # NOTE(review): these module-level names are shared by every task running
    # in this process; concurrent tasks could overwrite each other's values.
    global merged_audio_path, merged_subtitle_path

    logger.info(f"\n\n## 开始任务: {task_id}")
    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=0)

    # ------------------------------------------------------------------
    # 1. Load the clip script
    # ------------------------------------------------------------------
    logger.info("\n\n## 1. 加载视频脚本")
    video_script_path = path.join(params.video_clip_json_path)

    if path.exists(video_script_path):
        try:
            with open(video_script_path, "r", encoding="utf-8") as f:
                list_script = json.load(f)
                video_list = [i['narration'] for i in list_script]
                video_ost = [i['OST'] for i in list_script]
                time_list = [i['timestamp'] for i in list_script]

                video_script = " ".join(video_list)
                logger.debug(f"解说完整脚本: \n{video_script}")
                logger.debug(f"解说 OST 列表: \n{video_ost}")
                logger.debug(f"解说时间戳列表: \n{time_list}")
        except Exception as e:
            # Log and chain the root cause so a malformed script can be diagnosed.
            logger.error(f"无法读取视频json脚本请检查脚本格式是否正确: {e}")
            raise ValueError("无法读取视频json脚本请检查脚本格式是否正确") from e
    else:
        logger.error(f"解说脚本文件不存在: {video_script_path},请先点击【保存脚本】按钮保存脚本后再生成视频")
        raise ValueError("解说脚本文件不存在!请先点击【保存脚本】按钮保存脚本后再生成视频。")

    # ------------------------------------------------------------------
    # 2. Generate audio material with TTS
    # ------------------------------------------------------------------
    logger.info("\n\n## 2. 根据OST设置生成音频列表")
    # TTS is only generated for OST=0 (narration only) and OST=2 (narration
    # plus original sound).
    tts_segments = [
        segment for segment in list_script
        if segment['OST'] in [0, 2]
    ]
    logger.debug(f"需要生成TTS的片段数: {len(tts_segments)}")

    tts_results = voice.tts_multiple(
        task_id=task_id,
        list_script=tts_segments,  # only the segments that need TTS
        tts_engine=params.tts_engine,
        voice_name=params.voice_name,
        voice_rate=params.voice_rate,
        voice_pitch=params.voice_pitch,
    )

    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)

    # ------------------------------------------------------------------
    # 3. Unified video clipping (strategy depends on the OST type)
    # ------------------------------------------------------------------
    logger.info("\n\n## 3. 统一视频裁剪基于OST类型")

    video_clip_result = clip_video.clip_video_unified(
        video_origin_path=params.video_origin_path,
        video_origin_paths=getattr(params, "video_origin_paths", []),
        script_list=list_script,
        tts_results=tts_results
    )

    # Update timestamps and file paths in the script entries.
    tts_clip_result = {tts_result['_id']: tts_result['audio_file'] for tts_result in tts_results}
    subclip_clip_result = {
        tts_result['_id']: tts_result['subtitle_file'] for tts_result in tts_results
    }
    new_script_list = update_script.update_script_timestamps(list_script, video_clip_result, tts_clip_result, subclip_clip_result)

    logger.info(f"统一裁剪完成,处理了 {len(video_clip_result)} 个视频片段")

    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60)

    # ------------------------------------------------------------------
    # 4. Merge audio and subtitles
    # ------------------------------------------------------------------
    logger.info("\n\n## 4. 合并音频和字幕")
    total_duration = sum([script["duration"] for script in new_script_list])

    # Defaults are assigned before the try block. The previous
    # "'name' not in locals()" check was always true because the names are
    # declared global, so any subtitle failure also discarded an already
    # merged audio track.
    merged_audio_path = ""
    merged_subtitle_path = ""

    if tts_segments:
        try:
            # Merge the audio files.
            merged_audio_path = audio_merger.merge_audio_files(
                task_id=task_id,
                total_duration=total_duration,
                list_script=new_script_list
            )
            logger.info(f"音频文件合并成功->{merged_audio_path}")

            # Prefer the script-based subtitle file; fall back to merging the
            # per-segment TTS subtitle files.
            if getattr(params, "subtitle_enabled", True):
                try:
                    merged_subtitle_path = _create_programmatic_subtitle_file(
                        task_id,
                        new_script_list,
                        params,
                    )
                except Exception as e:
                    logger.warning(f"程序化字幕生成失败将尝试合并TTS字幕: {e}")
            if not merged_subtitle_path and getattr(params, "subtitle_enabled", True):
                merged_subtitle_path = subtitle_merger.merge_subtitle_files(new_script_list)
            if merged_subtitle_path:
                logger.info(f"字幕文件合并成功->{merged_subtitle_path}")
            else:
                logger.warning("没有有效的字幕内容,将生成无字幕视频")
                merged_subtitle_path = ""
        except Exception as e:
            logger.error(f"合并音频/字幕文件失败: {str(e)}")
            # Keep whatever was produced before the failure; only normalize a
            # falsy subtitle value to the empty string.
            merged_subtitle_path = merged_subtitle_path or ""
    else:
        logger.warning("没有需要合并的音频/字幕")
        if getattr(params, "subtitle_enabled", True):
            try:
                merged_subtitle_path = _create_programmatic_subtitle_file(
                    task_id,
                    new_script_list,
                    params,
                )
            except Exception as e:
                logger.warning(f"程序化字幕生成失败: {e}")

    # ------------------------------------------------------------------
    # 5. Concatenate the video clips
    # ------------------------------------------------------------------
    final_video_paths = []
    combined_video_paths = []

    combined_video_path = path.join(utils.task_dir(task_id), "merger.mp4")
    logger.info(f"\n\n## 5. 合并视频: => {combined_video_path}")

    # Use the clips produced by the unified clipping step.
    video_clips = []
    for new_script in new_script_list:
        video_path = new_script.get('video')
        if video_path and os.path.exists(video_path):
            video_clips.append(video_path)
        else:
            logger.warning(f"片段 {new_script.get('_id')} 的视频文件不存在或未生成: {video_path}")
            # Fall back to the caller-supplied pre-clipped video, if any.
            if subclip_path_videos and new_script.get('_id') in subclip_path_videos:
                backup_video = subclip_path_videos[new_script.get('_id')]
                if os.path.exists(backup_video):
                    video_clips.append(backup_video)
                    logger.info(f"使用备用视频: {backup_video}")
                else:
                    logger.error(f"备用视频也不存在: {backup_video}")
            else:
                logger.error(f"无法找到片段 {new_script.get('_id')} 的视频文件")

    logger.info(f"准备合并 {len(video_clips)} 个视频片段")

    # NOTE(review): video_ost has one entry per script segment, while
    # video_clips omits segments whose file is missing, so the two lists can
    # differ in length - confirm how combine_clip_videos pairs them.
    merger_video.combine_clip_videos(
        output_video_path=combined_video_path,
        video_paths=video_clips,
        video_ost_list=video_ost,
        video_aspect=params.video_aspect,
        threads=params.n_threads
    )
    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=80)

    # ------------------------------------------------------------------
    # 6. Final mux: subtitles / BGM / voice-over / video
    # ------------------------------------------------------------------
    output_video_path = path.join(utils.task_dir(task_id), "combined.mp4")
    auto_transcription_enabled = _is_auto_transcription_enabled(params)
    # With auto transcription the first pass writes an intermediate file and
    # the subtitle pass writes the final output path.
    merge_output_video_path = (
        path.join(utils.task_dir(task_id), "combined_without_auto_subtitles.mp4")
        if auto_transcription_enabled
        else output_video_path
    )
    logger.info(f"\n\n## 6. 最后一步: 合并字幕/BGM/配音/视频 -> {merge_output_video_path}")

    bgm_path = utils.get_bgm_file(
        bgm_type=getattr(params, "bgm_type", "random"),
        bgm_file=getattr(params, "bgm_file", ""),
    )

    # Recommended volume configuration.
    optimized_volumes = get_recommended_volumes_for_content('mixed')

    # Segments with OST=1 keep the original sound, so its volume stays at 1.0.
    has_original_audio_segments = any(segment['OST'] == 1 for segment in list_script)

    # A user value that differs from the default wins over the recommendation.
    final_tts_volume = params.tts_volume if hasattr(params, 'tts_volume') and params.tts_volume != 1.0 else optimized_volumes['tts_volume']

    if has_original_audio_segments:
        final_original_volume = 1.0  # keep the original sound unchanged
        logger.info("检测到原声片段原声音量设置为1.0以保持与原视频一致")
    else:
        final_original_volume = params.original_volume if hasattr(params, 'original_volume') and params.original_volume != 0.7 else optimized_volumes['original_volume']

    final_bgm_volume = params.bgm_volume if hasattr(params, 'bgm_volume') and params.bgm_volume != 0.3 else optimized_volumes['bgm_volume']

    logger.info(f"音量配置 - TTS: {final_tts_volume}, 原声: {final_original_volume}, BGM: {final_bgm_volume}")

    options = {
        'voice_volume': final_tts_volume,  # voice-over volume
        'bgm_volume': final_bgm_volume,  # background music volume
        'original_audio_volume': final_original_volume,  # original sound volume
        'keep_original_audio': True,  # keep the original sound
        # Script subtitles are skipped when auto transcription adds its own.
        'subtitle_enabled': params.subtitle_enabled and not auto_transcription_enabled,
        'subtitle_font': params.font_name,  # per the original note: a relative font path looked up under font_dir()
        'subtitle_font_size': params.font_size,
        'subtitle_color': params.text_fore_color,
        'subtitle_bg_color': None,  # None means a transparent background
        'subtitle_position': params.subtitle_position,
        'custom_position': params.custom_position,
        'threads': params.n_threads,
        **_build_subtitle_mask_options(params, enabled=not auto_transcription_enabled),
    }
    generate_video.merge_materials(
        video_path=combined_video_path,
        audio_path=merged_audio_path,
        subtitle_path=merged_subtitle_path,
        bgm_path=bgm_path,
        output_path=merge_output_video_path,
        options=options
    )

    auto_subtitle_path = ""
    if auto_transcription_enabled:
        sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=90)
        logger.info("\n\n## 7. 自动转录最终视频字幕")
        auto_subtitle_path = _transcribe_final_video(task_id, merge_output_video_path, params)

        sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=95)
        logger.info(f"\n\n## 8. 压入自动转录字幕 -> {output_video_path}")
        _merge_auto_transcribed_subtitles(
            source_video_path=merge_output_video_path,
            output_video_path=output_video_path,
            subtitle_path=auto_subtitle_path,
            params=params,
        )

    final_video_paths.append(output_video_path)
    combined_video_paths.append(combined_video_path)

    logger.success(f"任务 {task_id} 已完成, 生成 {len(final_video_paths)} 个视频.")

    kwargs = {
        "videos": final_video_paths,
        "combined_videos": combined_video_paths
    }
    if auto_subtitle_path:
        kwargs["subtitles"] = [auto_subtitle_path]
    sm.state.update_task(task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs)
    return kwargs
def start_subclip_unified(task_id: str, params: VideoClipParams):
    """
    Generate a narrated video using only the unified, OST-based clipping.

    Unlike ``start_subclip`` this variant takes no pre-clipped fallback videos
    and reports step-level progress (current step, total steps and FFmpeg
    progress of the final mux) through ``_update_video_generation_task``.

    Pipeline: load the script JSON -> generate TTS -> clip videos -> merge
    audio and subtitles -> concatenate clips -> final mux. Auto transcription
    of the final video only runs when it is enabled and no subtitle file was
    produced in the merge step.

    Args:
        task_id: Task identifier; also selects the task output directory.
        params: Video parameters.

    Returns:
        dict with "videos" and "combined_videos" path lists, plus "subtitles"
        when an auto-transcribed subtitle file was produced.

    Raises:
        ValueError: The script file does not exist or cannot be read/parsed.
    """
    # NOTE(review): these module-level names are shared by every task running
    # in this process; concurrent tasks could overwrite each other's values.
    global merged_audio_path, merged_subtitle_path

    logger.info(f"\n\n## 开始统一视频处理任务: {task_id}")
    _update_video_generation_task(
        task_id,
        progress=0,
        message="正在初始化视频生成任务",
        step_current=0,
    )

    # ------------------------------------------------------------------
    # 1. Load the clip script
    # ------------------------------------------------------------------
    logger.info("\n\n## 1. 加载视频脚本")
    _update_video_generation_task(
        task_id,
        progress=5,
        message="正在加载剪辑脚本",
        step_current=1,
    )
    video_script_path = path.join(params.video_clip_json_path)

    if path.exists(video_script_path):
        try:
            with open(video_script_path, "r", encoding="utf-8") as f:
                list_script = json.load(f)
                video_list = [i['narration'] for i in list_script]
                video_ost = [i['OST'] for i in list_script]
                time_list = [i['timestamp'] for i in list_script]

                video_script = " ".join(video_list)
                logger.debug(f"解说完整脚本: \n{video_script}")
                logger.debug(f"解说 OST 列表: \n{video_ost}")
                logger.debug(f"解说时间戳列表: \n{time_list}")
        except Exception as e:
            # Log and chain the root cause so a malformed script can be diagnosed.
            logger.error(f"无法读取视频json脚本请检查脚本格式是否正确: {e}")
            raise ValueError("无法读取视频json脚本请检查脚本格式是否正确") from e
    else:
        logger.error(f"解说脚本文件不存在: {video_script_path},请先点击【保存脚本】按钮保存脚本后再生成视频")
        raise ValueError("解说脚本文件不存在!请先点击【保存脚本】按钮保存脚本后再生成视频。")

    # ------------------------------------------------------------------
    # 2. Generate audio material with TTS
    # ------------------------------------------------------------------
    logger.info("\n\n## 2. 根据OST设置生成音频列表")
    _update_video_generation_task(
        task_id,
        progress=10,
        message="正在生成 TTS 配音",
        step_current=2,
    )
    # TTS is only generated for OST=0 (narration only) and OST=2 (narration
    # plus original sound).
    tts_segments = [
        segment for segment in list_script
        if segment['OST'] in [0, 2]
    ]
    logger.debug(f"需要生成TTS的片段数: {len(tts_segments)}")

    tts_results = voice.tts_multiple(
        task_id=task_id,
        list_script=tts_segments,  # only the segments that need TTS
        tts_engine=params.tts_engine,
        voice_name=params.voice_name,
        voice_rate=params.voice_rate,
        voice_pitch=params.voice_pitch,
    )

    _update_video_generation_task(
        task_id,
        progress=20,
        message="TTS 配音生成完成",
        step_current=2,
    )

    # ------------------------------------------------------------------
    # 3. Unified video clipping (strategy depends on the OST type)
    # ------------------------------------------------------------------
    logger.info("\n\n## 3. 统一视频裁剪基于OST类型")
    _update_video_generation_task(
        task_id,
        progress=30,
        message="正在按脚本裁剪视频片段",
        step_current=3,
    )

    video_clip_result = clip_video.clip_video_unified(
        video_origin_path=params.video_origin_path,
        video_origin_paths=getattr(params, "video_origin_paths", []),
        script_list=list_script,
        tts_results=tts_results
    )

    # Update timestamps and file paths in the script entries.
    tts_clip_result = {tts_result['_id']: tts_result['audio_file'] for tts_result in tts_results}
    subclip_clip_result = {
        tts_result['_id']: tts_result['subtitle_file'] for tts_result in tts_results
    }
    new_script_list = update_script.update_script_timestamps(list_script, video_clip_result, tts_clip_result, subclip_clip_result)

    logger.info(f"统一裁剪完成,处理了 {len(video_clip_result)} 个视频片段")

    _update_video_generation_task(
        task_id,
        progress=60,
        message="视频片段裁剪完成",
        step_current=3,
    )

    # ------------------------------------------------------------------
    # 4. Merge audio and subtitles
    # ------------------------------------------------------------------
    logger.info("\n\n## 4. 合并音频和字幕")
    _update_video_generation_task(
        task_id,
        progress=65,
        message="正在合并配音和字幕",
        step_current=4,
    )
    total_duration = sum([script["duration"] for script in new_script_list])

    # Defaults are assigned before the try block. The previous
    # "'name' not in locals()" check was always true because the names are
    # declared global, so any subtitle failure also discarded an already
    # merged audio track.
    merged_audio_path = ""
    merged_subtitle_path = ""

    if tts_segments:
        try:
            # Merge the audio files.
            merged_audio_path = audio_merger.merge_audio_files(
                task_id=task_id,
                total_duration=total_duration,
                list_script=new_script_list
            )
            logger.info(f"音频文件合并成功->{merged_audio_path}")

            # Prefer subtitles built from the script text and the final
            # timeline; fall back to merging the TTS subtitle files.
            if getattr(params, "subtitle_enabled", True):
                try:
                    merged_subtitle_path = _create_programmatic_subtitle_file(
                        task_id,
                        new_script_list,
                        params,
                    )
                except Exception as e:
                    logger.warning(f"程序化字幕生成失败将尝试合并TTS字幕: {e}")
            if not merged_subtitle_path and getattr(params, "subtitle_enabled", True):
                merged_subtitle_path = subtitle_merger.merge_subtitle_files(new_script_list)
            if merged_subtitle_path:
                logger.info(f"字幕文件合并成功->{merged_subtitle_path}")
            else:
                logger.warning("没有有效的字幕内容,将生成无字幕视频")
                merged_subtitle_path = ""
        except Exception as e:
            logger.error(f"合并音频/字幕文件失败: {str(e)}")
            # Keep whatever was produced before the failure; only normalize a
            # falsy subtitle value to the empty string.
            merged_subtitle_path = merged_subtitle_path or ""
    else:
        logger.warning("没有需要合并的音频/字幕")
        if getattr(params, "subtitle_enabled", True):
            try:
                merged_subtitle_path = _create_programmatic_subtitle_file(
                    task_id,
                    new_script_list,
                    params,
                )
            except Exception as e:
                logger.warning(f"程序化字幕生成失败: {e}")

    _update_video_generation_task(
        task_id,
        progress=70,
        message="配音和字幕合并完成",
        step_current=4,
    )

    # ------------------------------------------------------------------
    # 5. Concatenate the video clips
    # ------------------------------------------------------------------
    final_video_paths = []
    combined_video_paths = []

    combined_video_path = path.join(utils.task_dir(task_id), "merger.mp4")
    logger.info(f"\n\n## 5. 合并视频: => {combined_video_path}")
    _update_video_generation_task(
        task_id,
        progress=75,
        message="正在合并视频片段",
        step_current=5,
    )

    # Use the clips produced by the unified clipping step.
    video_clips = []
    for new_script in new_script_list:
        video_path = new_script.get('video')
        if video_path and os.path.exists(video_path):
            video_clips.append(video_path)
        else:
            logger.error(f"片段 {new_script.get('_id')} 的视频文件不存在: {video_path}")

    logger.info(f"准备合并 {len(video_clips)} 个视频片段")

    # NOTE(review): video_ost has one entry per script segment, while
    # video_clips omits segments whose file is missing, so the two lists can
    # differ in length - confirm how combine_clip_videos pairs them.
    merger_video.combine_clip_videos(
        output_video_path=combined_video_path,
        video_paths=video_clips,
        video_ost_list=video_ost,
        video_aspect=params.video_aspect,
        threads=params.n_threads
    )
    _update_video_generation_task(
        task_id,
        progress=80,
        message="视频片段合并完成",
        step_current=5,
    )

    # ------------------------------------------------------------------
    # 6. Final mux: subtitles / BGM / voice-over / video
    # ------------------------------------------------------------------
    output_video_path = path.join(utils.task_dir(task_id), "combined.mp4")
    # Auto transcription only runs when no subtitle file exists yet.
    auto_transcription_enabled = _is_auto_transcription_enabled(params) and not bool(merged_subtitle_path)
    if _is_auto_transcription_enabled(params) and merged_subtitle_path:
        logger.info("已生成字幕文件,跳过最终视频自动转录")
    # With auto transcription the first pass writes an intermediate file and
    # the subtitle pass writes the final output path.
    merge_output_video_path = (
        path.join(utils.task_dir(task_id), "combined_without_auto_subtitles.mp4")
        if auto_transcription_enabled
        else output_video_path
    )
    logger.info(f"\n\n## 6. 最后一步: 合并字幕/BGM/配音/视频 -> {merge_output_video_path}")
    _update_video_generation_task(
        task_id,
        progress=85,
        message="正在合成最终视频",
        step_current=6,
        ffmpeg_progress=0,
    )

    bgm_path = utils.get_bgm_file(
        bgm_type=getattr(params, "bgm_type", "random"),
        bgm_file=getattr(params, "bgm_file", ""),
    )

    # Recommended volume configuration.
    optimized_volumes = get_recommended_volumes_for_content('mixed')

    # Segments with OST=1 keep the original sound, so its volume stays at 1.0.
    has_original_audio_segments = any(segment['OST'] == 1 for segment in list_script)

    # A user value that differs from the default wins over the recommendation.
    final_tts_volume = params.tts_volume if hasattr(params, 'tts_volume') and params.tts_volume != 1.0 else optimized_volumes['tts_volume']

    if has_original_audio_segments:
        final_original_volume = 1.0  # keep the original sound unchanged
        logger.info("检测到原声片段原声音量设置为1.0以保持与原视频一致")
    else:
        final_original_volume = params.original_volume if hasattr(params, 'original_volume') and params.original_volume != 0.7 else optimized_volumes['original_volume']

    final_bgm_volume = params.bgm_volume if hasattr(params, 'bgm_volume') and params.bgm_volume != 0.3 else optimized_volumes['bgm_volume']

    logger.info(f"音量配置 - TTS: {final_tts_volume}, 原声: {final_original_volume}, BGM: {final_bgm_volume}")

    options = {
        'voice_volume': final_tts_volume,
        'bgm_volume': final_bgm_volume,
        'original_audio_volume': final_original_volume,
        'keep_original_audio': True,
        # Script subtitles are skipped when auto transcription adds its own.
        'subtitle_enabled': params.subtitle_enabled and not auto_transcription_enabled,
        'subtitle_font': params.font_name,
        'subtitle_font_size': params.font_size,
        'subtitle_color': params.text_fore_color,
        'subtitle_bg_color': None,
        'subtitle_position': params.subtitle_position,
        'custom_position': params.custom_position,
        'threads': params.n_threads,
        **_build_subtitle_mask_options(params, enabled=not auto_transcription_enabled),
    }

    # The final mux occupies overall progress 85..99, or 85..89 when the
    # auto-transcription passes (90 and 95) still follow.
    final_merge_progress_start = 85
    final_merge_progress_end = 89 if auto_transcription_enabled else 99

    def update_final_merge_progress(ffmpeg_progress: float):
        # Map the 0..100 FFmpeg progress onto the overall progress window.
        progress_span = final_merge_progress_end - final_merge_progress_start
        overall_progress = final_merge_progress_start + int(
            round((max(0.0, min(100.0, float(ffmpeg_progress))) / 100) * progress_span)
        )
        _update_video_generation_task(
            task_id,
            progress=overall_progress,
            message="正在合成最终视频",
            step_current=6,
            ffmpeg_progress=ffmpeg_progress,
        )

    generate_video.merge_materials(
        video_path=combined_video_path,
        audio_path=merged_audio_path,
        subtitle_path=merged_subtitle_path,
        bgm_path=bgm_path,
        output_path=merge_output_video_path,
        options=options,
        progress_callback=update_final_merge_progress,
    )

    auto_subtitle_path = ""
    if auto_transcription_enabled:
        _update_video_generation_task(
            task_id,
            progress=90,
            message="正在自动转录最终视频",
            step_current=6,
        )
        logger.info("\n\n## 7. 自动转录最终视频字幕")
        auto_subtitle_path = _transcribe_final_video(task_id, merge_output_video_path, params)

        _update_video_generation_task(
            task_id,
            progress=95,
            message="正在压入自动转录字幕",
            step_current=6,
        )
        logger.info(f"\n\n## 8. 压入自动转录字幕 -> {output_video_path}")
        _merge_auto_transcribed_subtitles(
            source_video_path=merge_output_video_path,
            output_video_path=output_video_path,
            subtitle_path=auto_subtitle_path,
            params=params,
        )

    final_video_paths.append(output_video_path)
    combined_video_paths.append(combined_video_path)

    logger.success(f"统一处理任务 {task_id} 已完成, 生成 {len(final_video_paths)} 个视频.")

    kwargs = {
        "videos": final_video_paths,
        "combined_videos": combined_video_paths
    }
    if auto_subtitle_path:
        kwargs["subtitles"] = [auto_subtitle_path]
    _update_video_generation_task(
        task_id,
        progress=100,
        message="视频生成完成",
        step_current=VIDEO_GENERATION_TOTAL_STEPS,
        state=const.TASK_STATE_COMPLETE,
        **kwargs
    )
    return kwargs
def validate_params(video_path, audio_path, output_file, params):
    """
    Validate the inputs of a video merge and prepare the output directory.

    Args:
        video_path: Path of the video file; must exist.
        audio_path: Path of the audio file; may be an empty string, otherwise
            it must exist.
        output_file: Path of the output file. Its parent directory is created
            when missing; a bare file name (no directory part) is accepted.
        params: Video parameters; must be truthy.

    Raises:
        FileNotFoundError: The video file, or a supplied audio file, is missing.
        ValueError: ``video_path``, ``output_file`` or ``params`` is empty.
    """
    if not video_path:
        raise ValueError("视频路径不能为空")
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"视频文件不存在: {video_path}")

    # The audio file is only checked when a path was supplied.
    if audio_path and not os.path.exists(audio_path):
        raise FileNotFoundError(f"音频文件不存在: {audio_path}")

    if not output_file:
        raise ValueError("输出文件路径不能为空")

    # Make sure the output directory exists. dirname() is "" for a bare file
    # name, and os.makedirs("") raises, so that case is skipped. exist_ok
    # avoids a failure if the directory appears between check and creation.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    if not params:
        raise ValueError("视频参数不能为空")
if __name__ == "__main__":
    # Manual demo run of start_subclip using hard-coded, machine-specific paths.
    task_id = "demo"

    # Clips are cut ahead of time so the intermediate videos are easy to
    # inspect. The mapping is keyed by script segment id and is passed to
    # start_subclip as its fallback clip mapping.
    subclip_path_videos = {
        1: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/vid_00-00-05-390@00-00-57-980.mp4',
        2: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/vid_00-00-28-900@00-00-43-700.mp4',
        3: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/vid_00-01-17-840@00-01-27-600.mp4',
        4: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/vid_00-02-35-460@00-02-52-380.mp4',
        5: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/vid_00-06-59-520@00-07-29-500.mp4',
    }

    # Script JSON and source video used for the demo run.
    params = VideoClipParams(
        video_clip_json_path="/Users/apple/Desktop/home/NarratoAI/resource/scripts/2025-0507-223311.json",
        video_origin_path="/Users/apple/Desktop/home/NarratoAI/resource/videos/merged_video_4938.mp4",
    )
    start_subclip(task_id, params, subclip_path_videos)