feat(task): 重构视频剪辑流程并添加新功能

- 重构了 start_subclip 函数,优化了视频剪辑流程
- 新增 clip_video 函数,用于裁剪视频片段
- 改进了字幕生成和处理逻辑
- 优化了音频合并和处理
- 更新了任务状态管理
This commit is contained in:
linyq 2025-05-06 21:43:20 +08:00
parent 42151f8766
commit 2914cd924d
8 changed files with 1170 additions and 161 deletions

View File

@ -46,7 +46,7 @@ def merge_audio_files(task_id: str, audio_files: list, total_duration: float, li
tts_audio = AudioSegment.from_file(audio_file) tts_audio = AudioSegment.from_file(audio_file)
# 获取片段的开始和结束时间 # 获取片段的开始和结束时间
start_time, end_time = segment['new_timestamp'].split('-') start_time, end_time = segment['timestamp'].split('-')
start_seconds = utils.time_to_seconds(start_time) start_seconds = utils.time_to_seconds(start_time)
end_seconds = utils.time_to_seconds(end_time) end_seconds = utils.time_to_seconds(end_time)

227
app/services/clip_video.py Normal file
View File

@ -0,0 +1,227 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''
@Project: NarratoAI
@File : clip_video
@Author : 小林同学
@Date : 2025/5/6 下午6:14
'''
import os
import subprocess
import json
import hashlib
import logging
from typing import Dict, List, Optional
from pathlib import Path
# 配置日志
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
def parse_timestamp(timestamp: str) -> tuple:
    """
    Split a timestamp string into its start and end parts.

    Args:
        timestamp: A string in the form 'HH:MM:SS-HH:MM:SS'.

    Returns:
        tuple: (start, end), both 'HH:MM:SS' strings.
    """
    # Exactly-two unpacking keeps the original ValueError on malformed input.
    first, second = timestamp.split('-')
    return first, second
def calculate_end_time(start_time: str, duration: float, extra_seconds: float = 1.0) -> str:
    """
    Compute an end time from a start time plus a duration.

    Args:
        start_time: Start time in 'HH:MM:SS' form.
        duration: Duration in seconds (may be fractional).
        extra_seconds: Extra headroom appended to the end, default 1 second.

    Returns:
        str: The resulting end time as 'HH:MM:SS' (fractions truncated).
    """
    hrs, mins, secs = (int(part) for part in start_time.split(':'))
    # Truncate the fractional tail before splitting back into H/M/S.
    whole_seconds = int(hrs * 3600 + mins * 60 + secs + duration + extra_seconds)
    minutes, seconds = divmod(whole_seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
def check_hardware_acceleration() -> Optional[str]:
    """
    Probe the local ffmpeg build for a usable hardware acceleration method.

    Each candidate is tried by running a minimal ffmpeg command with the
    corresponding -hwaccel flag; the first probe that exits successfully
    wins. This replaces three copy-pasted probe branches with one loop.

    Returns:
        Optional[str]: 'cuda', 'videotoolbox' or 'qsv', or None when no
        hardware acceleration is available (or ffmpeg cannot be run).
    """
    # Probe order reflects preference: NVIDIA CUDA, macOS VideoToolbox, Intel QSV.
    for accel in ("cuda", "videotoolbox", "qsv"):
        try:
            probe = subprocess.run(
                ["ffmpeg", "-hwaccel", accel, "-i", "/dev/null", "-f", "null", "-"],
                stderr=subprocess.PIPE, stdout=subprocess.PIPE, text=True, check=False
            )
            if probe.returncode == 0:
                return accel
        except Exception:
            # ffmpeg missing or not runnable; try the next candidate anyway.
            pass
    return None
def clip_video(
    video_origin_path: str,
    tts_result: List[Dict],
    output_dir: Optional[str] = None,
    task_id: Optional[str] = None
) -> Dict[str, str]:
    """
    Cut the source video into segments according to TTS timing info.

    Args:
        video_origin_path: Path to the original video file.
        tts_result: List of dicts, each carrying a 'timestamp'
            ('HH:MM:SS-HH:MM:SS') and a narration 'duration' in seconds.
        output_dir: Output directory; auto-generated when None.
        task_id: Task id used to build a unique output directory; derived
            from an MD5 of the inputs when None.

    Returns:
        Dict[str, str]: Mapping from each segment's timestamp to the path
        of its clipped video file.

    Raises:
        FileNotFoundError: If the source video does not exist.
        RuntimeError: If ffmpeg fails to clip a segment.
    """
    # Fail fast when the source video is missing.
    if not os.path.exists(video_origin_path):
        raise FileNotFoundError(f"视频文件不存在: {video_origin_path}")
    # Derive a deterministic task id from the inputs when none is given.
    if task_id is None:
        content_for_hash = f"{video_origin_path}_{json.dumps(tts_result)}"
        task_id = hashlib.md5(content_for_hash.encode()).hexdigest()
    # Default output directory: <project-root>/storage/temp/clip_video/<task_id>
    if output_dir is None:
        output_dir = os.path.join(
            os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
            "storage", "temp", "clip_video", task_id
        )
    # Make sure the output directory exists.
    Path(output_dir).mkdir(parents=True, exist_ok=True)
    # Detect hardware acceleration once for all segments.
    hwaccel = check_hardware_acceleration()
    hwaccel_args = ["-hwaccel", hwaccel] if hwaccel else []
    if hwaccel:
        logger.info(f"使用硬件加速: {hwaccel}")
    # Map each timestamp to the resulting clip path.
    result = {}
    for item in tts_result:
        timestamp = item["timestamp"]
        start_time, _ = parse_timestamp(timestamp)
        # The real end time is start + narration duration + 1s headroom;
        # the end half of the timestamp is deliberately ignored.
        duration = item["duration"]
        calculated_end_time = calculate_end_time(start_time, duration)
        # File name encodes the clipped range, e.g. vid-00-00-00-00-00-26.mp4
        output_filename = f"vid-{start_time.replace(':', '-')}-{calculated_end_time.replace(':', '-')}.mp4"
        output_path = os.path.join(output_dir, output_filename)
        # Build the ffmpeg command for this segment.
        ffmpeg_cmd = [
            "ffmpeg", "-y", *hwaccel_args,
            "-i", video_origin_path,
            "-ss", start_time,
            "-to", calculated_end_time,
            "-c:v", "h264_videotoolbox" if hwaccel == "videotoolbox" else "libx264",
            "-c:a", "aac",
            "-strict", "experimental",
            output_path
        ]
        try:
            logger.info(f"裁剪视频片段: {timestamp} -> {start_time}{calculated_end_time}")
            logger.debug(f"执行命令: {' '.join(ffmpeg_cmd)}")
            # check=True raises CalledProcessError on a non-zero exit code;
            # the completed-process handle itself is not needed afterwards.
            subprocess.run(
                ffmpeg_cmd,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                text=True,
                check=True
            )
            result[timestamp] = output_path
            logger.info(f"成功裁剪视频片段: {timestamp} -> {output_path}")
        except subprocess.CalledProcessError as e:
            logger.error(f"裁剪视频片段失败: {timestamp}")
            logger.error(f"错误信息: {e.stderr}")
            raise RuntimeError(f"视频裁剪失败: {e.stderr}")
    return result
if __name__ == "__main__":
    # Manual smoke test using the author's local assets; adjust the paths
    # below to run it elsewhere.
    video_origin_path = "/Users/apple/Desktop/home/NarratoAI/resource/videos/qyn2-2无片头片尾.mp4"
    # Sample TTS results: each entry carries the source timestamp and the
    # narration duration that drives the clip length.
    tts_result = [{'timestamp': '00:00:00-00:01:15',
                   'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3',
                   'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt',
                   'duration': 25.55,
                   'text': '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸范闲在北齐""了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!'},
                  {'timestamp': '00:01:15-00:04:40',
                   'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3',
                   'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt',
                   'duration': 13.488,
                   'text': '但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…'},
                  {'timestamp': '00:04:58-00:05:45',
                   'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3',
                   'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt',
                   'duration': 21.363,
                   'text': '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!'},
                  {'timestamp': '00:05:45-00:06:00',
                   'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3',
                   'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt',
                   'duration': 7.675, 'text': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!'}]
    # Usage example: clip and print the timestamp -> output-path mapping.
    try:
        result = clip_video(video_origin_path, tts_result)
        print("裁剪结果:")
        print(json.dumps(result, indent=4, ensure_ascii=False))
    except Exception as e:
        print(f"发生错误: {e}")

View File

@ -0,0 +1,543 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''
@Project: NarratoAI
@File : merger_video
@Author : 小林同学
@Date : 2025/5/6 下午7:38
'''
import os
import subprocess
import logging
from enum import Enum
from typing import List, Optional, Tuple, Dict, Any
import shutil
# 设置日志
logger = logging.getLogger(__name__)
class VideoAspect(Enum):
    """Supported output aspect ratios."""
    portrait = "portrait"    # vertical 9:16
    landscape = "landscape"  # horizontal 16:9
    square = "square"        # square 1:1

    def to_resolution(self) -> Tuple[int, int]:
        """
        Map this aspect ratio to a standard output resolution.

        Returns:
            Tuple[int, int]: (width, height) in pixels; unknown members
            fall back to the portrait resolution.
        """
        resolution_map = {
            VideoAspect.portrait: (1080, 1920),
            VideoAspect.landscape: (1920, 1080),
            VideoAspect.square: (1080, 1080),
        }
        return resolution_map.get(self, (1080, 1920))
def check_ffmpeg_installation() -> bool:
    """
    Verify that the ffmpeg binary can be executed.

    Returns:
        bool: True when `ffmpeg -version` runs successfully, False when the
        binary is missing or fails to run.
    """
    try:
        subprocess.run(['ffmpeg', '-version'],
                       stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
    except (subprocess.SubprocessError, FileNotFoundError):
        logger.error("ffmpeg未安装或不在系统PATH中请安装ffmpeg")
        return False
    return True
def get_hardware_acceleration_option() -> Optional[str]:
    """
    Pick a hardware acceleration method listed by the local ffmpeg build.

    Queries `ffmpeg -hwaccels` once and scans the output in preference
    order, mirroring the original branch chain.

    Returns:
        Optional[str]: One of 'cuda', 'nvenc', 'qsv', 'videotoolbox',
        'vaapi', or None when only software encoding is available.
    """
    try:
        probe = subprocess.run(
            ['ffmpeg', '-hide_banner', '-hwaccels'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
        )
        listed = probe.stdout.lower()
        # Preference order: NVIDIA first, then Intel QSV, macOS, Linux VA-API.
        for option in ('cuda', 'nvenc', 'qsv', 'videotoolbox', 'vaapi'):
            if option in listed:
                return option
        logger.info("没有找到支持的硬件加速器,将使用软件编码")
        return None
    except Exception as e:
        logger.warning(f"检测硬件加速器时出错: {str(e)},将使用软件编码")
        return None
def check_video_has_audio(video_path: str) -> bool:
    """
    Determine whether a video file contains an audio stream.

    Args:
        video_path: Path of the video file to inspect.

    Returns:
        bool: True when ffprobe reports an audio stream; False otherwise,
        including when the file is missing or ffprobe cannot run.
    """
    if not os.path.exists(video_path):
        logger.warning(f"视频文件不存在: {video_path}")
        return False
    # Ask ffprobe for the codec type of the first audio stream only.
    command = [
        'ffprobe', '-v', 'error',
        '-select_streams', 'a:0',
        '-show_entries', 'stream=codec_type',
        '-of', 'csv=p=0',
        video_path,
    ]
    try:
        probe = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False)
    except Exception as e:
        logger.warning(f"检测视频音频流时出错: {str(e)}")
        return False
    return probe.stdout.strip() == 'audio'
def create_ffmpeg_concat_file(video_paths: List[str], concat_file_path: str) -> str:
    """
    Write the list file used by ffmpeg's concat demuxer.

    Per the concat demuxer's quoting rules, everything inside single quotes
    is taken literally except the single quote itself, which must be written
    as '\\''. The previous backslash/colon replacements injected literal
    backslashes into quoted paths (breaking e.g. 'C:\\...' on Windows).

    Args:
        video_paths: Paths of the video files to concatenate, in order.
        concat_file_path: Where to write the concat list file.

    Returns:
        str: The concat file path (same as concat_file_path).
    """
    with open(concat_file_path, 'w', encoding='utf-8') as f:
        for video_path in video_paths:
            abs_path = os.path.abspath(video_path)
            # Close the quote, emit an escaped quote, reopen: ' -> '\''
            escaped = abs_path.replace("'", "'\\''")
            f.write(f"file '{escaped}'\n")
    return concat_file_path
def process_single_video(
    input_path: str,
    output_path: str,
    target_width: int,
    target_height: int,
    keep_audio: bool = True,
    hwaccel: Optional[str] = None
) -> str:
    """
    Re-encode a single video to the target resolution and frame rate.

    The video is scaled to fit inside (target_width, target_height) while
    preserving its aspect ratio, then padded (letter/pillar-boxed) to
    exactly that size, at 30 fps.

    Args:
        input_path: Path of the source video.
        output_path: Path of the processed output video.
        target_width: Target frame width in pixels.
        target_height: Target frame height in pixels.
        keep_audio: Whether to keep the audio track (dropped anyway when
            the source has no audio stream).
        hwaccel: Hardware acceleration method name, or None for software.

    Returns:
        str: The output video path.

    Raises:
        FileNotFoundError: If the input file does not exist.
        RuntimeError: If the ffmpeg command fails.
    """
    if not os.path.exists(input_path):
        raise FileNotFoundError(f"找不到视频文件: {input_path}")
    # Base command; -y overwrites any existing output file.
    command = ['ffmpeg', '-y']
    # Decoder-side hardware acceleration flags.
    if hwaccel:
        if hwaccel == 'cuda' or hwaccel == 'nvenc':
            command.extend(['-hwaccel', 'cuda'])
        elif hwaccel == 'qsv':
            command.extend(['-hwaccel', 'qsv'])
        elif hwaccel == 'videotoolbox':
            command.extend(['-hwaccel', 'videotoolbox'])
        elif hwaccel == 'vaapi':
            command.extend(['-hwaccel', 'vaapi', '-vaapi_device', '/dev/dri/renderD128'])
    # Input file
    command.extend(['-i', input_path])
    # Audio handling
    if not keep_audio:
        command.extend(['-an'])  # strip audio
    else:
        # Only set an audio codec when the source actually has audio.
        has_audio = check_video_has_audio(input_path)
        if has_audio:
            command.extend(['-c:a', 'aac', '-b:a', '128k'])  # encode audio as AAC
        else:
            logger.warning(f"视频 {input_path} 没有音频流,将会忽略音频设置")
            command.extend(['-an'])  # no audio stream: strip audio instead
    # Video filters: scale to fit, then pad to the exact target size.
    scale_filter = f"scale={target_width}:{target_height}:force_original_aspect_ratio=decrease"
    pad_filter = f"pad={target_width}:{target_height}:(ow-iw)/2:(oh-ih)/2"
    command.extend([
        '-vf', f"{scale_filter},{pad_filter}",
        '-r', '30',  # normalize frame rate to 30 fps
    ])
    # Encoder choice matches the detected hardware acceleration method.
    if hwaccel == 'cuda' or hwaccel == 'nvenc':
        command.extend(['-c:v', 'h264_nvenc', '-preset', 'p4', '-profile:v', 'high'])
    elif hwaccel == 'qsv':
        command.extend(['-c:v', 'h264_qsv', '-preset', 'medium'])
    elif hwaccel == 'videotoolbox':
        command.extend(['-c:v', 'h264_videotoolbox', '-profile:v', 'high'])
    elif hwaccel == 'vaapi':
        # NOTE(review): vaapi uses bare '-profile' here while others use
        # '-profile:v' — confirm this is intentional for h264_vaapi.
        command.extend(['-c:v', 'h264_vaapi', '-profile', '100'])
    else:
        command.extend(['-c:v', 'libx264', '-preset', 'medium', '-profile:v', 'high'])
    # Bitrate and pixel format settings.
    command.extend([
        '-b:v', '5M',
        '-maxrate', '8M',
        '-bufsize', '10M',
        '-pix_fmt', 'yuv420p',  # widely compatible pixel format
    ])
    # Output file
    command.append(output_path)
    # Run ffmpeg.
    try:
        logger.info(f"处理视频 {input_path} -> {output_path}")
        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return output_path
    except subprocess.CalledProcessError as e:
        logger.error(f"处理视频失败: {e.stderr.decode() if e.stderr else str(e)}")
        raise RuntimeError(f"处理视频失败: {str(e)}")
def combine_clip_videos(
    output_video_path: str,
    video_paths: List[str],
    video_ost_list: List[int],
    video_aspect: VideoAspect = VideoAspect.portrait,
    threads: int = 4,
) -> str:
    """
    Merge pre-clipped sub-videos into one video, mixing in the original
    audio of the segments flagged to keep it.

    Args:
        output_video_path: Path of the merged output video.
        video_paths: Paths of the sub-videos, in playback order.
        video_ost_list: Original-sound flag per segment
            (0: drop original audio, 1: keep original audio only,
             2: keep original audio plus narration).
        video_aspect: Target aspect ratio of the output.
        threads: Thread count passed to ffmpeg for the concat step.

    Returns:
        str: The merged video path.

    Raises:
        RuntimeError: If ffmpeg is unavailable or every merge strategy fails.
        ValueError: If no valid video segment remains after preprocessing.
    """
    # ffmpeg is required for every step below.
    if not check_ffmpeg_installation():
        raise RuntimeError("未找到ffmpeg请先安装")
    # Make sure the output directory exists.
    output_dir = os.path.dirname(output_video_path)
    os.makedirs(output_dir, exist_ok=True)
    # Resolve the target resolution from the aspect ratio.
    aspect = VideoAspect(video_aspect)
    video_width, video_height = aspect.to_resolution()
    # Detect an available hardware acceleration method (None -> software).
    hwaccel = get_hardware_acceleration_option()
    if hwaccel:
        logger.info(f"将使用 {hwaccel} 硬件加速")
    # Build a per-segment config list from the paths and audio flags.
    video_segments = []
    # Guard against mismatched input lists by truncating to the shorter one.
    if len(video_paths) != len(video_ost_list):
        logger.warning(f"视频路径列表({len(video_paths)})和原声设置列表({len(video_ost_list)})长度不匹配")
        min_length = min(len(video_paths), len(video_ost_list))
        video_paths = video_paths[:min_length]
        video_ost_list = video_ost_list[:min_length]
    for i, (video_path, video_ost) in enumerate(zip(video_paths, video_ost_list)):
        if not os.path.exists(video_path):
            logger.warning(f"视频不存在,跳过: {video_path}")
            continue
        # Probe for an audio stream so "keep audio" is only honored when
        # there actually is audio to keep.
        has_audio = check_video_has_audio(video_path)
        segment = {
            "index": i,
            "path": video_path,
            "ost": video_ost,
            "has_audio": has_audio,
            "keep_audio": video_ost > 0 and has_audio  # keep only when ost>0 AND audio exists
        }
        if video_ost > 0 and not has_audio:
            logger.warning(f"视频 {video_path} 设置为保留原声(ost={video_ost}),但该视频没有音频流")
        video_segments.append(segment)
    # Stage 1: normalize every segment into an intermediate file.
    processed_videos = []
    temp_dir = os.path.join(output_dir, "temp_videos")
    os.makedirs(temp_dir, exist_ok=True)
    try:
        for segment in video_segments:
            # Re-encode the segment, keeping or stripping its audio.
            temp_output = os.path.join(temp_dir, f"processed_{segment['index']}.mp4")
            try:
                process_single_video(
                    input_path=segment['path'],
                    output_path=temp_output,
                    target_width=video_width,
                    target_height=video_height,
                    keep_audio=segment['keep_audio'],
                    hwaccel=hwaccel
                )
                processed_videos.append({
                    "index": segment["index"],
                    "path": temp_output,
                    "keep_audio": segment["keep_audio"]
                })
                logger.info(f"视频 {segment['index'] + 1}/{len(video_segments)} 处理完成")
            except Exception as e:
                # A failed segment is skipped rather than aborting the merge.
                logger.error(f"处理视频 {segment['path']} 时出错: {str(e)}")
                continue
        if not processed_videos:
            raise ValueError("没有有效的视频片段可以合并")
        # Restore playback order after any skips.
        processed_videos.sort(key=lambda x: x["index"])
        # Stage 2: merge in simple steps to avoid one huge filter_complex.
        try:
            # 2.1 Concatenate all video streams into one file, audio stripped.
            video_paths_only = [video["path"] for video in processed_videos]
            video_concat_path = os.path.join(temp_dir, "video_concat.mp4")
            concat_file = os.path.join(temp_dir, "concat_list.txt")
            create_ffmpeg_concat_file(video_paths_only, concat_file)
            concat_cmd = [
                'ffmpeg', '-y',
                '-f', 'concat',
                '-safe', '0',
                '-i', concat_file,
                '-c:v', 'libx264',
                '-preset', 'medium',
                '-profile:v', 'high',
                '-an',  # video only, no audio
                '-threads', str(threads),
                video_concat_path
            ]
            subprocess.run(concat_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            logger.info("视频流合并完成")
            # 2.2 Extract audio from the segments that keep their sound.
            audio_segments = [video for video in processed_videos if video["keep_audio"]]
            if not audio_segments:
                # No audio anywhere: the silent concat IS the final result.
                shutil.copy(video_concat_path, output_video_path)
                logger.info("无音频视频合并完成")
                return output_video_path
            audio_files = []
            for i, segment in enumerate(audio_segments):
                # Extract each kept audio track to its own AAC file.
                audio_file = os.path.join(temp_dir, f"audio_{i}.aac")
                extract_audio_cmd = [
                    'ffmpeg', '-y',
                    '-i', segment["path"],
                    '-vn',  # audio only, no video
                    '-c:a', 'aac',
                    '-b:a', '128k',
                    audio_file
                ]
                subprocess.run(extract_audio_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                audio_files.append({
                    "index": segment["index"],
                    "path": audio_file
                })
                logger.info(f"提取音频 {i+1}/{len(audio_segments)} 完成")
            # 2.3 Compute where each kept audio clip starts on the timeline.
            audio_timings = []
            current_time = 0.0
            # Probe every processed segment's duration to advance the offset.
            for i, video in enumerate(processed_videos):
                duration_cmd = [
                    'ffprobe', '-v', 'error',
                    '-show_entries', 'format=duration',
                    '-of', 'csv=p=0',
                    video["path"]
                ]
                result = subprocess.run(duration_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
                duration = float(result.stdout.strip())
                # Record the start offset for segments whose audio is kept.
                if video["keep_audio"]:
                    for audio in audio_files:
                        if audio["index"] == video["index"]:
                            audio_timings.append({
                                "file": audio["path"],
                                "start": current_time,
                                "index": video["index"]
                            })
                            break
                current_time += duration
            # 2.4 Generate a silent base track spanning the whole timeline.
            silence_audio = os.path.join(temp_dir, "silence.aac")
            create_silence_cmd = [
                'ffmpeg', '-y',
                '-f', 'lavfi',
                '-i', f'anullsrc=r=44100:cl=stereo',
                '-t', str(current_time),  # total timeline duration
                '-c:a', 'aac',
                '-b:a', '128k',
                silence_audio
            ]
            subprocess.run(create_silence_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            # 2.5 Build a filter script delaying each clip to its start time
            # and mixing everything over the silent base track.
            filter_script = os.path.join(temp_dir, "filter_script.txt")
            with open(filter_script, 'w') as f:
                f.write(f"[0:a]volume=0.0[silence];\n")  # mute the base track
                # adelay takes milliseconds, one value per channel.
                for i, timing in enumerate(audio_timings):
                    f.write(f"[{i+1}:a]adelay={int(timing['start']*1000)}|{int(timing['start']*1000)}[a{i}];\n")
                # Mix the base plus every delayed clip into [aout].
                mix_str = "[silence]"
                for i in range(len(audio_timings)):
                    mix_str += f"[a{i}]"
                mix_str += f"amix=inputs={len(audio_timings)+1}:duration=longest[aout]"
                f.write(mix_str)
            # 2.6 Run the mix: silent base first, then each delayed clip,
            # in the same order the filter script indexes them.
            audio_inputs = ['-i', silence_audio]
            for timing in audio_timings:
                audio_inputs.extend(['-i', timing["file"]])
            mixed_audio = os.path.join(temp_dir, "mixed_audio.aac")
            audio_mix_cmd = [
                'ffmpeg', '-y'
            ] + audio_inputs + [
                '-filter_complex_script', filter_script,
                '-map', '[aout]',
                '-c:a', 'aac',
                '-b:a', '128k',
                mixed_audio
            ]
            subprocess.run(audio_mix_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            logger.info("音频混合完成")
            # 2.7 Mux the concatenated video with the mixed audio track.
            final_cmd = [
                'ffmpeg', '-y',
                '-i', video_concat_path,
                '-i', mixed_audio,
                '-c:v', 'copy',
                '-c:a', 'aac',
                '-map', '0:v:0',
                '-map', '1:a:0',
                '-shortest',
                output_video_path
            ]
            subprocess.run(final_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            logger.info("视频最终合并完成")
            return output_video_path
        except subprocess.CalledProcessError as e:
            logger.error(f"合并视频过程中出错: {e.stderr.decode() if e.stderr else str(e)}")
            # Fallback: the simplest possible merge, dropping all audio.
            logger.info("尝试备用合并方法 - 无音频合并")
            try:
                concat_file = os.path.join(temp_dir, "concat_list.txt")
                video_paths_only = [video["path"] for video in processed_videos]
                create_ffmpeg_concat_file(video_paths_only, concat_file)
                backup_cmd = [
                    'ffmpeg', '-y',
                    '-f', 'concat',
                    '-safe', '0',
                    '-i', concat_file,
                    '-c:v', 'copy',
                    '-an',  # no audio
                    output_video_path
                ]
                subprocess.run(backup_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                logger.warning("使用备用方法(无音频)成功合并视频")
                return output_video_path
            except Exception as backup_error:
                logger.error(f"备用合并方法也失败: {str(backup_error)}")
                raise RuntimeError(f"无法合并视频: {str(backup_error)}")
    except Exception as e:
        logger.error(f"合并视频时出错: {str(e)}")
        raise
    finally:
        # Best-effort cleanup of all intermediate files.
        try:
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
                logger.info("已清理临时文件")
        except Exception as e:
            logger.warning(f"清理临时文件时出错: {str(e)}")
if __name__ == '__main__':
    # Manual smoke test: merge a set of pre-clipped segments.
    # NOTE(review): hard-coded local paths — only runnable on the author's machine.
    video_paths = [
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/0ac14d474144b54d614c26a5c87cffe7/vid-00-00-00-00-00-26.mp4',
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/0ac14d474144b54d614c26a5c87cffe7/vid-00-01-15-00-01-29.mp4',
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-04-41-00-04-58.mp4',
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/0ac14d474144b54d614c26a5c87cffe7/vid-00-04-58-00-05-20.mp4',
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/0ac14d474144b54d614c26a5c87cffe7/vid-00-05-45-00-05-53.mp4',
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-06-00-00-06-03.mp4'
    ]
    # Flags per segment: 1 keeps the original audio, 0 drops it.
    combine_clip_videos(
        output_video_path="/Users/apple/Desktop/home/NarratoAI/storage/temp/merge/merged_123.mp4",
        video_paths=video_paths,
        video_ost_list=[1, 0, 1, 0, 0, 1],
        video_aspect=VideoAspect.portrait
    )

View File

@ -0,0 +1,154 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''
@Project: NarratoAI
@File : subtitle_merger
@Author : viccy
@Date : 2025/5/6 下午4:00
'''
import re
import os
from datetime import datetime, timedelta
def parse_time(time_str):
    """Convert an SRT timestamp 'HH:MM:SS,mmm' into a timedelta."""
    clock, millis = time_str.split(',')
    hours, minutes, seconds = clock.split(':')
    return timedelta(
        hours=int(hours),
        minutes=int(minutes),
        seconds=int(seconds),
        milliseconds=int(millis),
    )
def format_time(td):
    """Render a timedelta as an SRT timestamp 'HH:MM:SS,mmm'."""
    secs_total = int(td.total_seconds())
    minutes, seconds = divmod(secs_total, 60)
    hours, minutes = divmod(minutes, 60)
    # timedelta stores the sub-second part separately as microseconds.
    millis = td.microseconds // 1000
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"
def extract_time_range_from_filename(filename):
    """
    Pull the (start, end) time range encoded in a subtitle filename.

    Expects names containing 'subtitle_HH_MM_SS-HH_MM_SS'; returns
    (None, None) when the pattern is absent.
    """
    match = re.search(
        r'subtitle_(\d{2})_(\d{2})_(\d{2})-(\d{2})_(\d{2})_(\d{2})', filename)
    if match is None:
        return None, None
    values = [int(group) for group in match.groups()]
    start = timedelta(hours=values[0], minutes=values[1], seconds=values[2])
    end = timedelta(hours=values[3], minutes=values[4], seconds=values[5])
    return start, end
def merge_subtitle_files(subtitle_files, output_file=None):
    """
    Merge several SRT subtitle files into one, shifting each file's cues
    by the start time encoded in its filename.

    Args:
        subtitle_files: SRT file paths whose names contain a
            'subtitle_HH_MM_SS-HH_MM_SS' time range.
        output_file: Output path; auto-generated next to the first input
            when None.

    Returns:
        Path of the merged subtitle file.

    Raises:
        ValueError: If no input file has a parseable time range.
    """
    # Drop files whose names carry no usable time range BEFORE sorting:
    # sorting on a None key would raise TypeError before the per-file
    # skip in the old loop ever ran.
    parseable_files = []
    for file_path in subtitle_files:
        if extract_time_range_from_filename(file_path)[0] is None:
            print(f"警告: 无法从文件名 {os.path.basename(file_path)} 中提取时间范围,跳过该文件")
            continue
        parseable_files.append(file_path)
    if not parseable_files:
        # Also guards the sorted_files[0]/[-1] indexing further down.
        raise ValueError("没有可合并的字幕文件: 无法从任何文件名中提取时间范围")
    # Process files in chronological order of their encoded start time.
    sorted_files = sorted(parseable_files,
                          key=lambda x: extract_time_range_from_filename(x)[0])
    merged_subtitles = []
    subtitle_index = 1
    for file_path in sorted_files:
        # The filename's start time is the offset applied to every cue.
        offset_time, _ = extract_time_range_from_filename(file_path)
        with open(file_path, 'r', encoding='utf-8') as file:
            content = file.read()
        # SRT cues are separated by blank lines.
        subtitle_blocks = re.split(r'\n\s*\n', content.strip())
        for block in subtitle_blocks:
            lines = block.strip().split('\n')
            if len(lines) < 3:  # index line + time line + at least one text line
                continue
            time_parts = lines[1].split(' --> ')
            if len(time_parts) != 2:
                continue
            # Shift both endpoints by the file's start offset and renumber.
            adjusted_start = parse_time(time_parts[0]) + offset_time
            adjusted_end = parse_time(time_parts[1]) + offset_time
            new_block = [
                str(subtitle_index),
                f"{format_time(adjusted_start)} --> {format_time(adjusted_end)}",
                *lines[2:]
            ]
            merged_subtitles.append('\n'.join(new_block))
            subtitle_index += 1
    merged_content = '\n\n'.join(merged_subtitles)
    if output_file is None:
        # Auto-name: merged_subtitle_<first-start>-<last-end>.srt
        first_file_path = sorted_files[0]
        last_file_path = sorted_files[-1]
        _, last_end = extract_time_range_from_filename(last_file_path)
        dir_path = os.path.dirname(first_file_path)
        first_start_str = os.path.basename(first_file_path).split('-')[0].replace('subtitle_', '')
        hours, rem = divmod(int(last_end.total_seconds()), 3600)
        minutes, seconds = divmod(rem, 60)
        last_end_str = f"{hours:02d}_{minutes:02d}_{seconds:02d}"
        output_file = os.path.join(dir_path, f"merged_subtitle_{first_start_str}-{last_end_str}.srt")
    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(merged_content)
    return output_file
if __name__ == '__main__':
    # Manual smoke test: merge the demo task's per-segment subtitles.
    # NOTE(review): hard-coded local paths — only runnable on the author's machine.
    subtitle_files = [
        "/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt",
        "/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt",
        "/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt",
        "/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt",
    ]
    output_file = merge_subtitle_files(subtitle_files)
    print(f"字幕文件已合并至: {output_file}")

View File

@ -9,7 +9,7 @@ from loguru import logger
from app.config import config from app.config import config
from app.models import const from app.models import const
from app.models.schema import VideoConcatMode, VideoParams, VideoClipParams from app.models.schema import VideoConcatMode, VideoParams, VideoClipParams
from app.services import llm, material, subtitle, video, voice, audio_merger from app.services import llm, material, subtitle, video, voice, audio_merger, subtitle_merger, clip_video
from app.services import state as sm from app.services import state as sm
from app.utils import utils from app.utils import utils
@ -158,18 +158,25 @@ def get_video_materials(task_id, params, video_terms, audio_duration):
def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict): def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict):
"""后台任务(自动剪辑视频进行剪辑)""" """
后台任务自动剪辑视频进行剪辑
Args:
task_id: 任务ID
params: 视频参数
subclip_path_videos: 视频片段路径
"""
logger.info(f"\n\n## 开始任务: {task_id}") logger.info(f"\n\n## 开始任务: {task_id}")
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=0)
# 初始化 ImageMagick
if not utils.init_imagemagick():
logger.warning("ImageMagick 初始化失败,字幕可能无法正常显示")
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=5)
# tts 角色名称 # # 初始化 ImageMagick
voice_name = voice.parse_voice_name(params.voice_name) # if not utils.init_imagemagick():
# logger.warning("ImageMagick 初始化失败,字幕可能无法正常显示")
# # tts 角色名称
# voice_name = voice.parse_voice_name(params.voice_name)
"""
1. 加载剪辑脚本
"""
logger.info("\n\n## 1. 加载视频脚本") logger.info("\n\n## 1. 加载视频脚本")
video_script_path = path.join(params.video_clip_json_path) video_script_path = path.join(params.video_clip_json_path)
@ -187,111 +194,102 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
logger.debug(f"解说时间戳列表: \n{time_list}") logger.debug(f"解说时间戳列表: \n{time_list}")
# 获取视频总时长(单位 s) # 获取视频总时长(单位 s)
last_timestamp = list_script[-1]['new_timestamp'] last_timestamp = list_script[-1]['timestamp'].split("-")[1]
end_time = last_timestamp.split("-")[1] total_duration = utils.time_to_seconds(last_timestamp)
total_duration = utils.time_to_seconds(end_time)
except Exception as e: except Exception as e:
logger.error(f"无法读取视频json脚本请检查配置是否正确。{e}") logger.error(f"无法读取视频json脚本请检查脚本格式是否正确")
raise ValueError("无法读取视频json脚本请检查配置是否正确") raise ValueError("无法读取视频json脚本请检查脚本格式是否正确")
else: else:
logger.error(f"video_script_path: {video_script_path} \n\n", traceback.format_exc()) logger.error(f"video_script_path: {video_script_path} \n\n", traceback.format_exc())
raise ValueError("解说脚本不存在!请检查配置是否正确。") raise ValueError("解说脚本不存在!请检查配置是否正确。")
"""
2. 使用 TTS 生成音频素材
"""
logger.info("\n\n## 2. 根据OST设置生成音频列表") logger.info("\n\n## 2. 根据OST设置生成音频列表")
# 只为OST=0或2的片段生成TTS音频 # 只为OST=0 or 2的判断生成音频 OST=0 仅保留解说 OST=2 保留解说和原声
tts_segments = [ tts_segments = [
segment for segment in list_script segment for segment in list_script
if segment['OST'] in [0, 2] if segment['OST'] in [0, 2]
] ]
logger.debug(f"需要生成TTS的片段数: {len(tts_segments)}") logger.debug(f"需要生成TTS的片段数: {len(tts_segments)}")
# 初始化音频文件路径
audio_files = []
final_audio = ""
if tts_segments:
audio_files, sub_maker_list = voice.tts_multiple(
task_id=task_id,
list_script=tts_segments, # 只传入需要TTS的片段
voice_name=voice_name,
voice_rate=params.voice_rate,
voice_pitch=params.voice_pitch,
force_regenerate=True
)
if audio_files is None:
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
logger.error("TTS转换音频失败, 可能是网络不可用! 如果您在中国, 请使用VPN.")
return
if audio_files: tts_results = voice.tts_multiple(
logger.info(f"合并音频文件: {audio_files}") task_id=task_id,
try: list_script=tts_segments, # 只传入需要TTS的片段
# 传入OST信息以便正确处理音频 voice_name=params.voice_name,
final_audio = audio_merger.merge_audio_files( voice_rate=params.voice_rate,
task_id=task_id, voice_pitch=params.voice_pitch,
audio_files=audio_files, force_regenerate=True
total_duration=total_duration, )
list_script=list_script # 传入完整脚本以便处理OST audio_files = [
) tts_result["audio_file"] for tts_result in tts_results
logger.info("音频文件合并成功") ]
except Exception as e: subtitle_files = [
logger.error(f"合并音频文件失败: {str(e)}") tts_result["subtitle_file"] for tts_result in tts_results
final_audio = "" ]
else: if tts_results:
# 如果没有需要生成TTS的片段创建一个空白音频文件 logger.info(f"合并音频/字幕文件")
# 这样可以确保后续的音频处理能正确进行
logger.info("没有需要生成TTS的片段将保留原声和背景音乐")
final_audio = path.join(utils.task_dir(task_id), "empty.mp3")
try: try:
from moviepy.editor import AudioClip # 合并音频文件
# 创建一个与视频等长的空白音频 merged_audio_path = audio_merger.merge_audio_files(
empty_audio = AudioClip(make_frame=lambda t: 0, duration=total_duration) task_id=task_id,
empty_audio.write_audiofile(final_audio, fps=44100) audio_files=audio_files,
logger.info(f"已创建空白音频文件: {final_audio}") total_duration=total_duration,
except Exception as e: list_script=list_script # 传入完整脚本以便处理OST
logger.error(f"创建空白音频文件失败: {str(e)}")
final_audio = ""
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30)
subtitle_path = ""
if params.subtitle_enabled:
if audio_files:
subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
logger.info(f"\n\n## 3. 生成字幕、提供程序是: {subtitle_provider}")
subtitle.create(
audio_file=final_audio,
subtitle_file=subtitle_path,
) )
logger.info(f"音频文件合并成功->{merged_audio_path}")
# 合并字幕文件
merged_subtitle_path = subtitle_merger.merge_subtitle_files(
subtitle_files=subtitle_files,
)
logger.info(f"字幕文件合并成功->{merged_subtitle_path}")
except Exception as e:
logger.error(f"合并音频文件失败: {str(e)}")
merged_audio_path = ""
merged_subtitle_path = ""
else:
logger.error("TTS转换音频失败, 可能是网络不可用! 如果您在中国, 请使用VPN.")
return
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)
subtitle_lines = subtitle.file_to_subtitles(subtitle_path) """
3. (可选) 使用 whisper 生成字幕
"""
if merged_subtitle_path is None:
if audio_files:
merged_subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
logger.info(f"\n\n使用 {subtitle_provider} 生成字幕")
subtitle.create(
audio_file=merged_audio_path,
subtitle_file=merged_subtitle_path,
)
subtitle_lines = subtitle.file_to_subtitles(merged_subtitle_path)
if not subtitle_lines: if not subtitle_lines:
logger.warning(f"字幕文件无效: {subtitle_path}") logger.warning(f"字幕文件无效: {merged_subtitle_path}")
subtitle_path = ""
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40) sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)
"""
4. 裁剪视频 - 将超出音频长度的视频进行裁剪
"""
logger.info("\n\n## 4. 裁剪视频") logger.info("\n\n## 4. 裁剪视频")
result = clip_video.clip_video(params.video_origin_path, tts_results)
subclip_path_videos.update(result)
subclip_videos = [x for x in subclip_path_videos.values()] subclip_videos = [x for x in subclip_path_videos.values()]
# logger.debug(f"\n\n## 裁剪后的视频文件列表: \n{subclip_videos}")
if not subclip_videos: sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60)
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
logger.error(
"裁剪视频失败,可能是 ImageMagick 不可用")
return
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=50)
"""
5. 合并视频
"""
final_video_paths = [] final_video_paths = []
combined_video_paths = [] combined_video_paths = []
_progress = 50 combined_video_path = path.join(utils.task_dir(task_id), f"merger.mp4")
index = 1
combined_video_path = path.join(utils.task_dir(task_id), f"combined.mp4")
logger.info(f"\n\n## 5. 合并视频: => {combined_video_path}") logger.info(f"\n\n## 5. 合并视频: => {combined_video_path}")
video.combine_clip_videos( video.combine_clip_videos(
@ -302,14 +300,15 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
video_aspect=params.video_aspect, video_aspect=params.video_aspect,
threads=params.n_threads # 多线程 threads=params.n_threads # 多线程
) )
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=80)
_progress += 50 / 2
sm.state.update_task(task_id, progress=_progress)
final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4") """
6. 合并字幕/BGM/配音/视频
"""
final_video_path = path.join(utils.task_dir(task_id), f"combined.mp4")
logger.info(f"\n\n## 6. 最后一步: 合并字幕/BGM/配音/视频 -> {final_video_path}")
logger.info(f"\n\n## 6. 最后合成: {index} => {final_video_path}")
# 获取背景音乐 # 获取背景音乐
bgm_path = None bgm_path = None
if params.bgm_type or params.bgm_file: if params.bgm_type or params.bgm_file:
@ -340,18 +339,15 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
font_path = utils.font_dir(params.font_name) font_path = utils.font_dir(params.font_name)
video.generate_video_v3( video.generate_video_v3(
video_path=combined_video_path, video_path=combined_video_path,
subtitle_path=subtitle_path, subtitle_path=merged_subtitle_path,
bgm_path=bgm_path, bgm_path=bgm_path,
narration_path=final_audio, narration_path=merged_audio_path,
output_path=final_video_path, output_path=final_video_path,
volume_config=volume_config, # 添加音量配置 volume_config=volume_config, # 添加音量配置
subtitle_style=subtitle_style, subtitle_style=subtitle_style,
font_path=font_path font_path=font_path
) )
_progress += 50 / 2
sm.state.update_task(task_id, progress=_progress)
final_video_paths.append(final_video_path) final_video_paths.append(final_video_path)
combined_video_paths.append(combined_video_path) combined_video_paths.append(combined_video_path)
@ -400,35 +396,20 @@ def validate_params(video_path, audio_path, output_file, params):
if __name__ == "__main__": if __name__ == "__main__":
# task_id = "test123" task_id = "qyn2-2-demo"
# subclip_path_videos = {'00:41-01:58': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_41-01_58.mp4',
# '00:06-00:15': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_06-00_15.mp4',
# '01:10-01:17': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_10-01_17.mp4',
# '00:47-01:03': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_47-01_03.mp4',
# '01:03-01:10': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_03-01_10.mp4',
# '02:40-03:08': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-02_40-03_08.mp4',
# '03:02-03:20': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-03_02-03_20.mp4',
# '03:18-03:20': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-03_18-03_20.mp4'}
#
# params = VideoClipParams(
# video_clip_json_path="E:\\projects\\NarratoAI\\resource/scripts/test003.json",
# video_origin_path="E:\\projects\\NarratoAI\\resource/videos/1.mp4",
# )
# start_subclip(task_id, params, subclip_path_videos=subclip_path_videos)
task_id = "test456" # 提前裁剪是为了方便检查视频
subclip_path_videos = {'01:10-01:17': './storage/cache_videos/vid-01_10-01_17.mp4', subclip_path_videos = {
'01:58-02:04': './storage/cache_videos/vid-01_58-02_04.mp4', '00:00:00-00:01:15': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-00-00-00-01-15.mp4',
'02:25-02:31': './storage/cache_videos/vid-02_25-02_31.mp4', '00:01:15-00:04:40': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-01-15-00-04-40.mp4',
'01:28-01:33': './storage/cache_videos/vid-01_28-01_33.mp4', '00:04:41-00:04:58': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-04-41-00-04-58.mp4',
'03:14-03:18': './storage/cache_videos/vid-03_14-03_18.mp4', '00:04:58-00:05:45': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-04-58-00-05-45.mp4',
'00:24-00:28': './storage/cache_videos/vid-00_24-00_28.mp4', '00:05:45-00:06:00': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-05-45-00-06-00.mp4',
'03:02-03:08': './storage/cache_videos/vid-03_02-03_08.mp4', '00:06:00-00:06:03': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-06-00-00-06-03.mp4',
'00:41-00:44': './storage/cache_videos/vid-00_41-00_44.mp4', }
'02:12-02:25': './storage/cache_videos/vid-02_12-02_25.mp4'}
params = VideoClipParams( params = VideoClipParams(
video_clip_json_path="/Users/apple/Desktop/home/NarratoAI/resource/scripts/test004.json", video_clip_json_path="/Users/apple/Desktop/home/NarratoAI/resource/scripts/demo.json",
video_origin_path="/Users/apple/Desktop/home/NarratoAI/resource/videos/1.mp4", video_origin_path="/Users/apple/Desktop/home/NarratoAI/resource/videos/qyn2-2无片头片尾.mp4",
) )
start_subclip(task_id, params, subclip_path_videos=subclip_path_videos) start_subclip(task_id, params, subclip_path_videos)

View File

@ -443,4 +443,3 @@ def generate_video_v3(
bgm.close() bgm.close()
if narration_path: if narration_path:
narration.close() narration.close()

View File

@ -5,10 +5,11 @@ import traceback
import edge_tts import edge_tts
import asyncio import asyncio
from loguru import logger from loguru import logger
from typing import List from typing import List, Union
from datetime import datetime from datetime import datetime
from xml.sax.saxutils import unescape from xml.sax.saxutils import unescape
from edge_tts import submaker, SubMaker from edge_tts import submaker, SubMaker
from edge_tts.submaker import mktimestamp
from moviepy.video.tools import subtitles from moviepy.video.tools import subtitles
import time import time
@ -1036,7 +1037,7 @@ def is_azure_v2_voice(voice_name: str):
def tts( def tts(
text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str
) -> [SubMaker, None]: ) -> Union[SubMaker, None]:
if is_azure_v2_voice(voice_name): if is_azure_v2_voice(voice_name):
return azure_tts_v2(text, voice_name, voice_file) return azure_tts_v2(text, voice_name, voice_file)
return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file) return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file)
@ -1064,7 +1065,7 @@ def convert_pitch_to_percent(rate: float) -> str:
def azure_tts_v1( def azure_tts_v1(
text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str
) -> [SubMaker, None]: ) -> Union[SubMaker, None]:
voice_name = parse_voice_name(voice_name) voice_name = parse_voice_name(voice_name)
text = text.strip() text = text.strip()
rate_str = convert_rate_to_percent(voice_rate) rate_str = convert_rate_to_percent(voice_rate)
@ -1087,11 +1088,6 @@ def azure_tts_v1(
) )
return sub_maker, audio_data return sub_maker, audio_data
# 判断音频文件是否已存在
if os.path.exists(voice_file):
logger.info(f"voice file exists, skip tts: {voice_file}")
continue
# 获取音频数据和字幕信息 # 获取音频数据和字幕信息
sub_maker, audio_data = asyncio.run(_do()) sub_maker, audio_data = asyncio.run(_do())
@ -1105,8 +1101,6 @@ def azure_tts_v1(
# 数据有效,写入文件 # 数据有效,写入文件
with open(voice_file, "wb") as file: with open(voice_file, "wb") as file:
file.write(audio_data) file.write(audio_data)
logger.info(f"completed, output file: {voice_file}")
return sub_maker return sub_maker
except Exception as e: except Exception as e:
logger.error(f"生成音频文件时出错: {str(e)}") logger.error(f"生成音频文件时出错: {str(e)}")
@ -1115,7 +1109,7 @@ def azure_tts_v1(
return None return None
def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None]: def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker, None]:
voice_name = is_azure_v2_voice(voice_name) voice_name = is_azure_v2_voice(voice_name)
if not voice_name: if not voice_name:
logger.error(f"invalid voice name: {voice_name}") logger.error(f"invalid voice name: {voice_name}")
@ -1240,7 +1234,7 @@ def create_subtitle_from_multiple(text: str, sub_maker_list: List[SubMaker], lis
if script_item['OST']: if script_item['OST']:
continue continue
start_time, end_time = script_item['new_timestamp'].split('-') start_time, end_time = script_item['timestamp'].split('-')
if sub_maker_index >= len(sub_maker_list): if sub_maker_index >= len(sub_maker_list):
logger.error(f"Sub maker list index out of range: {sub_maker_index}") logger.error(f"Sub maker list index out of range: {sub_maker_index}")
break break
@ -1317,6 +1311,97 @@ def create_subtitle_from_multiple(text: str, sub_maker_list: List[SubMaker], lis
traceback.print_exc() traceback.print_exc()
def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str):
    """
    Build an SRT subtitle file by aligning TTS timings with the script text.

    The script *text* is split into lines at punctuation, then the SubMaker's
    (offset, sub) pairs are walked in order, accumulating spoken fragments
    until they match the next script line; each match becomes one SRT cue.

    Args:
        sub_maker: edge-tts SubMaker holding per-fragment time offsets and texts.
        text: The narration script that was synthesized.
        subtitle_file: Destination path for the generated .srt file.

    Returns:
        tuple[str, float]: (subtitle_file, duration_in_seconds) on success,
        ("", 0.0) on any failure. (Previously the failure paths implicitly
        returned None, which crashed callers that unpack the result, e.g.
        `_, duration = create_subtitle(...)`.)
    """
    text = _format_text(text)

    def formatter(idx: int, start_time: float, end_time: float, sub_text: str) -> str:
        """Render a single SRT cue, e.g.:
        1
        00:00:00,000 --> 00:00:02,360
        跑步是一项简单易行的运动
        """
        # SRT uses a comma as the millisecond separator; mktimestamp emits a dot.
        start_t = mktimestamp(start_time).replace(".", ",")
        end_t = mktimestamp(end_time).replace(".", ",")
        return f"{idx}\n" f"{start_t} --> {end_t}\n" f"{sub_text}\n"

    start_time = -1.0  # -1.0 marks "no cue in progress"
    sub_items = []
    sub_index = 0

    # Script lines split at punctuation; each becomes (at most) one cue.
    script_lines = utils.split_string_by_punctuations(text)

    def match_line(_sub_line: str, _sub_index: int):
        """Return the matching script line for the accumulated fragment, or ''.

        Tries exact match first, then progressively looser comparisons that
        ignore punctuation and finally all non-word characters.
        """
        if len(script_lines) <= _sub_index:
            return ""
        _line = script_lines[_sub_index]
        if _sub_line == _line:
            return script_lines[_sub_index].strip()
        _sub_line_ = re.sub(r"[^\w\s]", "", _sub_line)
        _line_ = re.sub(r"[^\w\s]", "", _line)
        if _sub_line_ == _line_:
            return _line_.strip()
        _sub_line_ = re.sub(r"\W+", "", _sub_line)
        _line_ = re.sub(r"\W+", "", _line)
        if _sub_line_ == _line_:
            return _line.strip()
        return ""

    sub_line = ""

    try:
        for offset, sub in zip(sub_maker.offset, sub_maker.subs):
            _start_time, end_time = offset
            if start_time < 0:
                # First fragment of a new cue fixes the cue's start time.
                start_time = _start_time
            sub = unescape(sub)
            sub_line += sub
            sub_text = match_line(sub_line, sub_index)
            if sub_text:
                sub_index += 1
                line = formatter(
                    idx=sub_index,
                    start_time=start_time,
                    end_time=end_time,
                    sub_text=sub_text,
                )
                sub_items.append(line)
                # Reset accumulator for the next cue.
                start_time = -1.0
                sub_line = ""

        if len(sub_items) == len(script_lines):
            with open(subtitle_file, "w", encoding="utf-8") as file:
                file.write("\n".join(sub_items) + "\n")
            try:
                # Re-parse the freshly written file to validate it and get its duration.
                sbs = subtitles.file_to_subtitles(subtitle_file, encoding="utf-8")
                duration = max([tb for ((ta, tb), txt) in sbs])
                logger.info(
                    f"已创建字幕文件: {subtitle_file}, duration: {duration}"
                )
                return subtitle_file, duration
            except Exception as e:
                logger.error(f"failed, error: {str(e)}")
                os.remove(subtitle_file)
                # Fall through to the failure return below.
        else:
            # Alignment failed: not every script line found a matching fragment.
            logger.warning(
                f"字幕创建失败, 字幕长度: {len(sub_items)}, script_lines len: {len(script_lines)}"
            )
    except Exception as e:
        logger.error(f"failed, error: {str(e)}")
    # Always return a tuple so callers can safely unpack the result.
    return "", 0.0
def get_audio_duration(sub_maker: submaker.SubMaker): def get_audio_duration(sub_maker: submaker.SubMaker):
""" """
获取音频时长 获取音频时长
@ -1339,20 +1424,25 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
""" """
voice_name = parse_voice_name(voice_name) voice_name = parse_voice_name(voice_name)
output_dir = utils.task_dir(task_id) output_dir = utils.task_dir(task_id)
audio_files = [] tts_results = []
sub_maker_list = []
for item in list_script: for item in list_script:
tts_item = {
"audio_file": "",
"subtitle_file": "",
"duration": 0,
}
if item['OST'] != 1: if item['OST'] != 1:
# 将时间戳中的冒号替换为下划线 # 将时间戳中的冒号替换为下划线
timestamp = item['new_timestamp'].replace(':', '_') timestamp = item['timestamp'].replace(':', '_')
audio_file = os.path.join(output_dir, f"audio_{timestamp}.mp3") audio_file = os.path.join(output_dir, f"audio_{timestamp}.mp3")
subtitle_file = os.path.join(output_dir, f"subtitle_{timestamp}.srt")
# 检查文件是否已存在,如存在且不强制重新生成,则跳过 # # 检查文件是否已存在,如存在且不强制重新生成,则跳过
if os.path.exists(audio_file) and not force_regenerate: # if os.path.exists(audio_file) and not force_regenerate:
logger.info(f"音频文件已存在,跳过生成: {audio_file}") # logger.info(f"音频文件已存在,跳过生成: {audio_file}")
audio_files.append(audio_file) # tts_item["audio_file"] = audio_file
continue # continue
text = item['narration'] text = item['narration']
@ -1369,9 +1459,17 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
f"如果您在中国请使用VPN; " f"如果您在中国请使用VPN; "
f"或者使用其他 tts 引擎") f"或者使用其他 tts 引擎")
continue continue
else:
# 为当前片段生成字幕文件
_, duration = create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)
audio_files.append(audio_file) tts_results.append({
sub_maker_list.append(sub_maker) "timestamp": item['timestamp'],
"audio_file": audio_file,
"subtitle_file": subtitle_file,
"duration": duration,
"text": text,
})
logger.info(f"已生成音频文件: {audio_file}") logger.info(f"已生成音频文件: {audio_file}")
return audio_files, sub_maker_list return tts_results

View File

@ -3,7 +3,8 @@ import os
import sys import sys
from uuid import uuid4 from uuid import uuid4
from app.config import config from app.config import config
from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, review_settings, merge_settings, system_settings from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, \
review_settings, merge_settings, system_settings
from webui.utils import cache, file_utils from webui.utils import cache, file_utils
from app.utils import utils from app.utils import utils
from app.models.schema import VideoClipParams, VideoAspect from app.models.schema import VideoClipParams, VideoAspect
@ -28,6 +29,7 @@ hide_streamlit_style = """
""" """
st.markdown(hide_streamlit_style, unsafe_allow_html=True) st.markdown(hide_streamlit_style, unsafe_allow_html=True)
def init_log(): def init_log():
"""初始化日志配置""" """初始化日志配置"""
from loguru import logger from loguru import logger
@ -41,11 +43,11 @@ def init_log():
"torch.cuda.is_available()", "torch.cuda.is_available()",
"CUDA initialization" "CUDA initialization"
] ]
for msg in ignore_messages: for msg in ignore_messages:
if msg in record["message"]: if msg in record["message"]:
return "" return ""
file_path = record["file"].path file_path = record["file"].path
relative_path = os.path.relpath(file_path, config.root_dir) relative_path = os.path.relpath(file_path, config.root_dir)
record["file"].path = f"./{relative_path}" record["file"].path = f"./{relative_path}"
@ -74,6 +76,7 @@ def init_log():
filter=log_filter filter=log_filter
) )
def init_global_state(): def init_global_state():
"""初始化全局状态""" """初始化全局状态"""
if 'video_clip_json' not in st.session_state: if 'video_clip_json' not in st.session_state:
@ -85,6 +88,7 @@ def init_global_state():
if 'subclip_videos' not in st.session_state: if 'subclip_videos' not in st.session_state:
st.session_state['subclip_videos'] = {} st.session_state['subclip_videos'] = {}
def tr(key): def tr(key):
"""翻译函数""" """翻译函数"""
i18n_dir = os.path.join(os.path.dirname(__file__), "webui", "i18n") i18n_dir = os.path.join(os.path.dirname(__file__), "webui", "i18n")
@ -92,13 +96,14 @@ def tr(key):
loc = locales.get(st.session_state['ui_language'], {}) loc = locales.get(st.session_state['ui_language'], {})
return loc.get("Translation", {}).get(key, key) return loc.get("Translation", {}).get(key, key)
def render_generate_button(): def render_generate_button():
"""渲染生成按钮和处理逻辑""" """渲染生成按钮和处理逻辑"""
if st.button(tr("Generate Video"), use_container_width=True, type="primary"): if st.button(tr("Generate Video"), use_container_width=True, type="primary"):
try: try:
from app.services import task as tm from app.services import task as tm
import torch import torch
# 重置日志容器和记录 # 重置日志容器和记录
log_container = st.empty() log_container = st.empty()
log_records = [] log_records = []
@ -152,7 +157,7 @@ def render_generate_button():
video_files = result.get("videos", []) video_files = result.get("videos", [])
st.success(tr("视生成完成")) st.success(tr("视生成完成"))
try: try:
if video_files: if video_files:
player_cols = st.columns(len(video_files) * 2 + 1) player_cols = st.columns(len(video_files) * 2 + 1)
@ -167,15 +172,16 @@ def render_generate_button():
finally: finally:
PerformanceMonitor.cleanup_resources() PerformanceMonitor.cleanup_resources()
def main(): def main():
"""主函数""" """主函数"""
init_log() init_log()
init_global_state() init_global_state()
utils.init_resources() utils.init_resources()
st.title(f"NarratoAI :sunglasses:📽️") st.title(f"NarratoAI :sunglasses:📽️")
st.write(tr("Get Help")) st.write(tr("Get Help"))
# 渲染基础设置面板 # 渲染基础设置面板
basic_settings.render_basic_settings(tr) basic_settings.render_basic_settings(tr)
# 渲染合并设置 # 渲染合并设置
@ -192,12 +198,13 @@ def main():
subtitle_settings.render_subtitle_panel(tr) subtitle_settings.render_subtitle_panel(tr)
# 渲染系统设置面板 # 渲染系统设置面板
system_settings.render_system_panel(tr) system_settings.render_system_panel(tr)
# 渲染视频审查面板 # 渲染视频审查面板
review_settings.render_review_panel(tr) review_settings.render_review_panel(tr)
# 渲染生成按钮和处理逻辑 # 渲染生成按钮和处理逻辑
render_generate_button() render_generate_button()
if __name__ == "__main__": if __name__ == "__main__":
main() main()