NarratoAI/app/services/merger_video.py
viccy dc12f390bb feat: 新增原片字幕支持并优化视频合并流程
- 为VideoClipParams新增原字幕路径配置字段,支持单条/多条字幕路径
- 完善webui参数获取逻辑,处理字幕路径兼容性并对接前端选择
- 重构后端字幕处理流程,支持自动匹配视频对应原字幕,合并原声字幕
- 优化视频合并逻辑,新增ffmpeg无损copy合并判断,自动回退重编码提升效率
- 新增ffmpeg快速素材合并路径,支持自定义字幕样式与多音轨混合
- 新增多个单元测试覆盖字幕匹配、合并及视频合并场景
2026-06-08 13:05:30 +08:00

846 lines
32 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''
@Project: NarratoAI
@File : merger_video
@Author : Viccy同学
@Date : 2025/5/6 下午7:38
'''
import os
import json
import shutil
import subprocess
from enum import Enum
from typing import List, Optional, Tuple
from loguru import logger
from app.utils import ffmpeg_utils
class VideoAspect(Enum):
    """Aspect ratios supported for merged output videos.

    Each member's value is the ratio written as ``"W:H"``.
    """

    landscape = "16:9"  # landscape 16:9
    landscape_2 = "4:3"  # landscape 4:3
    portrait = "9:16"  # portrait 9:16
    portrait_2 = "3:4"  # portrait 3:4
    square = "1:1"  # square 1:1

    def to_resolution(self) -> Tuple[int, int]:
        """Return the standard ``(width, height)`` in pixels for this ratio.

        The 4:3 and 3:4 members map to 1440x1080 and 1080x1440 so that the
        resolution actually has the declared aspect ratio (they previously
        returned 1280x720 and 720x1280, which are 16:9 and 9:16).

        Returns:
            Tuple[int, int]: width and height; falls back to 1080x1920
            (portrait) for a member without an explicit entry.
        """
        resolutions = {
            VideoAspect.portrait: (1080, 1920),
            VideoAspect.portrait_2: (1080, 1440),
            VideoAspect.landscape: (1920, 1080),
            VideoAspect.landscape_2: (1440, 1080),
            VideoAspect.square: (1080, 1080),
        }
        # Default to portrait, as the original if/elif chain did.
        return resolutions.get(self, (1080, 1920))
def check_ffmpeg_installation() -> bool:
    """Check whether the ``ffmpeg`` executable can be run.

    Runs ``ffmpeg -version`` and treats any subprocess failure or a missing
    executable as "not installed".

    Returns:
        bool: True if the command ran successfully, False otherwise.
    """
    version_cmd = ['ffmpeg', '-version']
    try:
        subprocess.run(
            version_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except (subprocess.SubprocessError, FileNotFoundError):
        logger.error("ffmpeg未安装或不在系统PATH中请安装ffmpeg")
        return False
    return True
def get_hardware_acceleration_option() -> Optional[str]:
    """Return the hardware acceleration type reported by ``ffmpeg_utils``.

    Thin wrapper around ``ffmpeg_utils.get_ffmpeg_hwaccel_type()``.

    Returns:
        Optional[str]: whatever the helper returns; per the annotation, None
        means no usable hardware acceleration.
    """
    # Delegate to the hardware acceleration detection API.
    hwaccel_type = ffmpeg_utils.get_ffmpeg_hwaccel_type()
    return hwaccel_type
def check_video_has_audio(video_path: str) -> bool:
    """Report whether a video file contains an audio stream.

    Asks ``ffprobe`` for the ``codec_type`` of the first audio stream and
    looks for ``audio`` in the CSV output.

    Args:
        video_path: path of the video file to inspect.

    Returns:
        bool: True if ffprobe reports an audio stream; False if the file does
        not exist, has no audio stream, or probing fails.
    """
    if not os.path.exists(video_path):
        logger.warning(f"视频文件不存在: {video_path}")
        return False

    probe_cmd = [
        'ffprobe', '-v', 'error',
        '-select_streams', 'a:0',
        '-show_entries', 'stream=codec_type',
        '-of', 'csv=p=0',
        video_path
    ]
    try:
        result = subprocess.run(probe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False)
        # Accept any output line whose first CSV field is "audio" instead of
        # requiring the whole output to equal "audio": the strict comparison
        # fails as soon as ffprobe prints an extra line or trailing field.
        return any(
            line.split(',')[0].strip() == 'audio'
            for line in result.stdout.splitlines()
        )
    except Exception as e:
        logger.warning(f"检测视频音频流时出错: {str(e)}")
        return False
def create_ffmpeg_concat_file(video_paths: List[str], concat_file_path: str) -> str:
    """Write the list file consumed by ffmpeg's concat demuxer.

    Each input becomes one ``file '<absolute path>'`` line.

    Args:
        video_paths: paths of the videos to concatenate, in order.
        concat_file_path: where to write the list file.

    Returns:
        str: ``concat_file_path``.
    """
    with open(concat_file_path, 'w', encoding='utf-8') as f:
        for video_path in video_paths:
            abs_path = os.path.abspath(video_path)
            if os.name == 'nt':
                # ffmpeg accepts forward slashes on Windows.
                abs_path = abs_path.replace('\\', '/')
            # Inside single quotes ffmpeg takes every character literally, so
            # backslash-escaping ':' or '\\' would corrupt the path. The only
            # character needing care is the quote itself: close the quoted
            # string, emit an escaped quote, and reopen it.
            quoted_path = abs_path.replace("'", "'\\''")
            f.write(f"file '{quoted_path}'\n")
    return concat_file_path
def _get_video_stream_signature(video_path: str) -> Optional[dict]:
    """Collect the stream parameters used to decide whether concat copy is safe.

    Probes the first video stream with ``ffprobe`` and returns its codec,
    profile, dimensions, pixel format, frame rates, time base and sample
    aspect ratio (defaulting to ``"1:1"`` when ffprobe omits it).

    Args:
        video_path: path of the video to probe.

    Returns:
        Optional[dict]: the signature, or None when the file has no video
        stream or probing fails.
    """
    plain_fields = (
        "codec_name",
        "profile",
        "width",
        "height",
        "pix_fmt",
        "r_frame_rate",
        "avg_frame_rate",
        "time_base",
    )
    probe_cmd = [
        'ffprobe', '-v', 'error',
        '-select_streams', 'v:0',
        '-show_entries',
        'stream=codec_name,profile,width,height,pix_fmt,r_frame_rate,avg_frame_rate,time_base,sample_aspect_ratio',
        '-of', 'json',
        video_path
    ]
    try:
        completed = subprocess.run(
            probe_cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True
        )
        probe_data = json.loads(completed.stdout or "{}")
        streams = probe_data.get("streams", [])
        if not streams:
            logger.warning(f"视频没有可用的视频流,不能使用 copy 合并: {video_path}")
            return None

        first_stream = streams[0]
        signature = {key: first_stream.get(key) for key in plain_fields}
        signature["sample_aspect_ratio"] = first_stream.get("sample_aspect_ratio", "1:1")
        return signature
    except Exception as e:
        logger.warning(f"探测视频流参数失败,不能使用 copy 合并: {video_path}, 错误: {str(e)}")
        return None
def _can_concat_video_copy(video_paths: List[str]) -> bool:
"""
判断所有片段的视频流参数是否一致,避免 concat copy 造成时间轴或封装异常。
"""
if not video_paths:
return False
signatures = []
for video_path in video_paths:
signature = _get_video_stream_signature(video_path)
if not signature:
return False
signatures.append(signature)
base_signature = signatures[0]
for video_path, signature in zip(video_paths[1:], signatures[1:]):
if signature != base_signature:
logger.warning(
"视频片段参数不一致,跳过 copy 合并并回退重编码: "
f"{video_path}, 基准={base_signature}, 当前={signature}"
)
return False
return True
def _get_media_duration(video_path: str) -> Optional[float]:
    """Return the container duration of a media file in seconds.

    Reads ``format=duration`` through ``ffprobe``.

    Args:
        video_path: path of the media file.

    Returns:
        Optional[float]: the duration, or None when ffprobe fails or its
        output cannot be parsed as a float.
    """
    try:
        completed = subprocess.run(
            [
                'ffprobe', '-v', 'error',
                '-show_entries', 'format=duration',
                '-of', 'csv=p=0',
                video_path
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            check=True
        )
        raw_duration = completed.stdout.strip()
        return float(raw_duration)
    except Exception as e:
        logger.warning(f"探测视频时长失败: {video_path}, 错误: {str(e)}")
        return None
def _concat_duration_matches(video_paths: List[str], output_path: str) -> bool:
    """Verify that the merged file is about as long as its inputs combined.

    The allowed deviation is the larger of 0.5 s and 0.04 s per input clip.

    Args:
        video_paths: the clips that were concatenated.
        output_path: the concatenated result.

    Returns:
        bool: True when the durations agree within tolerance; False when any
        duration cannot be probed or the deviation is too large.
    """
    clip_durations = []
    for clip_path in video_paths:
        clip_duration = _get_media_duration(clip_path)
        if clip_duration is None:
            return False
        clip_durations.append(clip_duration)

    output_duration = _get_media_duration(output_path)
    if output_duration is None:
        return False

    expected_duration = sum(clip_durations)
    tolerance = max(0.5, len(video_paths) * 0.04)
    diff = abs(expected_duration - output_duration)

    if diff > tolerance:
        logger.warning(
            "视频流 copy 合并后的时长偏差过大,将回退重编码: "
            f"期望={expected_duration:.3f}s, 实际={output_duration:.3f}s, 偏差={diff:.3f}s"
        )
        return False

    logger.info(
        "视频流 copy 合并时长校验通过: "
        f"期望={expected_duration:.3f}s, 实际={output_duration:.3f}s"
    )
    return True
def _build_concat_video_copy_cmd(concat_file: str, output_path: str) -> List[str]:
return [
'ffmpeg', '-y',
'-f', 'concat',
'-safe', '0',
'-i', concat_file,
'-c:v', 'copy',
'-an',
'-movflags', '+faststart',
'-avoid_negative_ts', 'make_zero',
output_path
]
def _build_concat_video_reencode_cmd(concat_file: str, output_path: str, threads: int) -> List[str]:
return [
'ffmpeg', '-y',
'-f', 'concat',
'-safe', '0',
'-i', concat_file,
'-c:v', 'libx264',
'-preset', 'medium',
'-profile:v', 'high',
'-an',
'-threads', str(threads),
output_path
]
def _concat_video_streams(
    video_paths: List[str],
    concat_file: str,
    output_path: str,
    threads: int
) -> None:
    """Concatenate the video streams, preferring a lossless stream copy.

    Stream copy is attempted only when all clips share the same stream
    parameters, and its result is kept only when the merged duration matches
    the inputs. In every other case the clips are re-encoded.

    Args:
        video_paths: clips to concatenate, in order.
        concat_file: concat demuxer list file describing ``video_paths``.
        output_path: destination of the merged, audio-less video.
        threads: thread count for the re-encode fallback.

    Raises:
        subprocess.CalledProcessError: if the re-encode fallback fails.
    """
    if not _can_concat_video_copy(video_paths):
        logger.info("视频流不满足 copy 合并条件,将使用重编码合并")
    else:
        try:
            subprocess.run(
                _build_concat_video_copy_cmd(concat_file, output_path),
                check=True,
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
            )
        except subprocess.CalledProcessError as e:
            error_msg = e.stderr.decode() if e.stderr else str(e)
            logger.warning(f"视频流 copy 合并失败,将回退重编码合并: {error_msg}")
        else:
            if _concat_duration_matches(video_paths, output_path):
                logger.info("视频流 copy 合并完成")
                return
            # The copy result has a wrong duration: discard it before re-encoding.
            if os.path.exists(output_path):
                try:
                    os.remove(output_path)
                except OSError as e:
                    logger.warning(f"删除 copy 合并临时结果失败,将继续尝试重编码覆盖: {str(e)}")

    subprocess.run(
        _build_concat_video_reencode_cmd(concat_file, output_path, threads),
        check=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    logger.info("视频流重编码合并完成")
def process_single_video(
    input_path: str,
    output_path: str,
    target_width: int,
    target_height: int,
    keep_audio: bool = True,
    hwaccel: Optional[str] = None
) -> str:
    """Normalize one clip: scale and pad to the target size at 30 fps.

    Hardware *decoding* is deliberately never requested: the input is decoded
    in software so the scale/pad filter chain works, and only the encoder may
    be a hardware one (for example pure NVENC). If the first encode fails
    while a hardware encoder was in use, the function retries with libx264
    and finally with a minimal libx264 command.

    Args:
        input_path: source video path.
        output_path: destination video path.
        target_width: output width in pixels.
        target_height: output height in pixels.
        keep_audio: whether to keep the clip's audio (encoded as AAC); audio
            is dropped anyway when the input has no audio stream.
        hwaccel: hardware acceleration option; a falsy value selects
            software encoding.

    Returns:
        str: ``output_path``.

    Raises:
        FileNotFoundError: if ``input_path`` does not exist.
        RuntimeError: if every encoding attempt fails.
    """
    if not os.path.exists(input_path):
        raise FileNotFoundError(f"找不到视频文件: {input_path}")

    # Safety check: on Windows only keep hardware acceleration if the input
    # can at least be probed successfully.
    if os.name == 'nt' and hwaccel:
        logger.info("在Windows系统上检测到硬件加速请求将进行额外的兼容性检查")
        try:
            probe_cmd = [
                'ffprobe', '-v', 'error',
                '-select_streams', 'v:0',
                '-show_entries', 'stream=codec_name,width,height',
                '-of', 'csv=p=0',
                input_path
            ]
            result = subprocess.run(probe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False)
            if result.returncode != 0:
                logger.warning(f"视频探测失败,为安全起见,禁用硬件加速: {result.stderr}")
                hwaccel = None
        except Exception as e:
            logger.warning(f"视频探测出错,禁用硬件加速: {str(e)}")
            hwaccel = None

    # Audio arguments are computed once and shared by the main command and
    # the software fallback.
    if not keep_audio:
        audio_args = ['-an']
    elif check_video_has_audio(input_path):
        audio_args = ['-c:a', 'aac', '-b:a', '128k']
    else:
        logger.warning(f"视频 {input_path} 没有音频流,将会忽略音频设置")
        audio_args = ['-an']

    # Scale down to fit, then pad to the exact target size to keep the ratio.
    scale_filter = f"scale={target_width}:{target_height}:force_original_aspect_ratio=decrease"
    pad_filter = f"pad={target_width}:{target_height}:(ow-iw)/2:(oh-ih)/2"
    filter_args = ['-vf', f"{scale_filter},{pad_filter}", '-r', '30']
    software_encoder_args = ['-c:v', 'libx264', '-preset', 'medium', '-profile:v', 'high']
    bitrate_args = [
        '-b:v', '5M',
        '-maxrate', '8M',
        '-bufsize', '10M',
        '-pix_fmt', 'yuv420p',  # widely compatible pixel format
    ]

    encoder_args: List[str] = []
    if hwaccel:
        try:
            hwaccel_info = ffmpeg_utils.detect_hardware_acceleration()
            if hwaccel_info.get("type") in ["cuda", "nvenc"] and hwaccel_info.get("encoder") == "h264_nvenc":
                # Pure NVENC encoder: best compatibility with the filter chain.
                logger.info("使用纯NVENC编码器避免滤镜链问题")
                encoder_args = ['-c:v', 'h264_nvenc', '-preset', 'medium', '-cq', '23', '-profile:v', 'main']
            else:
                encoder = ffmpeg_utils.get_optimal_ffmpeg_encoder()
                encoder_args = ['-c:v', encoder]
                # Encoder-specific tuning.
                if "amf" in encoder:
                    encoder_args += ['-quality', 'balanced']
                elif "qsv" in encoder:
                    encoder_args += ['-preset', 'medium']
                elif "videotoolbox" in encoder:
                    encoder_args += ['-profile:v', 'high']
                else:
                    encoder_args += ['-preset', 'medium', '-profile:v', 'high']
        except Exception as e:
            logger.warning(f"硬件编码器检测失败: {str(e)},将使用软件编码")
            hwaccel = None
            # Discard any half-built hardware encoder arguments.
            encoder_args = []

    if not hwaccel:
        logger.info("使用软件编码器(libx264)")
        encoder_args = list(software_encoder_args)

    command = (
        ['ffmpeg', '-y', '-i', input_path]
        + audio_args
        + filter_args
        + encoder_args
        + bitrate_args
        + [output_path]
    )

    try:
        subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        return output_path
    except subprocess.CalledProcessError as e:
        error_msg = e.stderr.decode() if e.stderr else str(e)
        logger.error(f"处理视频失败: {error_msg}")

        # Only a hardware-encoder failure is worth retrying in software.
        if hwaccel:
            logger.info("硬件加速失败,尝试使用软件编码作为备选方案")
            try:
                # Switch ffmpeg_utils to software encoding before retrying.
                ffmpeg_utils.force_software_encoding()
                fallback_cmd = (
                    ['ffmpeg', '-y', '-i', input_path]
                    + audio_args
                    + filter_args
                    + software_encoder_args
                    + bitrate_args
                    + [output_path]
                )
                logger.info("执行软件编码备选方案")
                subprocess.run(fallback_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                logger.info(f"使用软件编码成功处理视频: {output_path}")
                return output_path
            except subprocess.CalledProcessError as fallback_error:
                fallback_error_msg = fallback_error.stderr.decode() if fallback_error.stderr else str(fallback_error)
                logger.error(f"软件编码备选方案也失败: {fallback_error_msg}")

                # Last resort: a minimal command without filters or bitrate
                # settings.
                try:
                    logger.info("尝试最基本的编码参数")
                    basic_cmd = [
                        'ffmpeg', '-y', '-i', input_path,
                        '-c:v', 'libx264', '-preset', 'ultrafast',
                        '-crf', '23', '-pix_fmt', 'yuv420p',
                        output_path
                    ]
                    subprocess.run(basic_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                    logger.info(f"使用基本编码参数成功处理视频: {output_path}")
                    return output_path
                except subprocess.CalledProcessError as basic_error:
                    basic_error_msg = basic_error.stderr.decode() if basic_error.stderr else str(basic_error)
                    logger.error(f"基本编码参数也失败: {basic_error_msg}")
                    raise RuntimeError(f"无法处理视频 {input_path}: 所有编码方案都失败") from basic_error

        # Software encoding failed: report the original ffmpeg error.
        raise RuntimeError(f"处理视频失败: {error_msg}") from e
def combine_clip_videos(
    output_video_path: str,
    video_paths: List[str],
    video_ost_list: List[int],
    video_aspect: VideoAspect = VideoAspect.portrait,
    threads: int = 4,
    force_software_encoding: bool = False,  # force software encoding
) -> str:
    """Normalize the given clips and merge them into one video.

    The work happens in two phases inside ``<output dir>/temp_videos``:
    every clip is first re-encoded to the target resolution, then the video
    streams are concatenated and the original audio of the selected clips is
    mixed onto a silent track at the matching time offsets. If the merge
    fails with an ffmpeg error, a plain audio-less concat is tried instead.
    The temporary directory is removed on exit.

    Args:
        output_video_path: where to store the merged video.
        video_paths: clip paths, in playback order.
        video_ost_list: per-clip original-sound setting. This function keeps
            a clip's own audio when the value is greater than 0 and the clip
            has an audio stream (the original documentation lists
            0 = drop original sound, 1 = original sound only,
            2 = original sound plus narration).
        video_aspect: output aspect ratio.
        threads: thread count used when the concat step re-encodes.
        force_software_encoding: skip hardware acceleration detection and
            always encode in software.

    Returns:
        str: ``output_video_path``.

    Raises:
        RuntimeError: if ffmpeg is missing or both merge strategies fail.
        ValueError: if no clip could be processed.
    """
    if not check_ffmpeg_installation():
        raise RuntimeError("未找到ffmpeg请先安装")

    # A bare file name has an empty dirname, which os.makedirs rejects;
    # fall back to the current directory in that case.
    output_dir = os.path.dirname(output_video_path) or "."
    os.makedirs(output_dir, exist_ok=True)

    aspect = VideoAspect(video_aspect)
    video_width, video_height = aspect.to_resolution()

    # Detect usable hardware acceleration unless software encoding is forced.
    hwaccel = None if force_software_encoding else get_hardware_acceleration_option()
    if hwaccel:
        logger.info(f"将使用 {hwaccel} 硬件加速")
    elif force_software_encoding:
        logger.info("已强制使用软件编码,跳过硬件加速检测")
    else:
        logger.info("未检测到兼容的硬件加速,将使用软件编码")

    # hwaccel is not disabled on Windows here; process_single_video performs
    # an additional safety check instead.
    if os.name == 'nt' and hwaccel:
        logger.warning("在Windows系统上检测到硬件加速但为了提高兼容性建议使用软件编码")

    # Truncate both lists to the shorter one when their lengths differ.
    if len(video_paths) != len(video_ost_list):
        logger.warning(f"视频路径列表({len(video_paths)})和原声设置列表({len(video_ost_list)})长度不匹配")
        min_length = min(len(video_paths), len(video_ost_list))
        video_paths = video_paths[:min_length]
        video_ost_list = video_ost_list[:min_length]

    # Build one configuration dict per existing clip.
    video_segments = []
    for i, (video_path, video_ost) in enumerate(zip(video_paths, video_ost_list)):
        if not os.path.exists(video_path):
            logger.warning(f"视频不存在,跳过: {video_path}")
            continue

        has_audio = check_video_has_audio(video_path)
        if video_ost > 0 and not has_audio:
            logger.warning(f"视频 {video_path} 设置为保留原声(ost={video_ost}),但该视频没有音频流")

        video_segments.append({
            "index": i,
            "path": video_path,
            "ost": video_ost,
            "has_audio": has_audio,
            # Keep audio only when requested and actually present.
            "keep_audio": video_ost > 0 and has_audio
        })

    processed_videos = []
    temp_dir = os.path.join(output_dir, "temp_videos")
    os.makedirs(temp_dir, exist_ok=True)

    try:
        # Phase 1: normalize every clip into an intermediate file.
        for segment in video_segments:
            temp_output = os.path.join(temp_dir, f"processed_{segment['index']}.mp4")
            processed_entry = {
                "index": segment["index"],
                "path": temp_output,
                "keep_audio": segment["keep_audio"]
            }
            try:
                process_single_video(
                    input_path=segment['path'],
                    output_path=temp_output,
                    target_width=video_width,
                    target_height=video_height,
                    keep_audio=segment['keep_audio'],
                    hwaccel=hwaccel
                )
                processed_videos.append(processed_entry)
                logger.info(f"视频 {segment['index'] + 1}/{len(video_segments)} 处理完成")
            except Exception as e:
                logger.error(f"处理视频 {segment['path']} 时出错: {str(e)}")

                # Without hardware acceleration there is nothing else to
                # try: skip the clip.
                if not hwaccel or force_software_encoding:
                    continue

                logger.info(f"尝试使用软件编码处理视频 {segment['path']}")
                try:
                    process_single_video(
                        input_path=segment['path'],
                        output_path=temp_output,
                        target_width=video_width,
                        target_height=video_height,
                        keep_audio=segment['keep_audio'],
                        hwaccel=None  # software encoding
                    )
                    processed_videos.append(processed_entry)
                    logger.info(f"使用软件编码成功处理视频 {segment['index'] + 1}/{len(video_segments)}")
                except Exception as fallback_error:
                    logger.error(f"使用软件编码处理视频 {segment['path']} 也失败: {str(fallback_error)}")
                    continue

        if not processed_videos:
            raise ValueError("没有有效的视频片段可以合并")

        # Restore the original clip order.
        processed_videos.sort(key=lambda x: x["index"])

        # Phase 2: merge step by step, avoiding one large filter_complex.
        try:
            # 1. Concatenate the video streams (audio dropped) into one file.
            video_paths_only = [video["path"] for video in processed_videos]
            video_concat_path = os.path.join(temp_dir, "video_concat.mp4")
            concat_file = os.path.join(temp_dir, "concat_list.txt")
            create_ffmpeg_concat_file(video_paths_only, concat_file)
            _concat_video_streams(video_paths_only, concat_file, video_concat_path, threads)

            # 2. Extract the audio of every clip that keeps its sound.
            audio_segments = [video for video in processed_videos if video["keep_audio"]]
            if not audio_segments:
                # No audio to mix: the silent concat is the final result.
                shutil.copy(video_concat_path, output_video_path)
                logger.info("无音频视频合并完成")
                return output_video_path

            audio_path_by_index = {}
            for i, segment in enumerate(audio_segments):
                audio_file = os.path.join(temp_dir, f"audio_{i}.aac")
                extract_audio_cmd = [
                    'ffmpeg', '-y',
                    '-i', segment["path"],
                    '-vn',  # no video
                    '-c:a', 'aac',
                    '-b:a', '128k',
                    audio_file
                ]
                subprocess.run(extract_audio_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                audio_path_by_index[segment["index"]] = audio_file
                logger.info(f"提取音频 {i+1}/{len(audio_segments)} 完成")

            # 3. Compute where each audio segment starts on the merged timeline.
            audio_timings = []
            current_time = 0.0
            for video in processed_videos:
                duration_cmd = [
                    'ffprobe', '-v', 'error',
                    '-show_entries', 'format=duration',
                    '-of', 'csv=p=0',
                    video["path"]
                ]
                result = subprocess.run(duration_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
                duration = float(result.stdout.strip())

                if video["keep_audio"] and video["index"] in audio_path_by_index:
                    audio_timings.append({
                        "file": audio_path_by_index[video["index"]],
                        "start": current_time,
                        "index": video["index"]
                    })

                current_time += duration

            # 4. Create a silent base track spanning the total duration.
            silence_audio = os.path.join(temp_dir, "silence.aac")
            create_silence_cmd = [
                'ffmpeg', '-y',
                '-f', 'lavfi',
                '-i', 'anullsrc=r=44100:cl=stereo',
                '-t', str(current_time),  # total duration
                '-c:a', 'aac',
                '-b:a', '128k',
                silence_audio
            ]
            subprocess.run(create_silence_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

            # 5. Write the filter script that delays and mixes the audio.
            filter_script = os.path.join(temp_dir, "filter_script.txt")
            # The author's note says amix divides the volume across its
            # inputs, so each real input is boosted by the input count
            # (including the silent track) to compensate.
            num_inputs = len(audio_timings) + 1
            volume_compensation = num_inputs
            with open(filter_script, 'w', encoding='utf-8') as f:
                # Mute the base track first.
                f.write("[0:a]volume=0.0[silence];\n")
                for i, timing in enumerate(audio_timings):
                    delay_ms = int(timing['start'] * 1000)
                    f.write(f"[{i+1}:a]volume={volume_compensation},adelay={delay_ms}|{delay_ms}[a{i}];\n")

                mix_inputs = "[silence]" + "".join(f"[a{i}]" for i in range(len(audio_timings)))
                f.write(f"{mix_inputs}amix=inputs={num_inputs}:duration=longest[aout]")

            # 6. Mix all audio inputs into one track.
            audio_inputs = ['-i', silence_audio]
            for timing in audio_timings:
                audio_inputs.extend(['-i', timing["file"]])

            mixed_audio = os.path.join(temp_dir, "mixed_audio.aac")
            audio_mix_cmd = [
                'ffmpeg', '-y'
            ] + audio_inputs + [
                '-filter_complex_script', filter_script,
                '-map', '[aout]',
                '-c:a', 'aac',
                '-b:a', '128k',
                mixed_audio
            ]
            subprocess.run(audio_mix_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            logger.info("音频混合完成")

            # 7. Mux the merged video with the mixed audio.
            final_cmd = [
                'ffmpeg', '-y',
                '-i', video_concat_path,
                '-i', mixed_audio,
                '-c:v', 'copy',
                '-c:a', 'aac',
                '-map', '0:v:0',
                '-map', '1:a:0',
                '-shortest',
                output_video_path
            ]
            subprocess.run(final_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            logger.info("视频最终合并完成")
            return output_video_path

        except subprocess.CalledProcessError as e:
            logger.error(f"合并视频过程中出错: {e.stderr.decode() if e.stderr else str(e)}")

            # Backup strategy: the simplest possible audio-less concat.
            logger.info("尝试备用合并方法 - 无音频合并")
            try:
                concat_file = os.path.join(temp_dir, "concat_list.txt")
                video_paths_only = [video["path"] for video in processed_videos]
                create_ffmpeg_concat_file(video_paths_only, concat_file)

                backup_cmd = [
                    'ffmpeg', '-y',
                    '-f', 'concat',
                    '-safe', '0',
                    '-i', concat_file,
                    '-c:v', 'copy',
                    '-an',  # no audio
                    output_video_path
                ]
                subprocess.run(backup_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                logger.warning("使用备用方法(无音频)成功合并视频")
                return output_video_path
            except Exception as backup_error:
                logger.error(f"备用合并方法也失败: {str(backup_error)}")
                raise RuntimeError(f"无法合并视频: {str(backup_error)}") from backup_error

    except Exception as e:
        logger.error(f"合并视频时出错: {str(e)}")
        raise
    finally:
        # Always remove the intermediate files.
        try:
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
                logger.info("已清理临时文件")
        except Exception as e:
            logger.warning(f"清理临时文件时出错: {str(e)}")
if __name__ == '__main__':
    # Manual smoke test: merge a few locally stored sample clips, keeping
    # the original sound of every clip.
    video_paths = list((
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/S01E02_00_14_09_440.mp4',
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/S01E08_00_27_11_110.mp4',
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/S01E08_00_34_44_480.mp4',
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/S01E08_00_42_47_630.mp4',
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/S01E09_00_29_48_160.mp4',
    ))
    merged_output = "/Users/apple/Desktop/home/NarratoAI/storage/temp/merge/merged_123.mp4"

    combine_clip_videos(
        output_video_path=merged_output,
        video_paths=video_paths,
        video_ost_list=[1] * len(video_paths),
        video_aspect=VideoAspect.portrait,
        # Do not force software encoding; let the function decide.
        force_software_encoding=False,
    )