Merge pull request #153 from linyqh/dev065

Fix legacy bugs
viccy 2025-07-03 00:04:13 +08:00 committed by GitHub
commit d3df2931c4
12 changed files with 771 additions and 130 deletions

.gitignore
View File

@ -34,3 +34,6 @@ resource/srt/*.srt
app/models/faster-whisper-large-v2/*
app/models/faster-whisper-large-v3/*
app/models/bert/*
bug清单.md
task.md

View File

@ -1,6 +1,6 @@
import warnings
from enum import Enum
from typing import Any, List, Optional
from typing import Any, List, Optional, Union
import pydantic
from pydantic import BaseModel, Field
@ -13,6 +13,24 @@ warnings.filterwarnings(
)
class AudioVolumeDefaults:
"""音量配置默认值常量类 - 确保全局一致性"""
# 语音音量默认值
VOICE_VOLUME = 1.0
TTS_VOLUME = 1.0
# 原声音量默认值 - 这是修复bug的关键
ORIGINAL_VOLUME = 0.7
# 背景音乐音量默认值
BGM_VOLUME = 0.3
# 音量范围
MIN_VOLUME = 0.0
MAX_VOLUME = 1.0
class VideoConcatMode(str, Enum):
random = "random"
sequential = "sequential"
@ -101,7 +119,7 @@ class VideoParams(BaseModel):
video_subject: str
video_script: str = "" # script used to generate the video
video_terms: Optional[str | list] = None # keywords used to generate the video
video_terms: Optional[Union[str, list]] = None # keywords used to generate the video
video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
video_clip_duration: Optional[int] = 5
@ -113,11 +131,11 @@ class VideoParams(BaseModel):
video_language: Optional[str] = "" # auto detect
voice_name: Optional[str] = ""
voice_volume: Optional[float] = 1.0
voice_volume: Optional[float] = AudioVolumeDefaults.VOICE_VOLUME
voice_rate: Optional[float] = 1.0
bgm_type: Optional[str] = "random"
bgm_file: Optional[str] = ""
bgm_volume: Optional[float] = 0.2
bgm_volume: Optional[float] = AudioVolumeDefaults.BGM_VOLUME
subtitle_enabled: Optional[bool] = True
subtitle_position: Optional[str] = "bottom" # top, bottom, center
@ -157,11 +175,11 @@ class AudioRequest(BaseModel):
video_script: str
video_language: Optional[str] = ""
voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
voice_volume: Optional[float] = 1.0
voice_volume: Optional[float] = AudioVolumeDefaults.VOICE_VOLUME
voice_rate: Optional[float] = 1.2
bgm_type: Optional[str] = "random"
bgm_file: Optional[str] = ""
bgm_volume: Optional[float] = 0.2
bgm_volume: Optional[float] = AudioVolumeDefaults.BGM_VOLUME
video_source: Optional[str] = "local"
@ -347,7 +365,7 @@ class VideoClipParams(BaseModel):
# video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
voice_name: Optional[str] = Field(default="zh-CN-YunjianNeural", description="voice name")
voice_volume: Optional[float] = Field(default=1.0, description="narration voice volume")
voice_volume: Optional[float] = Field(default=AudioVolumeDefaults.VOICE_VOLUME, description="narration voice volume")
voice_rate: Optional[float] = Field(default=1.0, description="speech rate")
voice_pitch: Optional[float] = Field(default=1.0, description="speech pitch")
@ -367,9 +385,9 @@ class VideoClipParams(BaseModel):
n_threads: Optional[int] = Field(default=16, description="number of threads") # more threads can speed up video processing
tts_volume: Optional[float] = Field(default=1.0, description="narration voice volume (post-processing)")
original_volume: Optional[float] = Field(default=1.0, description="original video audio volume")
bgm_volume: Optional[float] = Field(default=0.3, description="background music volume")
tts_volume: Optional[float] = Field(default=AudioVolumeDefaults.TTS_VOLUME, description="narration voice volume (post-processing)")
original_volume: Optional[float] = Field(default=AudioVolumeDefaults.ORIGINAL_VOLUME, description="original video audio volume")
bgm_volume: Optional[float] = Field(default=AudioVolumeDefaults.BGM_VOLUME, description="background music volume")
class VideoTranscriptionRequest(BaseModel):
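The intent of the schema changes above, sketched below on the assumption that other modules import the constants the same way materials.py does later in this diff: every default is pulled from AudioVolumeDefaults instead of being repeated as a literal float, so changing a default later happens in one place. ExampleAudioOptions is a hypothetical model used only for illustration.

# Minimal sketch (not part of the diff) - reuse the shared defaults instead of hard-coding floats.
from pydantic import BaseModel, Field
from app.models.schema import AudioVolumeDefaults

class ExampleAudioOptions(BaseModel):  # hypothetical model
    voice_volume: float = Field(default=AudioVolumeDefaults.VOICE_VOLUME)
    original_volume: float = Field(default=AudioVolumeDefaults.ORIGINAL_VOLUME)
    bgm_volume: float = Field(default=AudioVolumeDefaults.BGM_VOLUME)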

View File

@ -24,6 +24,7 @@ from moviepy.video.tools.subtitles import SubtitlesClip
from PIL import ImageFont
from app.utils import utils
from app.models.schema import AudioVolumeDefaults
def merge_materials(
@ -58,6 +59,7 @@ def merge_materials(
- stroke_width: stroke width, default 1
- threads: number of processing threads, default 2
- fps: output frame rate, default 30
- subtitle_enabled: whether subtitles are enabled, default True
Returns:
Path of the output video
@ -66,11 +68,12 @@ def merge_materials(
if options is None:
options = {}
# Set default parameter values
voice_volume = options.get('voice_volume', 1.0)
bgm_volume = options.get('bgm_volume', 0.3)
original_audio_volume = options.get('original_audio_volume', 0.0) # default 0, i.e. the original audio is not kept
keep_original_audio = options.get('keep_original_audio', False) # whether to keep the original audio
# Set default parameter values - use the unified volume configuration
voice_volume = options.get('voice_volume', AudioVolumeDefaults.VOICE_VOLUME)
bgm_volume = options.get('bgm_volume', AudioVolumeDefaults.BGM_VOLUME)
# Bug fix: change the default original-audio volume from 0.0 to 0.7 so the original audio stays audible in drama-narration mode
original_audio_volume = options.get('original_audio_volume', AudioVolumeDefaults.ORIGINAL_VOLUME)
keep_original_audio = options.get('keep_original_audio', True) # keep the original audio by default
subtitle_font = options.get('subtitle_font', '')
subtitle_font_size = options.get('subtitle_font_size', 40)
subtitle_color = options.get('subtitle_color', '#FFFFFF')
@ -81,11 +84,33 @@ def merge_materials(
stroke_width = options.get('stroke_width', 1)
threads = options.get('threads', 2)
fps = options.get('fps', 30)
subtitle_enabled = options.get('subtitle_enabled', True)
# Log the configuration - useful when debugging
logger.info(f"Volume configuration details:")
logger.info(f" - voice volume: {voice_volume}")
logger.info(f" - BGM volume: {bgm_volume}")
logger.info(f" - original audio volume: {original_audio_volume}")
logger.info(f" - keep original audio: {keep_original_audio}")
logger.info(f"Subtitle configuration details:")
logger.info(f" - subtitles enabled: {subtitle_enabled}")
logger.info(f" - subtitle file path: {subtitle_path}")
# Volume validation
def validate_volume(volume, name):
if not (AudioVolumeDefaults.MIN_VOLUME <= volume <= AudioVolumeDefaults.MAX_VOLUME):
logger.warning(f"{name} volume {volume} is outside the valid range [{AudioVolumeDefaults.MIN_VOLUME}, {AudioVolumeDefaults.MAX_VOLUME}] and will be clamped")
return max(AudioVolumeDefaults.MIN_VOLUME, min(volume, AudioVolumeDefaults.MAX_VOLUME))
return volume
voice_volume = validate_volume(voice_volume, "voice")
bgm_volume = validate_volume(bgm_volume, "BGM")
original_audio_volume = validate_volume(original_audio_volume, "original audio")
# Work around the transparent background issue - MoviePy 2.1.1 does not accept the value 'transparent'
if subtitle_bg_color == 'transparent':
subtitle_bg_color = None # in newer MoviePy versions, None means a transparent background
# Create the output directory (if it does not exist)
output_dir = os.path.dirname(output_path)
os.makedirs(output_dir, exist_ok=True)
@ -246,27 +271,34 @@ def merge_materials(
color=subtitle_color,
)
# Handle subtitles
if subtitle_path and os.path.exists(subtitle_path):
# Handle subtitles - fix the subtitle toggle bug
if subtitle_enabled and subtitle_path and os.path.exists(subtitle_path):
logger.info("Subtitles enabled, processing the subtitle file")
try:
# Load the subtitle file
sub = SubtitlesClip(
subtitles=subtitle_path,
encoding="utf-8",
subtitles=subtitle_path,
encoding="utf-8",
make_textclip=make_textclip
)
# Create a clip for each subtitle entry
text_clips = []
for item in sub.subtitles:
clip = create_text_clip(subtitle_item=item)
text_clips.append(clip)
# Composite the video with the subtitles
video_clip = CompositeVideoClip([video_clip, *text_clips])
logger.info(f"Added {len(text_clips)} subtitle clips")
except Exception as e:
logger.error(f"Failed to process subtitles: \n{traceback.format_exc()}")
elif not subtitle_enabled:
logger.info("Subtitles are disabled, skipping subtitle processing")
elif not subtitle_path:
logger.info("No subtitle file path provided, skipping subtitle processing")
elif not os.path.exists(subtitle_path):
logger.warning(f"Subtitle file does not exist: {subtitle_path}, skipping subtitle processing")
# Export the final video
try:
@ -372,6 +404,7 @@ if __name__ == '__main__':
'bgm_volume': 0.1, # background music volume
'original_audio_volume': 1.0, # original video volume, 0 means it is not kept
'keep_original_audio': True, # whether to keep the original audio
'subtitle_enabled': True, # whether subtitles are enabled - fixes the subtitle toggle bug
'subtitle_font': 'MicrosoftYaHeiNormal.ttc', # relative font path, resolved automatically under the font_dir() directory
'subtitle_font_size': 40,
'subtitle_color': '#FFFFFF',

View File

@ -402,18 +402,36 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> st
ffmpeg_start_time = start_str.replace(',', '.')
ffmpeg_end_time = end_str.replace(',', '.')
# Build the FFmpeg command
# Build the FFmpeg command - use the new smart encoder selection
encoder = ffmpeg_utils.get_optimal_ffmpeg_encoder()
ffmpeg_cmd = [
"ffmpeg", "-y", *hwaccel_args,
"-i", origin_video,
"-ss", ffmpeg_start_time,
"-to", ffmpeg_end_time,
"-c:v", "h264_videotoolbox" if hwaccel == "videotoolbox" else "libx264",
"-c:v", encoder,
"-c:a", "aac",
"-strict", "experimental",
video_path
]
# Add encoder-specific parameters
if "nvenc" in encoder:
ffmpeg_cmd.insert(-1, "-preset")
ffmpeg_cmd.insert(-1, "medium")
elif "videotoolbox" in encoder:
ffmpeg_cmd.insert(-1, "-profile:v")
ffmpeg_cmd.insert(-1, "high")
elif "qsv" in encoder:
ffmpeg_cmd.insert(-1, "-preset")
ffmpeg_cmd.insert(-1, "medium")
elif encoder == "libx264":
ffmpeg_cmd.insert(-1, "-preset")
ffmpeg_cmd.insert(-1, "medium")
ffmpeg_cmd.insert(-1, "-crf")
ffmpeg_cmd.insert(-1, "23")
# Run the FFmpeg command
# logger.info(f"Clipping video segment: {timestamp} -> {ffmpeg_start_time} to {ffmpeg_end_time}")
# logger.debug(f"Running command: {' '.join(ffmpeg_cmd)}")

View File

@ -64,7 +64,7 @@ def get_hardware_acceleration_option() -> Optional[str]:
Returns:
Optional[str]: hardware acceleration argument; returns None if unsupported
"""
# Use centralized hardware acceleration detection
# Use the new hardware acceleration detection API
return ffmpeg_utils.get_ffmpeg_hwaccel_type()
@ -178,14 +178,20 @@ def process_single_video(
logger.warning(f"视频探测出错,禁用硬件加速: {str(e)}")
hwaccel = None
# 添加硬件加速参数(根据前面的安全检查可能已经被禁用
# 添加硬件加速参数(使用新的智能检测机制
if hwaccel:
try:
# 使用集中式硬件加速参数
# 使用新的硬件加速检测API
hwaccel_args = ffmpeg_utils.get_ffmpeg_hwaccel_args()
command.extend(hwaccel_args)
if hwaccel_args:
command.extend(hwaccel_args)
logger.debug(f"应用硬件加速参数: {hwaccel_args}")
else:
logger.info("硬件加速不可用,将使用软件编码")
hwaccel = False # 标记为不使用硬件加速
except Exception as e:
logger.warning(f"应用硬件加速参数时出错: {str(e)},将使用软件编码")
hwaccel = False # 标记为不使用硬件加速
# 重置命令,移除可能添加了一半的硬件加速参数
command = ['ffmpeg', '-y']
@ -212,41 +218,27 @@ def process_single_video(
'-r', '30', # set the frame rate to 30 fps
])
# Select the encoder - accounting for Windows and hardware-specific compatibility
use_software_encoder = True
# Select the encoder - use the new smart encoder selection
encoder = ffmpeg_utils.get_optimal_ffmpeg_encoder()
if hwaccel:
# Get the hardware acceleration type and encoder info
hwaccel_type = ffmpeg_utils.get_ffmpeg_hwaccel_type()
hwaccel_encoder = ffmpeg_utils.get_ffmpeg_hwaccel_encoder()
if hwaccel and encoder != "libx264":
logger.info(f"使用硬件编码器: {encoder}")
command.extend(['-c:v', encoder])
if hwaccel_type == 'cuda' or hwaccel_type == 'nvenc':
try:
# Check whether the NVENC encoder is available
encoders_cmd = subprocess.run(
["ffmpeg", "-hide_banner", "-encoders"],
stderr=subprocess.PIPE, stdout=subprocess.PIPE, text=True, check=False
)
if "h264_nvenc" in encoders_cmd.stdout.lower():
command.extend(['-c:v', 'h264_nvenc', '-preset', 'p4', '-profile:v', 'high'])
use_software_encoder = False
else:
logger.warning("NVENC编码器不可用将使用软件编码")
except Exception as e:
logger.warning(f"NVENC编码器检测失败: {str(e)},将使用软件编码")
elif hwaccel_type == 'qsv':
command.extend(['-c:v', 'h264_qsv', '-preset', 'medium'])
use_software_encoder = False
elif hwaccel_type == 'videotoolbox': # macOS
command.extend(['-c:v', 'h264_videotoolbox', '-profile:v', 'high'])
use_software_encoder = False
elif hwaccel_type == 'vaapi': # Linux VA-API
command.extend(['-c:v', 'h264_vaapi', '-profile', '100'])
use_software_encoder = False
# If none of the branches above applied a hardware encoder, use software encoding
if use_software_encoder:
# Add encoder-specific parameters
if "nvenc" in encoder:
command.extend(['-preset', 'p4', '-profile:v', 'high'])
elif "videotoolbox" in encoder:
command.extend(['-profile:v', 'high'])
elif "qsv" in encoder:
command.extend(['-preset', 'medium'])
elif "vaapi" in encoder:
command.extend(['-profile', '100'])
elif "amf" in encoder:
command.extend(['-quality', 'balanced'])
else:
command.extend(['-preset', 'medium', '-profile:v', 'high'])
else:
logger.info("使用软件编码器(libx264)")
command.extend(['-c:v', 'libx264', '-preset', 'medium', '-profile:v', 'high'])
@ -273,8 +265,11 @@ def process_single_video(
# If hardware acceleration failed, try software encoding
if hwaccel:
logger.info("尝试使用软件编码作为备选方案")
logger.info("硬件加速失败,尝试使用软件编码作为备选方案")
try:
# Force software encoding
ffmpeg_utils.force_software_encoding()
# Build a new command that uses software encoding
fallback_cmd = ['ffmpeg', '-y', '-i', input_path]
@ -302,14 +297,30 @@ def process_single_video(
output_path
])
logger.info(f"执行备选FFmpeg命令: {' '.join(fallback_cmd)}")
logger.info("执行软件编码备选方案")
subprocess.run(fallback_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
logger.info(f"使用软件编码成功处理视频: {output_path}")
return output_path
except subprocess.CalledProcessError as fallback_error:
fallback_error_msg = fallback_error.stderr.decode() if fallback_error.stderr else str(fallback_error)
logger.error(f"备选软件编码也失败: {fallback_error_msg}")
raise RuntimeError(f"无法处理视频 {input_path}: 硬件加速和软件编码都失败")
logger.error(f"软件编码备选方案也失败: {fallback_error_msg}")
# 尝试最基本的编码参数
try:
logger.info("尝试最基本的编码参数")
basic_cmd = [
'ffmpeg', '-y', '-i', input_path,
'-c:v', 'libx264', '-preset', 'ultrafast',
'-crf', '23', '-pix_fmt', 'yuv420p',
output_path
]
subprocess.run(basic_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
logger.info(f"使用基本编码参数成功处理视频: {output_path}")
return output_path
except subprocess.CalledProcessError as basic_error:
basic_error_msg = basic_error.stderr.decode() if basic_error.stderr else str(basic_error)
logger.error(f"基本编码参数也失败: {basic_error_msg}")
raise RuntimeError(f"无法处理视频 {input_path}: 所有编码方案都失败")
# 如果不是硬件加速导致的问题,或者备选方案也失败了,抛出原始错误
raise RuntimeError(f"处理视频失败: {error_msg}")

View File

@ -315,6 +315,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
'bgm_volume': params.bgm_volume, # background music volume
'original_audio_volume': params.original_volume, # original video volume, 0 means it is not kept
'keep_original_audio': True, # whether to keep the original audio
'subtitle_enabled': params.subtitle_enabled, # whether subtitles are enabled - fixes the subtitle toggle bug
'subtitle_font': params.font_name, # relative font path, resolved automatically under the font_dir() directory
'subtitle_font_size': params.font_size,
'subtitle_color': params.text_fore_color,

View File

@ -205,7 +205,8 @@ def generate_video_v3(
bgm_path: Optional[str] = None,
narration_path: Optional[str] = None,
output_path: str = "output.mp4",
font_path: Optional[str] = None
font_path: Optional[str] = None,
subtitle_enabled: bool = True
) -> None:
"""
Merge video materials, including the video, subtitles, BGM, and narration audio
@ -220,6 +221,7 @@
- original: original audio volume, 0-1, default 1.0
- bgm: BGM volume, 0-1, default 0.3
- narration: narration volume, 0-1, default 1.0
subtitle_enabled: whether subtitles are enabled, default True
subtitle_style: subtitle style configuration dict, which may contain the following keys:
- font: font name
- fontsize: font size
@ -239,8 +241,8 @@ def generate_video_v3(
video = VideoFileClip(video_path)
subtitle_clips = []
# Handle subtitles (if provided)
if subtitle_path:
# Handle subtitles (if enabled and provided) - fix the subtitle toggle bug
if subtitle_enabled and subtitle_path:
if os.path.exists(subtitle_path):
# Check the font file
if font_path and not os.path.exists(font_path):
@ -308,30 +310,45 @@
except Exception as e:
logger.info(f"Warning: error while processing the subtitle file: {str(e)}")
else:
logger.info(f"Note: subtitle file does not exist: {subtitle_path}")
logger.warning(f"Subtitle file does not exist: {subtitle_path}")
elif not subtitle_enabled:
logger.info("Subtitles are disabled, skipping subtitle processing")
elif not subtitle_path:
logger.info("No subtitle file path provided, skipping subtitle processing")
# Merge the audio
audio_clips = []
# Add the original audio (with its volume applied)
logger.debug(f"Volume configuration: {volume_config}")
logger.info(f"Volume configuration details: {volume_config}")
if video.audio is not None:
original_audio = video.audio.volumex(volume_config['original'])
original_volume = volume_config['original']
logger.info(f"Applying original audio volume: {original_volume}")
original_audio = video.audio.volumex(original_volume)
audio_clips.append(original_audio)
logger.info("Original audio added to the mix")
else:
logger.warning("The video has no audio track, so the original audio cannot be added")
# Add BGM (if provided)
if bgm_path:
logger.info(f"Adding background music: {bgm_path}")
bgm = AudioFileClip(bgm_path)
if bgm.duration < video.duration:
bgm = loop_audio_clip(bgm, video.duration)
else:
bgm = bgm.subclip(0, video.duration)
bgm = bgm.volumex(volume_config['bgm'])
bgm_volume = volume_config['bgm']
logger.info(f"Applying BGM volume: {bgm_volume}")
bgm = bgm.volumex(bgm_volume)
audio_clips.append(bgm)
# Add the narration audio (if provided)
if narration_path:
narration = AudioFileClip(narration_path).volumex(volume_config['narration'])
logger.info(f"Adding narration audio: {narration_path}")
narration_volume = volume_config['narration']
logger.info(f"Applying narration volume: {narration_volume}")
narration = AudioFileClip(narration_path).volumex(narration_volume)
audio_clips.append(narration)
# Composite the final video (including subtitles)
@ -342,18 +359,53 @@ def generate_video_v3(
final_video = video
if audio_clips:
logger.info(f"合成音频轨道,共 {len(audio_clips)} 个音频片段")
final_audio = CompositeAudioClip(audio_clips)
final_video = final_video.set_audio(final_audio)
logger.info("音频合成完成")
else:
logger.warning("没有音频轨道需要合成")
# 导出视频
logger.info("开始导出视频...") # 调试信息
final_video.write_videofile(
output_path,
codec='libx264',
audio_codec='aac',
fps=video.fps
)
logger.info(f"视频已导出到: {output_path}") # 调试信息
# 导出视频 - 使用优化的编码器
logger.info("开始导出视频...")
# 获取最优编码器
from app.utils import ffmpeg_utils
optimal_encoder = ffmpeg_utils.get_optimal_ffmpeg_encoder()
# 根据编码器类型设置参数
ffmpeg_params = []
if "nvenc" in optimal_encoder:
ffmpeg_params = ['-preset', 'medium', '-profile:v', 'high']
elif "videotoolbox" in optimal_encoder:
ffmpeg_params = ['-profile:v', 'high']
elif "qsv" in optimal_encoder:
ffmpeg_params = ['-preset', 'medium']
elif "vaapi" in optimal_encoder:
ffmpeg_params = ['-profile', '100']
elif optimal_encoder == "libx264":
ffmpeg_params = ['-preset', 'medium', '-crf', '23']
try:
final_video.write_videofile(
output_path,
codec=optimal_encoder,
audio_codec='aac',
fps=video.fps,
ffmpeg_params=ffmpeg_params
)
logger.info(f"视频已导出到: {output_path} (使用编码器: {optimal_encoder})")
except Exception as e:
logger.warning(f"使用 {optimal_encoder} 编码器失败: {str(e)}, 尝试软件编码")
# 降级到软件编码
final_video.write_videofile(
output_path,
codec='libx264',
audio_codec='aac',
fps=video.fps,
ffmpeg_params=['-preset', 'medium', '-crf', '23']
)
logger.info(f"视频已导出到: {output_path} (使用软件编码)")
# 清理资源
video.close()

View File

@ -1,9 +1,11 @@
"""
FFmpeg utilities - helper functions for FFmpeg, in particular hardware acceleration detection.
Optimized for multi-platform compatibility, with progressive degradation and smart error handling.
"""
import os
import platform
import subprocess
import tempfile
from typing import Any, Dict, List, Optional, Tuple, Union
from loguru import logger
@ -14,9 +16,104 @@ _FFMPEG_HW_ACCEL_INFO = {
"encoder": None,
"hwaccel_args": [],
"message": "",
"is_dedicated_gpu": False
"is_dedicated_gpu": False,
"fallback_available": False, # 是否有备用方案
"fallback_encoder": None, # 备用编码器
"platform": None, # 平台信息
"gpu_vendor": None, # GPU厂商
"tested_methods": [] # 已测试的方法
}
# Hardware acceleration priority, by platform and GPU type
HWACCEL_PRIORITY = {
"windows": {
"nvidia": ["cuda", "nvenc", "d3d11va", "dxva2"],
"amd": ["d3d11va", "dxva2", "amf"], # 不再完全禁用AMD
"intel": ["qsv", "d3d11va", "dxva2"],
"unknown": ["d3d11va", "dxva2"]
},
"darwin": {
"apple": ["videotoolbox"],
"nvidia": ["cuda", "videotoolbox"],
"amd": ["videotoolbox"],
"intel": ["videotoolbox"],
"unknown": ["videotoolbox"]
},
"linux": {
"nvidia": ["cuda", "nvenc", "vaapi"],
"amd": ["vaapi", "amf"],
"intel": ["qsv", "vaapi"],
"unknown": ["vaapi"]
}
}
# Encoder mapping
ENCODER_MAPPING = {
"cuda": "h264_nvenc",
"nvenc": "h264_nvenc",
"videotoolbox": "h264_videotoolbox",
"qsv": "h264_qsv",
"vaapi": "h264_vaapi",
"amf": "h264_amf",
"d3d11va": "libx264", # D3D11VA只用于解码
"dxva2": "libx264", # DXVA2只用于解码
"software": "libx264"
}
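How the two tables are meant to combine, as a sketch of the detection logic further down (an assumed reading, not code from the diff): the platform/vendor pair selects an ordered list of acceleration methods, the first method that passes a runtime test wins, and its encoder comes from ENCODER_MAPPING.

# Sketch only - resolve the first workable acceleration method to an encoder name.
def resolve_encoder(system: str, gpu_vendor: str, method_works) -> str:
    methods = (HWACCEL_PRIORITY.get(system, {}).get(gpu_vendor)
               or HWACCEL_PRIORITY.get(system, {}).get("unknown", []))
    for method in methods:
        if method_works(method):  # e.g. test_hwaccel_method(method, test_input)
            return ENCODER_MAPPING.get(method, "libx264")
    return "libx264"  # software fallback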
def get_null_input() -> str:
"""
Get the platform-specific null input path
Returns:
str: platform-specific null input path
"""
system = platform.system().lower()
if system == "windows":
return "NUL"
else:
return "/dev/null"
def create_test_video() -> str:
"""
Create a temporary test video file used for hardware acceleration testing
Returns:
str: path of the temporary test video file
"""
try:
# Create a temporary file
temp_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
temp_path = temp_file.name
temp_file.close()
# Generate a simple test video (1 second of black frames)
cmd = [
'ffmpeg', '-y', '-f', 'lavfi', '-i', 'color=black:size=320x240:duration=1',
'-c:v', 'libx264', '-pix_fmt', 'yuv420p', '-t', '1', temp_path
]
subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
return temp_path
except Exception as e:
logger.debug(f"Failed to create the test video: {str(e)}")
return get_null_input()
def cleanup_test_video(path: str) -> None:
"""
Clean up the test video file
Args:
path: path of the test video file
"""
try:
if path != get_null_input() and os.path.exists(path):
os.unlink(path)
except Exception as e:
logger.debug(f"Failed to clean up the test video: {str(e)}")
def check_ffmpeg_installation() -> bool:
"""
@ -38,9 +135,123 @@ def check_ffmpeg_installation() -> bool:
return False
def detect_gpu_vendor() -> str:
"""
Detect the GPU vendor
Returns:
str: GPU vendor (nvidia, amd, intel, apple, unknown)
"""
system = platform.system().lower()
try:
if system == "windows":
gpu_info = _get_windows_gpu_info().lower()
if 'nvidia' in gpu_info or 'geforce' in gpu_info or 'quadro' in gpu_info:
return "nvidia"
elif 'amd' in gpu_info or 'radeon' in gpu_info:
return "amd"
elif 'intel' in gpu_info:
return "intel"
elif system == "darwin":
# On macOS, check whether this is Apple Silicon
if platform.machine().lower() in ['arm64', 'aarch64']:
return "apple"
else:
# Intel Macs may have a discrete GPU
gpu_info = _get_macos_gpu_info().lower()
if 'nvidia' in gpu_info:
return "nvidia"
elif 'amd' in gpu_info or 'radeon' in gpu_info:
return "amd"
else:
return "intel"
elif system == "linux":
gpu_info = _get_linux_gpu_info().lower()
if 'nvidia' in gpu_info:
return "nvidia"
elif 'amd' in gpu_info or 'radeon' in gpu_info:
return "amd"
elif 'intel' in gpu_info:
return "intel"
except Exception as e:
logger.debug(f"检测GPU厂商失败: {str(e)}")
return "unknown"
def test_hwaccel_method(method: str, test_input: str) -> bool:
"""
Test a specific hardware acceleration method
Args:
method: name of the hardware acceleration method
test_input: path of the test input file
Returns:
bool: whether the method is supported
"""
try:
# Build the test command
cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error"]
# Add the hardware acceleration arguments
if method == "cuda":
cmd.extend(["-hwaccel", "cuda", "-hwaccel_output_format", "cuda"])
elif method == "nvenc":
cmd.extend(["-hwaccel", "cuda"])
elif method == "videotoolbox":
cmd.extend(["-hwaccel", "videotoolbox"])
elif method == "qsv":
cmd.extend(["-hwaccel", "qsv"])
elif method == "vaapi":
# Try to find a VAAPI device
render_device = _find_vaapi_device()
if render_device:
cmd.extend(["-hwaccel", "vaapi", "-vaapi_device", render_device])
else:
cmd.extend(["-hwaccel", "vaapi"])
elif method == "d3d11va":
cmd.extend(["-hwaccel", "d3d11va"])
elif method == "dxva2":
cmd.extend(["-hwaccel", "dxva2"])
elif method == "amf":
cmd.extend(["-hwaccel", "auto"]) # AMF通常通过auto检测
else:
return False
# Add the input and output
cmd.extend(["-i", test_input, "-f", "null", "-t", "0.1", "-"])
# Run the test
result = subprocess.run(
cmd,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
text=True,
check=False,
timeout=10 # 10-second timeout
)
success = result.returncode == 0
if success:
logger.debug(f"硬件加速方法 {method} 测试成功")
else:
logger.debug(f"硬件加速方法 {method} 测试失败: {result.stderr[:200]}")
return success
except subprocess.TimeoutExpired:
logger.debug(f"硬件加速方法 {method} 测试超时")
return False
except Exception as e:
logger.debug(f"硬件加速方法 {method} 测试异常: {str(e)}")
return False
def detect_hardware_acceleration() -> Dict[str, Union[bool, str, List[str], None]]:
"""
Detect the hardware accelerators available on this system and store the result in the module-level dict
Detect the hardware accelerators available on this system, using progressive testing and smart degradation
Returns:
Dict: a dictionary with the hardware acceleration information
@ -56,45 +267,176 @@ def detect_hardware_acceleration() -> Dict[str, Union[bool, str, List[str], None
_FFMPEG_HW_ACCEL_INFO["message"] = "FFmpeg未安装或不在系统PATH中"
return _FFMPEG_HW_ACCEL_INFO
# Detect the operating system
# Detect the platform and GPU information
system = platform.system().lower()
logger.debug(f"Detecting hardware acceleration - operating system: {system}")
gpu_vendor = detect_gpu_vendor()
_FFMPEG_HW_ACCEL_INFO["platform"] = system
_FFMPEG_HW_ACCEL_INFO["gpu_vendor"] = gpu_vendor
logger.info(f"检测硬件加速 - 平台: {system}, GPU厂商: {gpu_vendor}")
# 获取FFmpeg支持的硬件加速器列表
try:
# Use UTF-8 encoding on Windows
is_windows = os.name == 'nt'
if is_windows:
hwaccels_cmd = subprocess.run(
['ffmpeg', '-hide_banner', '-hwaccels'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf-8', text=True
)
else:
hwaccels_cmd = subprocess.run(
['ffmpeg', '-hide_banner', '-hwaccels'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
)
supported_hwaccels = hwaccels_cmd.stdout.lower()
hwaccels_cmd = subprocess.run(
['ffmpeg', '-hide_banner', '-hwaccels'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False
)
supported_hwaccels = hwaccels_cmd.stdout.lower() if hwaccels_cmd.returncode == 0 else ""
logger.debug(f"FFmpeg支持的硬件加速器: {supported_hwaccels}")
except Exception as e:
logger.error(f"获取FFmpeg硬件加速器列表失败: {str(e)}")
logger.warning(f"获取FFmpeg硬件加速器列表失败: {str(e)}")
supported_hwaccels = ""
# Detect hardware accelerators depending on the operating system
if system == 'darwin': # macOS
_detect_macos_acceleration(supported_hwaccels)
elif system == 'windows': # Windows
_detect_windows_acceleration(supported_hwaccels)
elif system == 'linux': # Linux
_detect_linux_acceleration(supported_hwaccels)
else:
logger.warning(f"不支持的操作系统: {system}")
_FFMPEG_HW_ACCEL_INFO["message"] = f"不支持的操作系统: {system}"
# 创建测试输入
test_input = create_test_video()
# 记录检测结果已经在启动时输出,这里不再重复输出
try:
# Get the priority list for this platform and GPU vendor
priority_list = HWACCEL_PRIORITY.get(system, {}).get(gpu_vendor, [])
if not priority_list:
priority_list = HWACCEL_PRIORITY.get(system, {}).get("unknown", [])
logger.debug(f"硬件加速测试优先级: {priority_list}")
# 按优先级测试硬件加速方法
for method in priority_list:
# Check whether FFmpeg supports this method
if method not in supported_hwaccels and method != "nvenc": # nvenc may not appear in the hwaccels list
logger.debug(f"Skipping unsupported hardware acceleration method: {method}")
continue
_FFMPEG_HW_ACCEL_INFO["tested_methods"].append(method)
if test_hwaccel_method(method, test_input):
# Found a usable hardware acceleration method
_FFMPEG_HW_ACCEL_INFO["available"] = True
_FFMPEG_HW_ACCEL_INFO["type"] = method
_FFMPEG_HW_ACCEL_INFO["encoder"] = ENCODER_MAPPING.get(method, "libx264")
# Build the hardware acceleration arguments
if method == "cuda":
_FFMPEG_HW_ACCEL_INFO["hwaccel_args"] = ["-hwaccel", "cuda", "-hwaccel_output_format", "cuda"]
elif method == "nvenc":
_FFMPEG_HW_ACCEL_INFO["hwaccel_args"] = ["-hwaccel", "cuda"]
elif method == "videotoolbox":
_FFMPEG_HW_ACCEL_INFO["hwaccel_args"] = ["-hwaccel", "videotoolbox"]
elif method == "qsv":
_FFMPEG_HW_ACCEL_INFO["hwaccel_args"] = ["-hwaccel", "qsv"]
elif method == "vaapi":
render_device = _find_vaapi_device()
if render_device:
_FFMPEG_HW_ACCEL_INFO["hwaccel_args"] = ["-hwaccel", "vaapi", "-vaapi_device", render_device]
else:
_FFMPEG_HW_ACCEL_INFO["hwaccel_args"] = ["-hwaccel", "vaapi"]
elif method in ["d3d11va", "dxva2"]:
_FFMPEG_HW_ACCEL_INFO["hwaccel_args"] = ["-hwaccel", method]
elif method == "amf":
_FFMPEG_HW_ACCEL_INFO["hwaccel_args"] = ["-hwaccel", "auto"]
# Determine whether this is a dedicated GPU
_FFMPEG_HW_ACCEL_INFO["is_dedicated_gpu"] = gpu_vendor in ["nvidia", "amd"] or (gpu_vendor == "intel" and "arc" in _get_gpu_info().lower())
_FFMPEG_HW_ACCEL_INFO["message"] = f"Using {method} hardware acceleration ({gpu_vendor} GPU)"
logger.info(f"Hardware acceleration detected: {method} ({gpu_vendor})")
break
# If no hardware acceleration was found, set software encoding as the fallback
if not _FFMPEG_HW_ACCEL_INFO["available"]:
_FFMPEG_HW_ACCEL_INFO["fallback_available"] = True
_FFMPEG_HW_ACCEL_INFO["fallback_encoder"] = "libx264"
_FFMPEG_HW_ACCEL_INFO["message"] = f"未找到可用的硬件加速,将使用软件编码 (平台: {system}, GPU: {gpu_vendor})"
logger.info("未检测到硬件加速,将使用软件编码")
finally:
# Clean up the test file
cleanup_test_video(test_input)
return _FFMPEG_HW_ACCEL_INFO
def _get_gpu_info() -> str:
"""
Unified interface for getting GPU information
Returns:
str: GPU information string
"""
system = platform.system().lower()
if system == "windows":
return _get_windows_gpu_info()
elif system == "darwin":
return _get_macos_gpu_info()
elif system == "linux":
return _get_linux_gpu_info()
else:
return "unknown"
def _get_macos_gpu_info() -> str:
"""
Get GPU information on macOS
Returns:
str: GPU information string
"""
try:
# Use system_profiler to get the display information
result = subprocess.run(
['system_profiler', 'SPDisplaysDataType'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False
)
if result.returncode == 0:
return result.stdout
# Fallback: check whether this is Apple Silicon
if platform.machine().lower() in ['arm64', 'aarch64']:
return "Apple Silicon GPU"
else:
return "Intel Mac GPU"
except Exception as e:
logger.debug(f"获取macOS GPU信息失败: {str(e)}")
return "unknown"
def _find_vaapi_device() -> Optional[str]:
"""
Find a usable VAAPI device
Returns:
Optional[str]: VAAPI device path, or None if none was found
"""
try:
# Common VAAPI device paths
possible_devices = [
"/dev/dri/renderD128",
"/dev/dri/renderD129",
"/dev/dri/card0",
"/dev/dri/card1"
]
for device in possible_devices:
if os.path.exists(device):
# Test whether the device is usable
test_cmd = subprocess.run(
["ffmpeg", "-hide_banner", "-loglevel", "error",
"-hwaccel", "vaapi", "-vaapi_device", device,
"-f", "lavfi", "-i", "color=black:size=64x64:duration=0.1",
"-f", "null", "-"],
stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=False
)
if test_cmd.returncode == 0:
logger.debug(f"找到可用的VAAPI设备: {device}")
return device
logger.debug("未找到可用的VAAPI设备")
return None
except Exception as e:
logger.debug(f"查找VAAPI设备失败: {str(e)}")
return None
def _detect_macos_acceleration(supported_hwaccels: str) -> None:
"""
Detect hardware acceleration on macOS
@ -511,3 +853,165 @@ def is_dedicated_gpu() -> bool:
detect_hardware_acceleration()
return _FFMPEG_HW_ACCEL_INFO["is_dedicated_gpu"]
def get_optimal_ffmpeg_encoder() -> str:
"""
Get the optimal FFmpeg encoder
Returns:
str: encoder name
"""
# If detection has not run yet, run it first
if _FFMPEG_HW_ACCEL_INFO["type"] is None:
detect_hardware_acceleration()
if _FFMPEG_HW_ACCEL_INFO["available"]:
return _FFMPEG_HW_ACCEL_INFO["encoder"]
elif _FFMPEG_HW_ACCEL_INFO["fallback_available"]:
return _FFMPEG_HW_ACCEL_INFO["fallback_encoder"]
else:
return "libx264" # 默认软件编码器
def get_ffmpeg_command_with_hwaccel(input_path: str, output_path: str, **kwargs) -> List[str]:
"""
Build an FFmpeg command with hardware acceleration
Args:
input_path: input file path
output_path: output file path
**kwargs: additional FFmpeg parameters
Returns:
List[str]: the FFmpeg command as a list
"""
# If detection has not run yet, run it first
if _FFMPEG_HW_ACCEL_INFO["type"] is None:
detect_hardware_acceleration()
cmd = ["ffmpeg", "-y"]
# Add the hardware acceleration arguments
if _FFMPEG_HW_ACCEL_INFO["available"]:
cmd.extend(_FFMPEG_HW_ACCEL_INFO["hwaccel_args"])
# Add the input file
cmd.extend(["-i", input_path])
# Add the encoder
encoder = get_optimal_ffmpeg_encoder()
cmd.extend(["-c:v", encoder])
# Add the remaining parameters
for key, value in kwargs.items():
if key.startswith("_"): # skip internal parameters
continue
if isinstance(value, list):
cmd.extend(value)
else:
cmd.extend([f"-{key}", str(value)])
# Add the output file
cmd.append(output_path)
return cmd
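A quick usage sketch of the helper above (placeholder paths, not part of the diff): keyword arguments become "-key value" pairs appended after the encoder, so the call below yields roughly ffmpeg -y [hwaccel args] -i input.mp4 -c:v <encoder> -crf 23 -preset medium output.mp4.

# Sketch only - building a command with the helper defined above.
cmd = get_ffmpeg_command_with_hwaccel(
    "input.mp4", "output.mp4",   # placeholder paths
    crf="23",                    # becomes "-crf 23"
    preset="medium",             # becomes "-preset medium"
)
subprocess.run(cmd, check=True)  # subprocess is already imported at module level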
def test_ffmpeg_compatibility() -> Dict[str, Any]:
"""
Test FFmpeg compatibility and return a detailed report
Returns:
Dict: compatibility test report
"""
report = {
"ffmpeg_installed": False,
"platform": platform.system().lower(),
"gpu_vendor": "unknown",
"hardware_acceleration": {
"available": False,
"type": None,
"encoder": None,
"tested_methods": []
},
"software_fallback": {
"available": False,
"encoder": "libx264"
},
"recommendations": []
}
# Check the FFmpeg installation
report["ffmpeg_installed"] = check_ffmpeg_installation()
if not report["ffmpeg_installed"]:
report["recommendations"].append("Please install FFmpeg and make sure it is on the system PATH")
return report
# Detect hardware acceleration
hwaccel_info = detect_hardware_acceleration()
report["gpu_vendor"] = hwaccel_info.get("gpu_vendor", "unknown")
report["hardware_acceleration"]["available"] = hwaccel_info.get("available", False)
report["hardware_acceleration"]["type"] = hwaccel_info.get("type")
report["hardware_acceleration"]["encoder"] = hwaccel_info.get("encoder")
report["hardware_acceleration"]["tested_methods"] = hwaccel_info.get("tested_methods", [])
# Check the software fallback
report["software_fallback"]["available"] = hwaccel_info.get("fallback_available", True)
report["software_fallback"]["encoder"] = hwaccel_info.get("fallback_encoder", "libx264")
# Generate recommendations
if not report["hardware_acceleration"]["available"]:
if report["gpu_vendor"] == "nvidia":
report["recommendations"].append("Install the NVIDIA driver and CUDA toolkit to enable hardware acceleration")
elif report["gpu_vendor"] == "amd":
report["recommendations"].append("Hardware acceleration support for AMD GPUs is limited; software encoding is recommended")
elif report["gpu_vendor"] == "intel":
report["recommendations"].append("Update the Intel GPU driver to enable QSV hardware acceleration")
else:
report["recommendations"].append("No supported GPU detected, software encoding will be used")
return report
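One plausible way to consume this report at startup (a hedged sketch, not code from the diff): run the probe once and surface the recommendations when hardware acceleration is missing.

# Sketch only - log the compatibility findings at startup.
report = test_ffmpeg_compatibility()
hw = report["hardware_acceleration"]
logger.info(f"FFmpeg installed: {report['ffmpeg_installed']}, hwaccel: {hw['type'] or 'none'}")
for hint in report["recommendations"]:
    logger.info(hint)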
def force_software_encoding() -> None:
"""
Force software encoding and disable hardware acceleration
"""
global _FFMPEG_HW_ACCEL_INFO
_FFMPEG_HW_ACCEL_INFO.update({
"available": False,
"type": "software",
"encoder": "libx264",
"hwaccel_args": [],
"message": "强制使用软件编码",
"is_dedicated_gpu": False,
"fallback_available": True,
"fallback_encoder": "libx264"
})
logger.info("已强制切换到软件编码模式")
def reset_hwaccel_detection() -> None:
"""
Reset the hardware acceleration detection result and force re-detection
"""
global _FFMPEG_HW_ACCEL_INFO
_FFMPEG_HW_ACCEL_INFO = {
"available": False,
"type": None,
"encoder": None,
"hwaccel_args": [],
"message": "",
"is_dedicated_gpu": False,
"fallback_available": False,
"fallback_encoder": None,
"platform": None,
"gpu_vendor": None,
"tested_methods": []
}
logger.info("已重置硬件加速检测结果")

View File

@ -1,5 +1,5 @@
[app]
project_version="0.6.2"
project_version="0.6.5"
# LLM providers that support video understanding
# gemini (Google, requires a VPN)
# siliconflow (SiliconFlow)

View File

@ -1 +1 @@
0.6.3
0.6.5

View File

@ -3,6 +3,7 @@ import os
from uuid import uuid4
from app.config import config
from app.services import voice
from app.models.schema import AudioVolumeDefaults
from app.utils import utils
from webui.utils.cache import get_songs_cache
@ -94,12 +95,12 @@ def render_azure_v2_settings(tr):
def render_voice_parameters(tr):
"""渲染语音参数设置"""
# 音量
# 音量 - 使用统一的默认值
voice_volume = st.slider(
tr("Speech Volume"),
min_value=0.0,
max_value=1.0,
value=1.0,
min_value=AudioVolumeDefaults.MIN_VOLUME,
max_value=AudioVolumeDefaults.MAX_VOLUME,
value=AudioVolumeDefaults.VOICE_VOLUME,
step=0.01,
help=tr("Adjust the volume of the original audio")
)
@ -187,12 +188,12 @@ def render_bgm_settings(tr):
if custom_bgm_file and os.path.exists(custom_bgm_file):
st.session_state['bgm_file'] = custom_bgm_file
# Background music volume
# Background music volume - use the unified default values
bgm_volume = st.slider(
tr("Background Music Volume"),
min_value=0.0,
max_value=1.0,
value=0.3,
min_value=AudioVolumeDefaults.MIN_VOLUME,
max_value=AudioVolumeDefaults.MAX_VOLUME,
value=AudioVolumeDefaults.BGM_VOLUME,
step=0.01,
help=tr("Adjust the volume of the original audio")
)
@ -203,10 +204,10 @@ def get_audio_params():
"""获取音频参数"""
return {
'voice_name': config.ui.get("voice_name", ""),
'voice_volume': st.session_state.get('voice_volume', 1.0),
'voice_volume': st.session_state.get('voice_volume', AudioVolumeDefaults.VOICE_VOLUME),
'voice_rate': st.session_state.get('voice_rate', 1.0),
'voice_pitch': st.session_state.get('voice_pitch', 1.0),
'bgm_type': st.session_state.get('bgm_type', 'random'),
'bgm_file': st.session_state.get('bgm_file', ''),
'bgm_volume': st.session_state.get('bgm_volume', 0.3),
'bgm_volume': st.session_state.get('bgm_volume', AudioVolumeDefaults.BGM_VOLUME),
}

View File

@ -1,5 +1,5 @@
import streamlit as st
from app.models.schema import VideoClipParams, VideoAspect
from app.models.schema import VideoClipParams, VideoAspect, AudioVolumeDefaults
def render_video_panel(tr):
@ -41,12 +41,12 @@ def render_video_config(tr, params):
)
st.session_state['video_quality'] = video_qualities[quality_index][1]
# Original audio volume
# Original audio volume - use the unified default values
params.original_volume = st.slider(
tr("Original Volume"),
min_value=0.0,
max_value=1.0,
value=0.7,
min_value=AudioVolumeDefaults.MIN_VOLUME,
max_value=AudioVolumeDefaults.MAX_VOLUME,
value=AudioVolumeDefaults.ORIGINAL_VOLUME,
step=0.01,
help=tr("Adjust the volume of the original audio")
)
@ -58,5 +58,5 @@ def get_video_params():
return {
'video_aspect': st.session_state.get('video_aspect', VideoAspect.portrait.value),
'video_quality': st.session_state.get('video_quality', '1080p'),
'original_volume': st.session_state.get('original_volume', 0.7)
'original_volume': st.session_state.get('original_volume', AudioVolumeDefaults.ORIGINAL_VOLUME)
}