diff --git a/.gitignore b/.gitignore
index 8096610..f3c7489 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,3 +34,6 @@ resource/srt/*.srt
 app/models/faster-whisper-large-v2/*
 app/models/faster-whisper-large-v3/*
 app/models/bert/*
+
+bug清单.md
+task.md
\ No newline at end of file
diff --git a/app/models/schema.py b/app/models/schema.py
index ddf0ad1..b059b36 100644
--- a/app/models/schema.py
+++ b/app/models/schema.py
@@ -1,6 +1,6 @@
 import warnings
 from enum import Enum
-from typing import Any, List, Optional
+from typing import Any, List, Optional, Union
 
 import pydantic
 from pydantic import BaseModel, Field
@@ -13,6 +13,24 @@ warnings.filterwarnings(
 )
 
 
+class AudioVolumeDefaults:
+    """音量配置默认值常量类 - 确保全局一致性"""
+
+    # 语音音量默认值
+    VOICE_VOLUME = 1.0
+    TTS_VOLUME = 1.0
+
+    # 原声音量默认值 - 这是修复bug的关键
+    ORIGINAL_VOLUME = 0.7
+
+    # 背景音乐音量默认值
+    BGM_VOLUME = 0.3
+
+    # 音量范围
+    MIN_VOLUME = 0.0
+    MAX_VOLUME = 1.0
+
+
 class VideoConcatMode(str, Enum):
     random = "random"
     sequential = "sequential"
@@ -101,7 +119,7 @@ class VideoParams(BaseModel):
     video_subject: str
     video_script: str = ""  # 用于生成视频的脚本
-    video_terms: Optional[str | list] = None  # 用于生成视频的关键词
+    video_terms: Optional[Union[str, list]] = None  # 用于生成视频的关键词
     video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
     video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
     video_clip_duration: Optional[int] = 5
@@ -113,11 +131,11 @@ class VideoParams(BaseModel):
     video_language: Optional[str] = ""  # auto detect
 
     voice_name: Optional[str] = ""
-    voice_volume: Optional[float] = 1.0
+    voice_volume: Optional[float] = AudioVolumeDefaults.VOICE_VOLUME
     voice_rate: Optional[float] = 1.0
     bgm_type: Optional[str] = "random"
     bgm_file: Optional[str] = ""
-    bgm_volume: Optional[float] = 0.2
+    bgm_volume: Optional[float] = AudioVolumeDefaults.BGM_VOLUME
 
     subtitle_enabled: Optional[bool] = True
     subtitle_position: Optional[str] = "bottom"  # top, bottom, center
@@ -157,11 +175,11 @@ class AudioRequest(BaseModel):
     video_script: str
     video_language: Optional[str] = ""
     voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
-    voice_volume: Optional[float] = 1.0
+    voice_volume: Optional[float] = AudioVolumeDefaults.VOICE_VOLUME
     voice_rate: Optional[float] = 1.2
     bgm_type: Optional[str] = "random"
     bgm_file: Optional[str] = ""
-    bgm_volume: Optional[float] = 0.2
+    bgm_volume: Optional[float] = AudioVolumeDefaults.BGM_VOLUME
 
     video_source: Optional[str] = "local"
 
@@ -347,7 +365,7 @@ class VideoClipParams(BaseModel):
     # video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
 
     voice_name: Optional[str] = Field(default="zh-CN-YunjianNeural", description="语音名称")
-    voice_volume: Optional[float] = Field(default=1.0, description="解说语音音量")
+    voice_volume: Optional[float] = Field(default=AudioVolumeDefaults.VOICE_VOLUME, description="解说语音音量")
     voice_rate: Optional[float] = Field(default=1.0, description="语速")
     voice_pitch: Optional[float] = Field(default=1.0, description="语调")
 
@@ -367,9 +385,9 @@ class VideoClipParams(BaseModel):
 
     n_threads: Optional[int] = Field(default=16, description="线程数")  # 线程数,有助于提升视频处理速度
 
-    tts_volume: Optional[float] = Field(default=1.0, description="解说语音音量(后处理)")
-    original_volume: Optional[float] = Field(default=1.0, description="视频原声音量")
-    bgm_volume: Optional[float] = Field(default=0.3, description="背景音乐音量")
+    tts_volume: Optional[float] = Field(default=AudioVolumeDefaults.TTS_VOLUME, description="解说语音音量(后处理)")
+    original_volume: Optional[float] = Field(default=AudioVolumeDefaults.ORIGINAL_VOLUME, description="视频原声音量")
+    bgm_volume: Optional[float] = Field(default=AudioVolumeDefaults.BGM_VOLUME, description="背景音乐音量")
 
 
 class VideoTranscriptionRequest(BaseModel):
diff --git a/app/services/generate_video.py b/app/services/generate_video.py
index f125c05..2313aeb 100644
--- a/app/services/generate_video.py
+++ b/app/services/generate_video.py
@@ -24,6 +24,7 @@ from moviepy.video.tools.subtitles import SubtitlesClip
 from PIL import ImageFont
 
 from app.utils import utils
+from app.models.schema import AudioVolumeDefaults
 
 
 def merge_materials(
@@ -58,6 +59,7 @@
             - stroke_width: 描边宽度,默认1
             - threads: 处理线程数,默认2
             - fps: 输出帧率,默认30
+            - subtitle_enabled: 是否启用字幕,默认True
 
     返回:
         输出视频的路径
@@ -66,11 +68,12 @@
     if options is None:
         options = {}
 
-    # 设置默认参数值
-    voice_volume = options.get('voice_volume', 1.0)
-    bgm_volume = options.get('bgm_volume', 0.3)
-    original_audio_volume = options.get('original_audio_volume', 0.0)  # 默认为0,即不保留原声
-    keep_original_audio = options.get('keep_original_audio', False)  # 是否保留原声
+    # 设置默认参数值 - 使用统一的音量配置
+    voice_volume = options.get('voice_volume', AudioVolumeDefaults.VOICE_VOLUME)
+    bgm_volume = options.get('bgm_volume', AudioVolumeDefaults.BGM_VOLUME)
+    # 修复bug: 将原声音量默认值从0.0改为0.7,确保短剧解说模式下原片音量正常
+    original_audio_volume = options.get('original_audio_volume', AudioVolumeDefaults.ORIGINAL_VOLUME)
+    keep_original_audio = options.get('keep_original_audio', True)  # 默认保留原声
     subtitle_font = options.get('subtitle_font', '')
     subtitle_font_size = options.get('subtitle_font_size', 40)
     subtitle_color = options.get('subtitle_color', '#FFFFFF')
@@ -81,11 +84,33 @@
     stroke_width = options.get('stroke_width', 1)
     threads = options.get('threads', 2)
     fps = options.get('fps', 30)
-
+    subtitle_enabled = options.get('subtitle_enabled', True)
+
+    # 配置日志 - 便于调试问题
+    logger.info(f"音量配置详情:")
+    logger.info(f"  - 配音音量: {voice_volume}")
+    logger.info(f"  - 背景音乐音量: {bgm_volume}")
+    logger.info(f"  - 原声音量: {original_audio_volume}")
+    logger.info(f"  - 是否保留原声: {keep_original_audio}")
+    logger.info(f"字幕配置详情:")
+    logger.info(f"  - 是否启用字幕: {subtitle_enabled}")
+    logger.info(f"  - 字幕文件路径: {subtitle_path}")
+
+    # 音量参数验证
+    def validate_volume(volume, name):
+        if not (AudioVolumeDefaults.MIN_VOLUME <= volume <= AudioVolumeDefaults.MAX_VOLUME):
+            logger.warning(f"{name}音量 {volume} 超出有效范围 [{AudioVolumeDefaults.MIN_VOLUME}, {AudioVolumeDefaults.MAX_VOLUME}],将被限制")
+            return max(AudioVolumeDefaults.MIN_VOLUME, min(volume, AudioVolumeDefaults.MAX_VOLUME))
+        return volume
+
+    voice_volume = validate_volume(voice_volume, "配音")
+    bgm_volume = validate_volume(bgm_volume, "背景音乐")
+    original_audio_volume = validate_volume(original_audio_volume, "原声")
+
     # 处理透明背景色问题 - MoviePy 2.1.1不支持'transparent'值
     if subtitle_bg_color == 'transparent':
         subtitle_bg_color = None  # None在新版MoviePy中表示透明背景
-
+
     # 创建输出目录(如果不存在)
     output_dir = os.path.dirname(output_path)
     os.makedirs(output_dir, exist_ok=True)
@@ -246,27 +271,34 @@
             color=subtitle_color,
         )
 
-    # 处理字幕
-    if subtitle_path and os.path.exists(subtitle_path):
+    # 处理字幕 - 修复字幕开关bug
+    if subtitle_enabled and subtitle_path and os.path.exists(subtitle_path):
+        logger.info("字幕已启用,开始处理字幕文件")
         try:
             # 加载字幕文件
             sub = SubtitlesClip(
-                subtitles=subtitle_path, 
-                encoding="utf-8", 
+                subtitles=subtitle_path,
+                encoding="utf-8",
                 make_textclip=make_textclip
             )
-
+
             # 创建每个字幕片段
             text_clips = []
             for item in sub.subtitles:
                 clip = create_text_clip(subtitle_item=item)
                 text_clips.append(clip)
-
+
             # 合成视频和字幕
             video_clip = CompositeVideoClip([video_clip, *text_clips])
logger.info(f"已添加{len(text_clips)}个字幕片段") except Exception as e: logger.error(f"处理字幕失败: \n{traceback.format_exc()}") + elif not subtitle_enabled: + logger.info("字幕已禁用,跳过字幕处理") + elif not subtitle_path: + logger.info("未提供字幕文件路径,跳过字幕处理") + elif not os.path.exists(subtitle_path): + logger.warning(f"字幕文件不存在: {subtitle_path},跳过字幕处理") # 导出最终视频 try: @@ -372,6 +404,7 @@ if __name__ == '__main__': 'bgm_volume': 0.1, # 背景音乐音量 'original_audio_volume': 1.0, # 视频原声音量,0表示不保留 'keep_original_audio': True, # 是否保留原声 + 'subtitle_enabled': True, # 是否启用字幕 - 修复字幕开关bug 'subtitle_font': 'MicrosoftYaHeiNormal.ttc', # 这里使用相对字体路径,会自动在 font_dir() 目录下查找 'subtitle_font_size': 40, 'subtitle_color': '#FFFFFF', diff --git a/app/services/material.py b/app/services/material.py index 9a3c289..d63d04c 100644 --- a/app/services/material.py +++ b/app/services/material.py @@ -402,18 +402,36 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> st ffmpeg_start_time = start_str.replace(',', '.') ffmpeg_end_time = end_str.replace(',', '.') - # 构建FFmpeg命令 + # 构建FFmpeg命令 - 使用新的智能编码器选择 + encoder = ffmpeg_utils.get_optimal_ffmpeg_encoder() + ffmpeg_cmd = [ "ffmpeg", "-y", *hwaccel_args, "-i", origin_video, "-ss", ffmpeg_start_time, "-to", ffmpeg_end_time, - "-c:v", "h264_videotoolbox" if hwaccel == "videotoolbox" else "libx264", + "-c:v", encoder, "-c:a", "aac", "-strict", "experimental", video_path ] + # 根据编码器类型添加特定参数 + if "nvenc" in encoder: + ffmpeg_cmd.insert(-1, "-preset") + ffmpeg_cmd.insert(-1, "medium") + elif "videotoolbox" in encoder: + ffmpeg_cmd.insert(-1, "-profile:v") + ffmpeg_cmd.insert(-1, "high") + elif "qsv" in encoder: + ffmpeg_cmd.insert(-1, "-preset") + ffmpeg_cmd.insert(-1, "medium") + elif encoder == "libx264": + ffmpeg_cmd.insert(-1, "-preset") + ffmpeg_cmd.insert(-1, "medium") + ffmpeg_cmd.insert(-1, "-crf") + ffmpeg_cmd.insert(-1, "23") + # 执行FFmpeg命令 # logger.info(f"裁剪视频片段: {timestamp} -> {ffmpeg_start_time}到{ffmpeg_end_time}") # logger.debug(f"执行命令: {' '.join(ffmpeg_cmd)}") diff --git a/app/services/merger_video.py b/app/services/merger_video.py index 6d688bf..026a47c 100644 --- a/app/services/merger_video.py +++ b/app/services/merger_video.py @@ -64,7 +64,7 @@ def get_hardware_acceleration_option() -> Optional[str]: Returns: Optional[str]: 硬件加速参数,如果不支持则返回None """ - # 使用集中式硬件加速检测 + # 使用新的硬件加速检测API return ffmpeg_utils.get_ffmpeg_hwaccel_type() @@ -178,14 +178,20 @@ def process_single_video( logger.warning(f"视频探测出错,禁用硬件加速: {str(e)}") hwaccel = None - # 添加硬件加速参数(根据前面的安全检查可能已经被禁用) + # 添加硬件加速参数(使用新的智能检测机制) if hwaccel: try: - # 使用集中式硬件加速参数 + # 使用新的硬件加速检测API hwaccel_args = ffmpeg_utils.get_ffmpeg_hwaccel_args() - command.extend(hwaccel_args) + if hwaccel_args: + command.extend(hwaccel_args) + logger.debug(f"应用硬件加速参数: {hwaccel_args}") + else: + logger.info("硬件加速不可用,将使用软件编码") + hwaccel = False # 标记为不使用硬件加速 except Exception as e: logger.warning(f"应用硬件加速参数时出错: {str(e)},将使用软件编码") + hwaccel = False # 标记为不使用硬件加速 # 重置命令,移除可能添加了一半的硬件加速参数 command = ['ffmpeg', '-y'] @@ -212,41 +218,27 @@ def process_single_video( '-r', '30', # 设置帧率为30fps ]) - # 选择编码器 - 考虑到Windows和特定硬件的兼容性 - use_software_encoder = True + # 选择编码器 - 使用新的智能编码器选择 + encoder = ffmpeg_utils.get_optimal_ffmpeg_encoder() - if hwaccel: - # 获取硬件加速类型和编码器信息 - hwaccel_type = ffmpeg_utils.get_ffmpeg_hwaccel_type() - hwaccel_encoder = ffmpeg_utils.get_ffmpeg_hwaccel_encoder() + if hwaccel and encoder != "libx264": + logger.info(f"使用硬件编码器: {encoder}") + command.extend(['-c:v', encoder]) - if hwaccel_type == 'cuda' or hwaccel_type == 'nvenc': - try: - 
# 检查NVENC编码器是否可用 - encoders_cmd = subprocess.run( - ["ffmpeg", "-hide_banner", "-encoders"], - stderr=subprocess.PIPE, stdout=subprocess.PIPE, text=True, check=False - ) - - if "h264_nvenc" in encoders_cmd.stdout.lower(): - command.extend(['-c:v', 'h264_nvenc', '-preset', 'p4', '-profile:v', 'high']) - use_software_encoder = False - else: - logger.warning("NVENC编码器不可用,将使用软件编码") - except Exception as e: - logger.warning(f"NVENC编码器检测失败: {str(e)},将使用软件编码") - elif hwaccel_type == 'qsv': - command.extend(['-c:v', 'h264_qsv', '-preset', 'medium']) - use_software_encoder = False - elif hwaccel_type == 'videotoolbox': # macOS - command.extend(['-c:v', 'h264_videotoolbox', '-profile:v', 'high']) - use_software_encoder = False - elif hwaccel_type == 'vaapi': # Linux VA-API - command.extend(['-c:v', 'h264_vaapi', '-profile', '100']) - use_software_encoder = False - - # 如果前面的条件未能应用硬件编码器,使用软件编码 - if use_software_encoder: + # 根据编码器类型添加特定参数 + if "nvenc" in encoder: + command.extend(['-preset', 'p4', '-profile:v', 'high']) + elif "videotoolbox" in encoder: + command.extend(['-profile:v', 'high']) + elif "qsv" in encoder: + command.extend(['-preset', 'medium']) + elif "vaapi" in encoder: + command.extend(['-profile', '100']) + elif "amf" in encoder: + command.extend(['-quality', 'balanced']) + else: + command.extend(['-preset', 'medium', '-profile:v', 'high']) + else: logger.info("使用软件编码器(libx264)") command.extend(['-c:v', 'libx264', '-preset', 'medium', '-profile:v', 'high']) @@ -273,8 +265,11 @@ def process_single_video( # 如果使用硬件加速失败,尝试使用软件编码 if hwaccel: - logger.info("尝试使用软件编码作为备选方案") + logger.info("硬件加速失败,尝试使用软件编码作为备选方案") try: + # 强制使用软件编码 + ffmpeg_utils.force_software_encoding() + # 构建新的命令,使用软件编码 fallback_cmd = ['ffmpeg', '-y', '-i', input_path] @@ -302,14 +297,30 @@ def process_single_video( output_path ]) - logger.info(f"执行备选FFmpeg命令: {' '.join(fallback_cmd)}") + logger.info("执行软件编码备选方案") subprocess.run(fallback_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) logger.info(f"使用软件编码成功处理视频: {output_path}") return output_path except subprocess.CalledProcessError as fallback_error: fallback_error_msg = fallback_error.stderr.decode() if fallback_error.stderr else str(fallback_error) - logger.error(f"备选软件编码也失败: {fallback_error_msg}") - raise RuntimeError(f"无法处理视频 {input_path}: 硬件加速和软件编码都失败") + logger.error(f"软件编码备选方案也失败: {fallback_error_msg}") + + # 尝试最基本的编码参数 + try: + logger.info("尝试最基本的编码参数") + basic_cmd = [ + 'ffmpeg', '-y', '-i', input_path, + '-c:v', 'libx264', '-preset', 'ultrafast', + '-crf', '23', '-pix_fmt', 'yuv420p', + output_path + ] + subprocess.run(basic_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + logger.info(f"使用基本编码参数成功处理视频: {output_path}") + return output_path + except subprocess.CalledProcessError as basic_error: + basic_error_msg = basic_error.stderr.decode() if basic_error.stderr else str(basic_error) + logger.error(f"基本编码参数也失败: {basic_error_msg}") + raise RuntimeError(f"无法处理视频 {input_path}: 所有编码方案都失败") # 如果不是硬件加速导致的问题,或者备选方案也失败了,抛出原始错误 raise RuntimeError(f"处理视频失败: {error_msg}") diff --git a/app/services/task.py b/app/services/task.py index c257d39..60deabe 100644 --- a/app/services/task.py +++ b/app/services/task.py @@ -315,6 +315,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di 'bgm_volume': params.bgm_volume, # 背景音乐音量 'original_audio_volume': params.original_volume, # 视频原声音量,0表示不保留 'keep_original_audio': True, # 是否保留原声 + 'subtitle_enabled': params.subtitle_enabled, # 是否启用字幕 - 修复字幕开关bug 'subtitle_font': 
params.font_name, # 这里使用相对字体路径,会自动在 font_dir() 目录下查找 'subtitle_font_size': params.font_size, 'subtitle_color': params.text_fore_color, diff --git a/app/services/video.py b/app/services/video.py index 087dbdf..d9a364f 100644 --- a/app/services/video.py +++ b/app/services/video.py @@ -205,7 +205,8 @@ def generate_video_v3( bgm_path: Optional[str] = None, narration_path: Optional[str] = None, output_path: str = "output.mp4", - font_path: Optional[str] = None + font_path: Optional[str] = None, + subtitle_enabled: bool = True ) -> None: """ 合并视频素材,包括视频、字幕、BGM和解说音频 @@ -220,6 +221,7 @@ def generate_video_v3( - original: 原声音量(0-1),默认1.0 - bgm: BGM音量(0-1),默认0.3 - narration: 解说音量(0-1),默认1.0 + subtitle_enabled: 是否启用字幕,默认True subtitle_style: 字幕样式配置字典,可包含以下键: - font: 字体名称 - fontsize: 字体大小 @@ -239,8 +241,8 @@ def generate_video_v3( video = VideoFileClip(video_path) subtitle_clips = [] - # 处理字幕(如果提供) - if subtitle_path: + # 处理字幕(如果启用且提供)- 修复字幕开关bug + if subtitle_enabled and subtitle_path: if os.path.exists(subtitle_path): # 检查字体文件 if font_path and not os.path.exists(font_path): @@ -308,30 +310,45 @@ def generate_video_v3( except Exception as e: logger.info(f"警告:处理字幕文件时出错: {str(e)}") else: - logger.info(f"提示:字幕文件不存在: {subtitle_path}") + logger.warning(f"字幕文件不存在: {subtitle_path}") + elif not subtitle_enabled: + logger.info("字幕已禁用,跳过字幕处理") + elif not subtitle_path: + logger.info("未提供字幕文件路径,跳过字幕处理") # 合并音频 audio_clips = [] # 添加原声(设置音量) - logger.debug(f"音量配置: {volume_config}") + logger.info(f"音量配置详情: {volume_config}") if video.audio is not None: - original_audio = video.audio.volumex(volume_config['original']) + original_volume = volume_config['original'] + logger.info(f"应用原声音量: {original_volume}") + original_audio = video.audio.volumex(original_volume) audio_clips.append(original_audio) + logger.info("原声音频已添加到合成列表") + else: + logger.warning("视频没有音轨,无法添加原声") # 添加BGM(如果提供) if bgm_path: + logger.info(f"添加背景音乐: {bgm_path}") bgm = AudioFileClip(bgm_path) if bgm.duration < video.duration: bgm = loop_audio_clip(bgm, video.duration) else: bgm = bgm.subclip(0, video.duration) - bgm = bgm.volumex(volume_config['bgm']) + bgm_volume = volume_config['bgm'] + logger.info(f"应用BGM音量: {bgm_volume}") + bgm = bgm.volumex(bgm_volume) audio_clips.append(bgm) # 添加解说音频(如果提供) if narration_path: - narration = AudioFileClip(narration_path).volumex(volume_config['narration']) + logger.info(f"添加解说音频: {narration_path}") + narration_volume = volume_config['narration'] + logger.info(f"应用解说音量: {narration_volume}") + narration = AudioFileClip(narration_path).volumex(narration_volume) audio_clips.append(narration) # 合成最终视频(包含字幕) @@ -342,18 +359,53 @@ def generate_video_v3( final_video = video if audio_clips: + logger.info(f"合成音频轨道,共 {len(audio_clips)} 个音频片段") final_audio = CompositeAudioClip(audio_clips) final_video = final_video.set_audio(final_audio) + logger.info("音频合成完成") + else: + logger.warning("没有音频轨道需要合成") - # 导出视频 - logger.info("开始导出视频...") # 调试信息 - final_video.write_videofile( - output_path, - codec='libx264', - audio_codec='aac', - fps=video.fps - ) - logger.info(f"视频已导出到: {output_path}") # 调试信息 + # 导出视频 - 使用优化的编码器 + logger.info("开始导出视频...") + + # 获取最优编码器 + from app.utils import ffmpeg_utils + optimal_encoder = ffmpeg_utils.get_optimal_ffmpeg_encoder() + + # 根据编码器类型设置参数 + ffmpeg_params = [] + if "nvenc" in optimal_encoder: + ffmpeg_params = ['-preset', 'medium', '-profile:v', 'high'] + elif "videotoolbox" in optimal_encoder: + ffmpeg_params = ['-profile:v', 'high'] + elif "qsv" in optimal_encoder: + ffmpeg_params = ['-preset', 
'medium'] + elif "vaapi" in optimal_encoder: + ffmpeg_params = ['-profile', '100'] + elif optimal_encoder == "libx264": + ffmpeg_params = ['-preset', 'medium', '-crf', '23'] + + try: + final_video.write_videofile( + output_path, + codec=optimal_encoder, + audio_codec='aac', + fps=video.fps, + ffmpeg_params=ffmpeg_params + ) + logger.info(f"视频已导出到: {output_path} (使用编码器: {optimal_encoder})") + except Exception as e: + logger.warning(f"使用 {optimal_encoder} 编码器失败: {str(e)}, 尝试软件编码") + # 降级到软件编码 + final_video.write_videofile( + output_path, + codec='libx264', + audio_codec='aac', + fps=video.fps, + ffmpeg_params=['-preset', 'medium', '-crf', '23'] + ) + logger.info(f"视频已导出到: {output_path} (使用软件编码)") # 清理资源 video.close() diff --git a/app/utils/ffmpeg_utils.py b/app/utils/ffmpeg_utils.py index 58ae83d..538af7a 100644 --- a/app/utils/ffmpeg_utils.py +++ b/app/utils/ffmpeg_utils.py @@ -1,9 +1,11 @@ """ FFmpeg 工具模块 - 提供 FFmpeg 相关的工具函数,特别是硬件加速检测 +优化多平台兼容性,支持渐进式降级和智能错误处理 """ import os import platform import subprocess +import tempfile from typing import Dict, List, Optional, Tuple, Union from loguru import logger @@ -14,9 +16,104 @@ _FFMPEG_HW_ACCEL_INFO = { "encoder": None, "hwaccel_args": [], "message": "", - "is_dedicated_gpu": False + "is_dedicated_gpu": False, + "fallback_available": False, # 是否有备用方案 + "fallback_encoder": None, # 备用编码器 + "platform": None, # 平台信息 + "gpu_vendor": None, # GPU厂商 + "tested_methods": [] # 已测试的方法 } +# 硬件加速优先级配置(按平台和GPU类型) +HWACCEL_PRIORITY = { + "windows": { + "nvidia": ["cuda", "nvenc", "d3d11va", "dxva2"], + "amd": ["d3d11va", "dxva2", "amf"], # 不再完全禁用AMD + "intel": ["qsv", "d3d11va", "dxva2"], + "unknown": ["d3d11va", "dxva2"] + }, + "darwin": { + "apple": ["videotoolbox"], + "nvidia": ["cuda", "videotoolbox"], + "amd": ["videotoolbox"], + "intel": ["videotoolbox"], + "unknown": ["videotoolbox"] + }, + "linux": { + "nvidia": ["cuda", "nvenc", "vaapi"], + "amd": ["vaapi", "amf"], + "intel": ["qsv", "vaapi"], + "unknown": ["vaapi"] + } +} + +# 编码器映射 +ENCODER_MAPPING = { + "cuda": "h264_nvenc", + "nvenc": "h264_nvenc", + "videotoolbox": "h264_videotoolbox", + "qsv": "h264_qsv", + "vaapi": "h264_vaapi", + "amf": "h264_amf", + "d3d11va": "libx264", # D3D11VA只用于解码 + "dxva2": "libx264", # DXVA2只用于解码 + "software": "libx264" +} + + +def get_null_input() -> str: + """ + 获取平台特定的空输入文件路径 + + Returns: + str: 平台特定的空输入路径 + """ + system = platform.system().lower() + if system == "windows": + return "NUL" + else: + return "/dev/null" + + +def create_test_video() -> str: + """ + 创建一个临时的测试视频文件,用于硬件加速测试 + + Returns: + str: 临时测试视频文件路径 + """ + try: + # 创建临时文件 + temp_file = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False) + temp_path = temp_file.name + temp_file.close() + + # 生成一个简单的测试视频(1秒,黑色画面) + cmd = [ + 'ffmpeg', '-y', '-f', 'lavfi', '-i', 'color=black:size=320x240:duration=1', + '-c:v', 'libx264', '-pix_fmt', 'yuv420p', '-t', '1', temp_path + ] + + subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True) + return temp_path + except Exception as e: + logger.debug(f"创建测试视频失败: {str(e)}") + return get_null_input() + + +def cleanup_test_video(path: str) -> None: + """ + 清理测试视频文件 + + Args: + path: 测试视频文件路径 + """ + try: + if path != get_null_input() and os.path.exists(path): + os.unlink(path) + except Exception as e: + logger.debug(f"清理测试视频失败: {str(e)}") + def check_ffmpeg_installation() -> bool: """ @@ -38,9 +135,123 @@ def check_ffmpeg_installation() -> bool: return False +def detect_gpu_vendor() -> str: + """ + 检测GPU厂商 + + Returns: + str: GPU厂商 (nvidia, amd, 
intel, apple, unknown) + """ + system = platform.system().lower() + + try: + if system == "windows": + gpu_info = _get_windows_gpu_info().lower() + if 'nvidia' in gpu_info or 'geforce' in gpu_info or 'quadro' in gpu_info: + return "nvidia" + elif 'amd' in gpu_info or 'radeon' in gpu_info: + return "amd" + elif 'intel' in gpu_info: + return "intel" + elif system == "darwin": + # macOS上检查是否为Apple Silicon + if platform.machine().lower() in ['arm64', 'aarch64']: + return "apple" + else: + # Intel Mac,可能有独立显卡 + gpu_info = _get_macos_gpu_info().lower() + if 'nvidia' in gpu_info: + return "nvidia" + elif 'amd' in gpu_info or 'radeon' in gpu_info: + return "amd" + else: + return "intel" + elif system == "linux": + gpu_info = _get_linux_gpu_info().lower() + if 'nvidia' in gpu_info: + return "nvidia" + elif 'amd' in gpu_info or 'radeon' in gpu_info: + return "amd" + elif 'intel' in gpu_info: + return "intel" + except Exception as e: + logger.debug(f"检测GPU厂商失败: {str(e)}") + + return "unknown" + + +def test_hwaccel_method(method: str, test_input: str) -> bool: + """ + 测试特定的硬件加速方法 + + Args: + method: 硬件加速方法名称 + test_input: 测试输入文件路径 + + Returns: + bool: 是否支持该方法 + """ + try: + # 构建测试命令 + cmd = ["ffmpeg", "-hide_banner", "-loglevel", "error"] + + # 添加硬件加速参数 + if method == "cuda": + cmd.extend(["-hwaccel", "cuda", "-hwaccel_output_format", "cuda"]) + elif method == "nvenc": + cmd.extend(["-hwaccel", "cuda"]) + elif method == "videotoolbox": + cmd.extend(["-hwaccel", "videotoolbox"]) + elif method == "qsv": + cmd.extend(["-hwaccel", "qsv"]) + elif method == "vaapi": + # 尝试找到VAAPI设备 + render_device = _find_vaapi_device() + if render_device: + cmd.extend(["-hwaccel", "vaapi", "-vaapi_device", render_device]) + else: + cmd.extend(["-hwaccel", "vaapi"]) + elif method == "d3d11va": + cmd.extend(["-hwaccel", "d3d11va"]) + elif method == "dxva2": + cmd.extend(["-hwaccel", "dxva2"]) + elif method == "amf": + cmd.extend(["-hwaccel", "auto"]) # AMF通常通过auto检测 + else: + return False + + # 添加输入和输出 + cmd.extend(["-i", test_input, "-f", "null", "-t", "0.1", "-"]) + + # 执行测试 + result = subprocess.run( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + check=False, + timeout=10 # 10秒超时 + ) + + success = result.returncode == 0 + if success: + logger.debug(f"硬件加速方法 {method} 测试成功") + else: + logger.debug(f"硬件加速方法 {method} 测试失败: {result.stderr[:200]}") + + return success + + except subprocess.TimeoutExpired: + logger.debug(f"硬件加速方法 {method} 测试超时") + return False + except Exception as e: + logger.debug(f"硬件加速方法 {method} 测试异常: {str(e)}") + return False + + def detect_hardware_acceleration() -> Dict[str, Union[bool, str, List[str], None]]: """ - 检测系统可用的硬件加速器,并存储结果到全局变量 + 检测系统可用的硬件加速器,使用渐进式检测和智能降级 Returns: Dict: 包含硬件加速信息的字典 @@ -56,45 +267,176 @@ def detect_hardware_acceleration() -> Dict[str, Union[bool, str, List[str], None _FFMPEG_HW_ACCEL_INFO["message"] = "FFmpeg未安装或不在系统PATH中" return _FFMPEG_HW_ACCEL_INFO - # 检测操作系统 + # 检测平台和GPU信息 system = platform.system().lower() - logger.debug(f"检测硬件加速 - 操作系统: {system}") + gpu_vendor = detect_gpu_vendor() + + _FFMPEG_HW_ACCEL_INFO["platform"] = system + _FFMPEG_HW_ACCEL_INFO["gpu_vendor"] = gpu_vendor + + logger.info(f"检测硬件加速 - 平台: {system}, GPU厂商: {gpu_vendor}") # 获取FFmpeg支持的硬件加速器列表 try: - # 在Windows系统上使用UTF-8编码 - is_windows = os.name == 'nt' - if is_windows: - hwaccels_cmd = subprocess.run( - ['ffmpeg', '-hide_banner', '-hwaccels'], - stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf-8', text=True - ) - else: - hwaccels_cmd = subprocess.run( - ['ffmpeg', 
'-hide_banner', '-hwaccels'], - stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True - ) - supported_hwaccels = hwaccels_cmd.stdout.lower() + hwaccels_cmd = subprocess.run( + ['ffmpeg', '-hide_banner', '-hwaccels'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False + ) + supported_hwaccels = hwaccels_cmd.stdout.lower() if hwaccels_cmd.returncode == 0 else "" + logger.debug(f"FFmpeg支持的硬件加速器: {supported_hwaccels}") except Exception as e: - logger.error(f"获取FFmpeg硬件加速器列表失败: {str(e)}") + logger.warning(f"获取FFmpeg硬件加速器列表失败: {str(e)}") supported_hwaccels = "" - # 根据操作系统检测不同的硬件加速器 - if system == 'darwin': # macOS - _detect_macos_acceleration(supported_hwaccels) - elif system == 'windows': # Windows - _detect_windows_acceleration(supported_hwaccels) - elif system == 'linux': # Linux - _detect_linux_acceleration(supported_hwaccels) - else: - logger.warning(f"不支持的操作系统: {system}") - _FFMPEG_HW_ACCEL_INFO["message"] = f"不支持的操作系统: {system}" + # 创建测试输入 + test_input = create_test_video() - # 记录检测结果已经在启动时输出,这里不再重复输出 + try: + # 根据平台和GPU厂商获取优先级列表 + priority_list = HWACCEL_PRIORITY.get(system, {}).get(gpu_vendor, []) + if not priority_list: + priority_list = HWACCEL_PRIORITY.get(system, {}).get("unknown", []) + + logger.debug(f"硬件加速测试优先级: {priority_list}") + + # 按优先级测试硬件加速方法 + for method in priority_list: + # 检查FFmpeg是否支持该方法 + if method not in supported_hwaccels and method != "nvenc": # nvenc可能不在hwaccels列表中 + logger.debug(f"跳过不支持的硬件加速方法: {method}") + continue + + _FFMPEG_HW_ACCEL_INFO["tested_methods"].append(method) + + if test_hwaccel_method(method, test_input): + # 找到可用的硬件加速方法 + _FFMPEG_HW_ACCEL_INFO["available"] = True + _FFMPEG_HW_ACCEL_INFO["type"] = method + _FFMPEG_HW_ACCEL_INFO["encoder"] = ENCODER_MAPPING.get(method, "libx264") + + # 构建硬件加速参数 + if method == "cuda": + _FFMPEG_HW_ACCEL_INFO["hwaccel_args"] = ["-hwaccel", "cuda", "-hwaccel_output_format", "cuda"] + elif method == "nvenc": + _FFMPEG_HW_ACCEL_INFO["hwaccel_args"] = ["-hwaccel", "cuda"] + elif method == "videotoolbox": + _FFMPEG_HW_ACCEL_INFO["hwaccel_args"] = ["-hwaccel", "videotoolbox"] + elif method == "qsv": + _FFMPEG_HW_ACCEL_INFO["hwaccel_args"] = ["-hwaccel", "qsv"] + elif method == "vaapi": + render_device = _find_vaapi_device() + if render_device: + _FFMPEG_HW_ACCEL_INFO["hwaccel_args"] = ["-hwaccel", "vaapi", "-vaapi_device", render_device] + else: + _FFMPEG_HW_ACCEL_INFO["hwaccel_args"] = ["-hwaccel", "vaapi"] + elif method in ["d3d11va", "dxva2"]: + _FFMPEG_HW_ACCEL_INFO["hwaccel_args"] = ["-hwaccel", method] + elif method == "amf": + _FFMPEG_HW_ACCEL_INFO["hwaccel_args"] = ["-hwaccel", "auto"] + + # 判断是否为独立GPU + _FFMPEG_HW_ACCEL_INFO["is_dedicated_gpu"] = gpu_vendor in ["nvidia", "amd"] or (gpu_vendor == "intel" and "arc" in _get_gpu_info().lower()) + + _FFMPEG_HW_ACCEL_INFO["message"] = f"使用 {method} 硬件加速 ({gpu_vendor} GPU)" + logger.info(f"硬件加速检测成功: {method} ({gpu_vendor})") + break + + # 如果没有找到硬件加速,设置软件编码作为备用 + if not _FFMPEG_HW_ACCEL_INFO["available"]: + _FFMPEG_HW_ACCEL_INFO["fallback_available"] = True + _FFMPEG_HW_ACCEL_INFO["fallback_encoder"] = "libx264" + _FFMPEG_HW_ACCEL_INFO["message"] = f"未找到可用的硬件加速,将使用软件编码 (平台: {system}, GPU: {gpu_vendor})" + logger.info("未检测到硬件加速,将使用软件编码") + + finally: + # 清理测试文件 + cleanup_test_video(test_input) return _FFMPEG_HW_ACCEL_INFO +def _get_gpu_info() -> str: + """ + 获取GPU信息的统一接口 + + Returns: + str: GPU信息字符串 + """ + system = platform.system().lower() + + if system == "windows": + return _get_windows_gpu_info() + elif system == "darwin": + return 
_get_macos_gpu_info() + elif system == "linux": + return _get_linux_gpu_info() + else: + return "unknown" + + +def _get_macos_gpu_info() -> str: + """ + 获取macOS系统的GPU信息 + + Returns: + str: GPU信息字符串 + """ + try: + # 使用system_profiler获取显卡信息 + result = subprocess.run( + ['system_profiler', 'SPDisplaysDataType'], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False + ) + if result.returncode == 0: + return result.stdout + + # 备用方法:检查是否为Apple Silicon + if platform.machine().lower() in ['arm64', 'aarch64']: + return "Apple Silicon GPU" + else: + return "Intel Mac GPU" + except Exception as e: + logger.debug(f"获取macOS GPU信息失败: {str(e)}") + return "unknown" + + +def _find_vaapi_device() -> Optional[str]: + """ + 查找可用的VAAPI设备 + + Returns: + Optional[str]: VAAPI设备路径,如果没有找到则返回None + """ + try: + # 常见的VAAPI设备路径 + possible_devices = [ + "/dev/dri/renderD128", + "/dev/dri/renderD129", + "/dev/dri/card0", + "/dev/dri/card1" + ] + + for device in possible_devices: + if os.path.exists(device): + # 测试设备是否可用 + test_cmd = subprocess.run( + ["ffmpeg", "-hide_banner", "-loglevel", "error", + "-hwaccel", "vaapi", "-vaapi_device", device, + "-f", "lavfi", "-i", "color=black:size=64x64:duration=0.1", + "-f", "null", "-"], + stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=False + ) + if test_cmd.returncode == 0: + logger.debug(f"找到可用的VAAPI设备: {device}") + return device + + logger.debug("未找到可用的VAAPI设备") + return None + except Exception as e: + logger.debug(f"查找VAAPI设备失败: {str(e)}") + return None + + def _detect_macos_acceleration(supported_hwaccels: str) -> None: """ 检测macOS系统的硬件加速 @@ -511,3 +853,165 @@ def is_dedicated_gpu() -> bool: detect_hardware_acceleration() return _FFMPEG_HW_ACCEL_INFO["is_dedicated_gpu"] + + +def get_optimal_ffmpeg_encoder() -> str: + """ + 获取最优的FFmpeg编码器 + + Returns: + str: 编码器名称 + """ + # 如果还没有检测过,先进行检测 + if _FFMPEG_HW_ACCEL_INFO["type"] is None: + detect_hardware_acceleration() + + if _FFMPEG_HW_ACCEL_INFO["available"]: + return _FFMPEG_HW_ACCEL_INFO["encoder"] + elif _FFMPEG_HW_ACCEL_INFO["fallback_available"]: + return _FFMPEG_HW_ACCEL_INFO["fallback_encoder"] + else: + return "libx264" # 默认软件编码器 + + +def get_ffmpeg_command_with_hwaccel(input_path: str, output_path: str, **kwargs) -> List[str]: + """ + 生成带有硬件加速的FFmpeg命令 + + Args: + input_path: 输入文件路径 + output_path: 输出文件路径 + **kwargs: 其他FFmpeg参数 + + Returns: + List[str]: FFmpeg命令列表 + """ + # 如果还没有检测过,先进行检测 + if _FFMPEG_HW_ACCEL_INFO["type"] is None: + detect_hardware_acceleration() + + cmd = ["ffmpeg", "-y"] + + # 添加硬件加速参数 + if _FFMPEG_HW_ACCEL_INFO["available"]: + cmd.extend(_FFMPEG_HW_ACCEL_INFO["hwaccel_args"]) + + # 添加输入文件 + cmd.extend(["-i", input_path]) + + # 添加编码器 + encoder = get_optimal_ffmpeg_encoder() + cmd.extend(["-c:v", encoder]) + + # 添加其他参数 + for key, value in kwargs.items(): + if key.startswith("_"): # 跳过内部参数 + continue + if isinstance(value, list): + cmd.extend(value) + else: + cmd.extend([f"-{key}", str(value)]) + + # 添加输出文件 + cmd.append(output_path) + + return cmd + + +def test_ffmpeg_compatibility() -> Dict[str, any]: + """ + 测试FFmpeg兼容性并返回详细报告 + + Returns: + Dict: 兼容性测试报告 + """ + report = { + "ffmpeg_installed": False, + "platform": platform.system().lower(), + "gpu_vendor": "unknown", + "hardware_acceleration": { + "available": False, + "type": None, + "encoder": None, + "tested_methods": [] + }, + "software_fallback": { + "available": False, + "encoder": "libx264" + }, + "recommendations": [] + } + + # 检查FFmpeg安装 + report["ffmpeg_installed"] = check_ffmpeg_installation() + if not 
report["ffmpeg_installed"]: + report["recommendations"].append("请安装FFmpeg并确保其在系统PATH中") + return report + + # 检测硬件加速 + hwaccel_info = detect_hardware_acceleration() + report["gpu_vendor"] = hwaccel_info.get("gpu_vendor", "unknown") + report["hardware_acceleration"]["available"] = hwaccel_info.get("available", False) + report["hardware_acceleration"]["type"] = hwaccel_info.get("type") + report["hardware_acceleration"]["encoder"] = hwaccel_info.get("encoder") + report["hardware_acceleration"]["tested_methods"] = hwaccel_info.get("tested_methods", []) + + # 检查软件备用方案 + report["software_fallback"]["available"] = hwaccel_info.get("fallback_available", True) + report["software_fallback"]["encoder"] = hwaccel_info.get("fallback_encoder", "libx264") + + # 生成建议 + if not report["hardware_acceleration"]["available"]: + if report["gpu_vendor"] == "nvidia": + report["recommendations"].append("建议安装NVIDIA驱动和CUDA工具包以启用硬件加速") + elif report["gpu_vendor"] == "amd": + report["recommendations"].append("AMD显卡硬件加速支持有限,建议使用软件编码") + elif report["gpu_vendor"] == "intel": + report["recommendations"].append("建议更新Intel显卡驱动以启用QSV硬件加速") + else: + report["recommendations"].append("未检测到支持的GPU,将使用软件编码") + + return report + + +def force_software_encoding() -> None: + """ + 强制使用软件编码,禁用硬件加速 + """ + global _FFMPEG_HW_ACCEL_INFO + + _FFMPEG_HW_ACCEL_INFO.update({ + "available": False, + "type": "software", + "encoder": "libx264", + "hwaccel_args": [], + "message": "强制使用软件编码", + "is_dedicated_gpu": False, + "fallback_available": True, + "fallback_encoder": "libx264" + }) + + logger.info("已强制切换到软件编码模式") + + +def reset_hwaccel_detection() -> None: + """ + 重置硬件加速检测结果,强制重新检测 + """ + global _FFMPEG_HW_ACCEL_INFO + + _FFMPEG_HW_ACCEL_INFO = { + "available": False, + "type": None, + "encoder": None, + "hwaccel_args": [], + "message": "", + "is_dedicated_gpu": False, + "fallback_available": False, + "fallback_encoder": None, + "platform": None, + "gpu_vendor": None, + "tested_methods": [] + } + + logger.info("已重置硬件加速检测结果") diff --git a/config.example.toml b/config.example.toml index adafb84..a05cb30 100644 --- a/config.example.toml +++ b/config.example.toml @@ -1,5 +1,5 @@ [app] - project_version="0.6.2" + project_version="0.6.5" # 支持视频理解的大模型提供商 # gemini (谷歌, 需要 VPN) # siliconflow (硅基流动) diff --git a/project_version b/project_version index a0a1517..e0ea44c 100644 --- a/project_version +++ b/project_version @@ -1 +1 @@ -0.6.3 \ No newline at end of file +0.6.5 \ No newline at end of file diff --git a/webui/components/audio_settings.py b/webui/components/audio_settings.py index a58ca60..e422d48 100644 --- a/webui/components/audio_settings.py +++ b/webui/components/audio_settings.py @@ -3,6 +3,7 @@ import os from uuid import uuid4 from app.config import config from app.services import voice +from app.models.schema import AudioVolumeDefaults from app.utils import utils from webui.utils.cache import get_songs_cache @@ -94,12 +95,12 @@ def render_azure_v2_settings(tr): def render_voice_parameters(tr): """渲染语音参数设置""" - # 音量 + # 音量 - 使用统一的默认值 voice_volume = st.slider( tr("Speech Volume"), - min_value=0.0, - max_value=1.0, - value=1.0, + min_value=AudioVolumeDefaults.MIN_VOLUME, + max_value=AudioVolumeDefaults.MAX_VOLUME, + value=AudioVolumeDefaults.VOICE_VOLUME, step=0.01, help=tr("Adjust the volume of the original audio") ) @@ -187,12 +188,12 @@ def render_bgm_settings(tr): if custom_bgm_file and os.path.exists(custom_bgm_file): st.session_state['bgm_file'] = custom_bgm_file - # 背景音乐音量 + # 背景音乐音量 - 使用统一的默认值 bgm_volume = st.slider( 
tr("Background Music Volume"), - min_value=0.0, - max_value=1.0, - value=0.3, + min_value=AudioVolumeDefaults.MIN_VOLUME, + max_value=AudioVolumeDefaults.MAX_VOLUME, + value=AudioVolumeDefaults.BGM_VOLUME, step=0.01, help=tr("Adjust the volume of the original audio") ) @@ -203,10 +204,10 @@ def get_audio_params(): """获取音频参数""" return { 'voice_name': config.ui.get("voice_name", ""), - 'voice_volume': st.session_state.get('voice_volume', 1.0), + 'voice_volume': st.session_state.get('voice_volume', AudioVolumeDefaults.VOICE_VOLUME), 'voice_rate': st.session_state.get('voice_rate', 1.0), 'voice_pitch': st.session_state.get('voice_pitch', 1.0), 'bgm_type': st.session_state.get('bgm_type', 'random'), 'bgm_file': st.session_state.get('bgm_file', ''), - 'bgm_volume': st.session_state.get('bgm_volume', 0.3), + 'bgm_volume': st.session_state.get('bgm_volume', AudioVolumeDefaults.BGM_VOLUME), } diff --git a/webui/components/video_settings.py b/webui/components/video_settings.py index 8a9b3f4..f0aec33 100644 --- a/webui/components/video_settings.py +++ b/webui/components/video_settings.py @@ -1,5 +1,5 @@ import streamlit as st -from app.models.schema import VideoClipParams, VideoAspect +from app.models.schema import VideoClipParams, VideoAspect, AudioVolumeDefaults def render_video_panel(tr): @@ -41,12 +41,12 @@ def render_video_config(tr, params): ) st.session_state['video_quality'] = video_qualities[quality_index][1] - # 原声音量 + # 原声音量 - 使用统一的默认值 params.original_volume = st.slider( tr("Original Volume"), - min_value=0.0, - max_value=1.0, - value=0.7, + min_value=AudioVolumeDefaults.MIN_VOLUME, + max_value=AudioVolumeDefaults.MAX_VOLUME, + value=AudioVolumeDefaults.ORIGINAL_VOLUME, step=0.01, help=tr("Adjust the volume of the original audio") ) @@ -58,5 +58,5 @@ def get_video_params(): return { 'video_aspect': st.session_state.get('video_aspect', VideoAspect.portrait.value), 'video_quality': st.session_state.get('video_quality', '1080p'), - 'original_volume': st.session_state.get('original_volume', 0.7) + 'original_volume': st.session_state.get('original_volume', AudioVolumeDefaults.ORIGINAL_VOLUME) }