diff --git a/.gitignore b/.gitignore
index 8096610..f3c7489 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,3 +34,6 @@ resource/srt/*.srt
 app/models/faster-whisper-large-v2/*
 app/models/faster-whisper-large-v3/*
 app/models/bert/*
+
+bug清单.md
+task.md
\ No newline at end of file
diff --git a/app/models/schema.py b/app/models/schema.py
index ddf0ad1..b059b36 100644
--- a/app/models/schema.py
+++ b/app/models/schema.py
@@ -1,6 +1,6 @@
 import warnings
 from enum import Enum
-from typing import Any, List, Optional
+from typing import Any, List, Optional, Union
 
 import pydantic
 from pydantic import BaseModel, Field
@@ -13,6 +13,24 @@ warnings.filterwarnings(
 )
 
 
+class AudioVolumeDefaults:
+    """音量配置默认值常量类 - 确保全局一致性"""
+
+    # 语音音量默认值
+    VOICE_VOLUME = 1.0
+    TTS_VOLUME = 1.0
+
+    # 原声音量默认值 - 这是修复bug的关键
+    ORIGINAL_VOLUME = 0.7
+
+    # 背景音乐音量默认值
+    BGM_VOLUME = 0.3
+
+    # 音量范围
+    MIN_VOLUME = 0.0
+    MAX_VOLUME = 1.0
+
+
 class VideoConcatMode(str, Enum):
     random = "random"
     sequential = "sequential"
@@ -101,7 +119,7 @@ class VideoParams(BaseModel):
 
     video_subject: str
     video_script: str = ""  # 用于生成视频的脚本
-    video_terms: Optional[str | list] = None  # 用于生成视频的关键词
+    video_terms: Optional[Union[str, list]] = None  # 用于生成视频的关键词
     video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
     video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
     video_clip_duration: Optional[int] = 5
@@ -113,11 +131,11 @@ class VideoParams(BaseModel):
     video_language: Optional[str] = ""  # auto detect
 
     voice_name: Optional[str] = ""
-    voice_volume: Optional[float] = 1.0
+    voice_volume: Optional[float] = AudioVolumeDefaults.VOICE_VOLUME
     voice_rate: Optional[float] = 1.0
     bgm_type: Optional[str] = "random"
     bgm_file: Optional[str] = ""
-    bgm_volume: Optional[float] = 0.2
+    bgm_volume: Optional[float] = AudioVolumeDefaults.BGM_VOLUME
 
     subtitle_enabled: Optional[bool] = True
     subtitle_position: Optional[str] = "bottom"  # top, bottom, center
@@ -157,11 +175,11 @@ class AudioRequest(BaseModel):
     video_script: str
     video_language: Optional[str] = ""
     voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
-    voice_volume: Optional[float] = 1.0
+    voice_volume: Optional[float] = AudioVolumeDefaults.VOICE_VOLUME
     voice_rate: Optional[float] = 1.2
     bgm_type: Optional[str] = "random"
     bgm_file: Optional[str] = ""
-    bgm_volume: Optional[float] = 0.2
+    bgm_volume: Optional[float] = AudioVolumeDefaults.BGM_VOLUME
 
     video_source: Optional[str] = "local"
@@ -347,7 +365,7 @@ class VideoClipParams(BaseModel):
 
     # video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
 
     voice_name: Optional[str] = Field(default="zh-CN-YunjianNeural", description="语音名称")
-    voice_volume: Optional[float] = Field(default=1.0, description="解说语音音量")
+    voice_volume: Optional[float] = Field(default=AudioVolumeDefaults.VOICE_VOLUME, description="解说语音音量")
     voice_rate: Optional[float] = Field(default=1.0, description="语速")
     voice_pitch: Optional[float] = Field(default=1.0, description="语调")
@@ -367,9 +385,9 @@ class VideoClipParams(BaseModel):
 
     n_threads: Optional[int] = Field(default=16, description="线程数")  # 线程数,有助于提升视频处理速度
 
-    tts_volume: Optional[float] = Field(default=1.0, description="解说语音音量(后处理)")
-    original_volume: Optional[float] = Field(default=1.0, description="视频原声音量")
-    bgm_volume: Optional[float] = Field(default=0.3, description="背景音乐音量")
+    tts_volume: Optional[float] = Field(default=AudioVolumeDefaults.TTS_VOLUME, description="解说语音音量(后处理)")
+    original_volume: Optional[float] = Field(default=AudioVolumeDefaults.ORIGINAL_VOLUME, description="视频原声音量")
+    bgm_volume: Optional[float] = Field(default=AudioVolumeDefaults.BGM_VOLUME, description="背景音乐音量")
 
 
 class VideoTranscriptionRequest(BaseModel):
diff --git a/app/services/generate_video.py b/app/services/generate_video.py
index f125c05..74b11af 100644
--- a/app/services/generate_video.py
+++ b/app/services/generate_video.py
@@ -24,6 +24,7 @@ from moviepy.video.tools.subtitles import SubtitlesClip
 from PIL import ImageFont
 
 from app.utils import utils
+from app.models.schema import AudioVolumeDefaults
 
 
 def merge_materials(
@@ -66,11 +67,12 @@ def merge_materials(
     if options is None:
         options = {}
 
-    # 设置默认参数值
-    voice_volume = options.get('voice_volume', 1.0)
-    bgm_volume = options.get('bgm_volume', 0.3)
-    original_audio_volume = options.get('original_audio_volume', 0.0)  # 默认为0,即不保留原声
-    keep_original_audio = options.get('keep_original_audio', False)  # 是否保留原声
+    # 设置默认参数值 - 使用统一的音量配置
+    voice_volume = options.get('voice_volume', AudioVolumeDefaults.VOICE_VOLUME)
+    bgm_volume = options.get('bgm_volume', AudioVolumeDefaults.BGM_VOLUME)
+    # 修复bug: 将原声音量默认值从0.0改为0.7,确保短剧解说模式下原片音量正常
+    original_audio_volume = options.get('original_audio_volume', AudioVolumeDefaults.ORIGINAL_VOLUME)
+    keep_original_audio = options.get('keep_original_audio', True)  # 默认保留原声
     subtitle_font = options.get('subtitle_font', '')
     subtitle_font_size = options.get('subtitle_font_size', 40)
     subtitle_color = options.get('subtitle_color', '#FFFFFF')
@@ -81,11 +83,29 @@ def merge_materials(
     stroke_width = options.get('stroke_width', 1)
     threads = options.get('threads', 2)
     fps = options.get('fps', 30)
-    
+
+    # 音量配置日志 - 便于调试音量问题
+    logger.info(f"音量配置详情:")
+    logger.info(f"  - 配音音量: {voice_volume}")
+    logger.info(f"  - 背景音乐音量: {bgm_volume}")
+    logger.info(f"  - 原声音量: {original_audio_volume}")
+    logger.info(f"  - 是否保留原声: {keep_original_audio}")
+
+    # 音量参数验证
+    def validate_volume(volume, name):
+        if not (AudioVolumeDefaults.MIN_VOLUME <= volume <= AudioVolumeDefaults.MAX_VOLUME):
+            logger.warning(f"{name}音量 {volume} 超出有效范围 [{AudioVolumeDefaults.MIN_VOLUME}, {AudioVolumeDefaults.MAX_VOLUME}],将被限制")
+            return max(AudioVolumeDefaults.MIN_VOLUME, min(volume, AudioVolumeDefaults.MAX_VOLUME))
+        return volume
+
+    voice_volume = validate_volume(voice_volume, "配音")
+    bgm_volume = validate_volume(bgm_volume, "背景音乐")
+    original_audio_volume = validate_volume(original_audio_volume, "原声")
+
     # 处理透明背景色问题 - MoviePy 2.1.1不支持'transparent'值
     if subtitle_bg_color == 'transparent':
         subtitle_bg_color = None  # None在新版MoviePy中表示透明背景
-    
+
     # 创建输出目录(如果不存在)
     output_dir = os.path.dirname(output_path)
     os.makedirs(output_dir, exist_ok=True)
diff --git a/app/services/video.py b/app/services/video.py
index 087dbdf..661c5f7 100644
--- a/app/services/video.py
+++ b/app/services/video.py
@@ -314,24 +314,35 @@ def generate_video_v3(
     audio_clips = []
 
     # 添加原声(设置音量)
-    logger.debug(f"音量配置: {volume_config}")
+    logger.info(f"音量配置详情: {volume_config}")
     if video.audio is not None:
-        original_audio = video.audio.volumex(volume_config['original'])
+        original_volume = volume_config['original']
+        logger.info(f"应用原声音量: {original_volume}")
+        original_audio = video.audio.volumex(original_volume)
         audio_clips.append(original_audio)
+        logger.info("原声音频已添加到合成列表")
+    else:
+        logger.warning("视频没有音轨,无法添加原声")
 
     # 添加BGM(如果提供)
     if bgm_path:
+        logger.info(f"添加背景音乐: {bgm_path}")
         bgm = AudioFileClip(bgm_path)
         if bgm.duration < video.duration:
             bgm = loop_audio_clip(bgm, video.duration)
         else:
             bgm = bgm.subclip(0, video.duration)
-        bgm = bgm.volumex(volume_config['bgm'])
+        bgm_volume = volume_config['bgm']
+        logger.info(f"应用BGM音量: {bgm_volume}")
+        bgm = bgm.volumex(bgm_volume)
         audio_clips.append(bgm)
 
     # 添加解说音频(如果提供)
     if narration_path:
-        narration = AudioFileClip(narration_path).volumex(volume_config['narration'])
+        logger.info(f"添加解说音频: {narration_path}")
+        narration_volume = volume_config['narration']
+        logger.info(f"应用解说音量: {narration_volume}")
+        narration = AudioFileClip(narration_path).volumex(narration_volume)
         audio_clips.append(narration)
 
     # 合成最终视频(包含字幕)
@@ -342,8 +353,12 @@
     final_video = video
 
     if audio_clips:
+        logger.info(f"合成音频轨道,共 {len(audio_clips)} 个音频片段")
         final_audio = CompositeAudioClip(audio_clips)
         final_video = final_video.set_audio(final_audio)
+        logger.info("音频合成完成")
+    else:
+        logger.warning("没有音频轨道需要合成")
 
     # 导出视频
     logger.info("开始导出视频...")  # 调试信息
diff --git a/webui/components/audio_settings.py b/webui/components/audio_settings.py
index a58ca60..e422d48 100644
--- a/webui/components/audio_settings.py
+++ b/webui/components/audio_settings.py
@@ -3,6 +3,7 @@ import os
 from uuid import uuid4
 
 from app.config import config
 from app.services import voice
+from app.models.schema import AudioVolumeDefaults
 from app.utils import utils
 from webui.utils.cache import get_songs_cache
@@ -94,12 +95,12 @@ def render_azure_v2_settings(tr):
 
 def render_voice_parameters(tr):
     """渲染语音参数设置"""
-    # 音量
+    # 音量 - 使用统一的默认值
     voice_volume = st.slider(
         tr("Speech Volume"),
-        min_value=0.0,
-        max_value=1.0,
-        value=1.0,
+        min_value=AudioVolumeDefaults.MIN_VOLUME,
+        max_value=AudioVolumeDefaults.MAX_VOLUME,
+        value=AudioVolumeDefaults.VOICE_VOLUME,
         step=0.01,
         help=tr("Adjust the volume of the original audio")
     )
@@ -187,12 +188,12 @@ def render_bgm_settings(tr):
         if custom_bgm_file and os.path.exists(custom_bgm_file):
             st.session_state['bgm_file'] = custom_bgm_file
 
-    # 背景音乐音量
+    # 背景音乐音量 - 使用统一的默认值
     bgm_volume = st.slider(
         tr("Background Music Volume"),
-        min_value=0.0,
-        max_value=1.0,
-        value=0.3,
+        min_value=AudioVolumeDefaults.MIN_VOLUME,
+        max_value=AudioVolumeDefaults.MAX_VOLUME,
+        value=AudioVolumeDefaults.BGM_VOLUME,
         step=0.01,
         help=tr("Adjust the volume of the original audio")
     )
@@ -203,10 +204,10 @@ def get_audio_params():
     """获取音频参数"""
     return {
         'voice_name': config.ui.get("voice_name", ""),
-        'voice_volume': st.session_state.get('voice_volume', 1.0),
+        'voice_volume': st.session_state.get('voice_volume', AudioVolumeDefaults.VOICE_VOLUME),
         'voice_rate': st.session_state.get('voice_rate', 1.0),
         'voice_pitch': st.session_state.get('voice_pitch', 1.0),
         'bgm_type': st.session_state.get('bgm_type', 'random'),
         'bgm_file': st.session_state.get('bgm_file', ''),
-        'bgm_volume': st.session_state.get('bgm_volume', 0.3),
+        'bgm_volume': st.session_state.get('bgm_volume', AudioVolumeDefaults.BGM_VOLUME),
     }
diff --git a/webui/components/video_settings.py b/webui/components/video_settings.py
index 8a9b3f4..f0aec33 100644
--- a/webui/components/video_settings.py
+++ b/webui/components/video_settings.py
@@ -1,5 +1,5 @@
 import streamlit as st
-from app.models.schema import VideoClipParams, VideoAspect
+from app.models.schema import VideoClipParams, VideoAspect, AudioVolumeDefaults
 
 
 def render_video_panel(tr):
@@ -41,12 +41,12 @@ def render_video_config(tr, params):
     )
     st.session_state['video_quality'] = video_qualities[quality_index][1]
 
-    # 原声音量
+    # 原声音量 - 使用统一的默认值
    params.original_volume = st.slider(
         tr("Original Volume"),
-        min_value=0.0,
-        max_value=1.0,
-        value=0.7,
+        min_value=AudioVolumeDefaults.MIN_VOLUME,
+        max_value=AudioVolumeDefaults.MAX_VOLUME,
+        value=AudioVolumeDefaults.ORIGINAL_VOLUME,
         step=0.01,
         help=tr("Adjust the volume of the original audio")
     )
@@ -58,5 +58,5 @@ def get_video_params():
     return {
         'video_aspect': st.session_state.get('video_aspect', VideoAspect.portrait.value),
         'video_quality': st.session_state.get('video_quality', '1080p'),
-        'original_volume': st.session_state.get('original_volume', 0.7)
+        'original_volume': st.session_state.get('original_volume', AudioVolumeDefaults.ORIGINAL_VOLUME)
     }