feat(audio): 统一音量配置并修复原声音量默认值问题

引入AudioVolumeDefaults类集中管理音量配置,确保全局一致性
将原声音量默认值修正为0.7,解决短剧解说模式下原片音量异常的问题
添加音量验证和详细日志便于调试
This commit is contained in:
linyq 2025-07-02 17:54:00 +08:00
parent 7a8de5e791
commit 1792311ef4
6 changed files with 94 additions and 37 deletions

3
.gitignore vendored
View File

@ -34,3 +34,6 @@ resource/srt/*.srt
app/models/faster-whisper-large-v2/*
app/models/faster-whisper-large-v3/*
app/models/bert/*
bug清单.md
task.md

View File

@ -1,6 +1,6 @@
import warnings
from enum import Enum
from typing import Any, List, Optional
from typing import Any, List, Optional, Union
import pydantic
from pydantic import BaseModel, Field
@ -13,6 +13,24 @@ warnings.filterwarnings(
)
class AudioVolumeDefaults:
    """Central namespace for default audio volume levels.

    Every value is a plain class attribute holding a linear volume factor,
    so it can be read directly (``AudioVolumeDefaults.BGM_VOLUME``) without
    creating an instance. Keeping the numbers here lets schema defaults,
    services and UI widgets share one definition.
    """

    # Inclusive bounds of the accepted volume range.
    MIN_VOLUME: float = 0.0
    MAX_VOLUME: float = 1.0

    # Narration / synthesized speech defaults.
    VOICE_VOLUME: float = 1.0
    TTS_VOLUME: float = 1.0

    # Original (source video) audio default; kept below full volume.
    # This value is the key part of the original-audio volume bug fix.
    ORIGINAL_VOLUME: float = 0.7

    # Background music default.
    BGM_VOLUME: float = 0.3
class VideoConcatMode(str, Enum):
random = "random"
sequential = "sequential"
@ -101,7 +119,7 @@ class VideoParams(BaseModel):
video_subject: str
video_script: str = "" # 用于生成视频的脚本
video_terms: Optional[str | list] = None # 用于生成视频的关键词
video_terms: Optional[Union[str, list]] = None # 用于生成视频的关键词
video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
video_clip_duration: Optional[int] = 5
@ -113,11 +131,11 @@ class VideoParams(BaseModel):
video_language: Optional[str] = "" # auto detect
voice_name: Optional[str] = ""
voice_volume: Optional[float] = 1.0
voice_volume: Optional[float] = AudioVolumeDefaults.VOICE_VOLUME
voice_rate: Optional[float] = 1.0
bgm_type: Optional[str] = "random"
bgm_file: Optional[str] = ""
bgm_volume: Optional[float] = 0.2
bgm_volume: Optional[float] = AudioVolumeDefaults.BGM_VOLUME
subtitle_enabled: Optional[bool] = True
subtitle_position: Optional[str] = "bottom" # top, bottom, center
@ -157,11 +175,11 @@ class AudioRequest(BaseModel):
video_script: str
video_language: Optional[str] = ""
voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
voice_volume: Optional[float] = 1.0
voice_volume: Optional[float] = AudioVolumeDefaults.VOICE_VOLUME
voice_rate: Optional[float] = 1.2
bgm_type: Optional[str] = "random"
bgm_file: Optional[str] = ""
bgm_volume: Optional[float] = 0.2
bgm_volume: Optional[float] = AudioVolumeDefaults.BGM_VOLUME
video_source: Optional[str] = "local"
@ -347,7 +365,7 @@ class VideoClipParams(BaseModel):
# video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
voice_name: Optional[str] = Field(default="zh-CN-YunjianNeural", description="语音名称")
voice_volume: Optional[float] = Field(default=1.0, description="解说语音音量")
voice_volume: Optional[float] = Field(default=AudioVolumeDefaults.VOICE_VOLUME, description="解说语音音量")
voice_rate: Optional[float] = Field(default=1.0, description="语速")
voice_pitch: Optional[float] = Field(default=1.0, description="语调")
@ -367,9 +385,9 @@ class VideoClipParams(BaseModel):
n_threads: Optional[int] = Field(default=16, description="线程数") # 线程数,有助于提升视频处理速度
tts_volume: Optional[float] = Field(default=1.0, description="解说语音音量(后处理)")
original_volume: Optional[float] = Field(default=1.0, description="视频原声音量")
bgm_volume: Optional[float] = Field(default=0.3, description="背景音乐音量")
tts_volume: Optional[float] = Field(default=AudioVolumeDefaults.TTS_VOLUME, description="解说语音音量(后处理)")
original_volume: Optional[float] = Field(default=AudioVolumeDefaults.ORIGINAL_VOLUME, description="视频原声音量")
bgm_volume: Optional[float] = Field(default=AudioVolumeDefaults.BGM_VOLUME, description="背景音乐音量")
class VideoTranscriptionRequest(BaseModel):

View File

@ -24,6 +24,7 @@ from moviepy.video.tools.subtitles import SubtitlesClip
from PIL import ImageFont
from app.utils import utils
from app.models.schema import AudioVolumeDefaults
def merge_materials(
@ -66,11 +67,12 @@ def merge_materials(
if options is None:
options = {}
# 设置默认参数值
voice_volume = options.get('voice_volume', 1.0)
bgm_volume = options.get('bgm_volume', 0.3)
original_audio_volume = options.get('original_audio_volume', 0.0) # 默认为0即不保留原声
keep_original_audio = options.get('keep_original_audio', False) # 是否保留原声
# 设置默认参数值 - 使用统一的音量配置
voice_volume = options.get('voice_volume', AudioVolumeDefaults.VOICE_VOLUME)
bgm_volume = options.get('bgm_volume', AudioVolumeDefaults.BGM_VOLUME)
# 修复bug: 将原声音量默认值从0.0改为0.7,确保短剧解说模式下原片音量正常
original_audio_volume = options.get('original_audio_volume', AudioVolumeDefaults.ORIGINAL_VOLUME)
keep_original_audio = options.get('keep_original_audio', True) # 默认保留原声
subtitle_font = options.get('subtitle_font', '')
subtitle_font_size = options.get('subtitle_font_size', 40)
subtitle_color = options.get('subtitle_color', '#FFFFFF')
@ -81,11 +83,29 @@ def merge_materials(
stroke_width = options.get('stroke_width', 1)
threads = options.get('threads', 2)
fps = options.get('fps', 30)
# 音量配置日志 - 便于调试音量问题
logger.info(f"音量配置详情:")
logger.info(f" - 配音音量: {voice_volume}")
logger.info(f" - 背景音乐音量: {bgm_volume}")
logger.info(f" - 原声音量: {original_audio_volume}")
logger.info(f" - 是否保留原声: {keep_original_audio}")
# 音量参数验证
def validate_volume(volume, name):
    """Return ``volume`` limited to the configured volume range.

    Args:
        volume: The requested volume factor.
        name: Label inserted into the warning message to identify which
            volume setting was out of range.

    Returns:
        ``volume`` unchanged when it lies within
        ``[AudioVolumeDefaults.MIN_VOLUME, AudioVolumeDefaults.MAX_VOLUME]``;
        otherwise the value clamped to that range, after a warning is logged.
    """
    lower_bound = AudioVolumeDefaults.MIN_VOLUME
    upper_bound = AudioVolumeDefaults.MAX_VOLUME

    # Guard clause: in-range values pass straight through without logging.
    if lower_bound <= volume <= upper_bound:
        return volume

    logger.warning(f"{name}音量 {volume} 超出有效范围 [{AudioVolumeDefaults.MIN_VOLUME}, {AudioVolumeDefaults.MAX_VOLUME}],将被限制")
    capped = min(volume, upper_bound)
    return max(lower_bound, capped)
voice_volume = validate_volume(voice_volume, "配音")
bgm_volume = validate_volume(bgm_volume, "背景音乐")
original_audio_volume = validate_volume(original_audio_volume, "原声")
# 处理透明背景色问题 - MoviePy 2.1.1不支持'transparent'值
if subtitle_bg_color == 'transparent':
subtitle_bg_color = None # None在新版MoviePy中表示透明背景
# 创建输出目录(如果不存在)
output_dir = os.path.dirname(output_path)
os.makedirs(output_dir, exist_ok=True)

View File

@ -314,24 +314,35 @@ def generate_video_v3(
audio_clips = []
# 添加原声(设置音量)
logger.debug(f"音量配置: {volume_config}")
logger.info(f"音量配置详情: {volume_config}")
if video.audio is not None:
original_audio = video.audio.volumex(volume_config['original'])
original_volume = volume_config['original']
logger.info(f"应用原声音量: {original_volume}")
original_audio = video.audio.volumex(original_volume)
audio_clips.append(original_audio)
logger.info("原声音频已添加到合成列表")
else:
logger.warning("视频没有音轨,无法添加原声")
# 添加BGM如果提供
if bgm_path:
logger.info(f"添加背景音乐: {bgm_path}")
bgm = AudioFileClip(bgm_path)
if bgm.duration < video.duration:
bgm = loop_audio_clip(bgm, video.duration)
else:
bgm = bgm.subclip(0, video.duration)
bgm = bgm.volumex(volume_config['bgm'])
bgm_volume = volume_config['bgm']
logger.info(f"应用BGM音量: {bgm_volume}")
bgm = bgm.volumex(bgm_volume)
audio_clips.append(bgm)
# 添加解说音频(如果提供)
if narration_path:
narration = AudioFileClip(narration_path).volumex(volume_config['narration'])
logger.info(f"添加解说音频: {narration_path}")
narration_volume = volume_config['narration']
logger.info(f"应用解说音量: {narration_volume}")
narration = AudioFileClip(narration_path).volumex(narration_volume)
audio_clips.append(narration)
# 合成最终视频(包含字幕)
@ -342,8 +353,12 @@ def generate_video_v3(
final_video = video
if audio_clips:
logger.info(f"合成音频轨道,共 {len(audio_clips)} 个音频片段")
final_audio = CompositeAudioClip(audio_clips)
final_video = final_video.set_audio(final_audio)
logger.info("音频合成完成")
else:
logger.warning("没有音频轨道需要合成")
# 导出视频
logger.info("开始导出视频...") # 调试信息

View File

@ -3,6 +3,7 @@ import os
from uuid import uuid4
from app.config import config
from app.services import voice
from app.models.schema import AudioVolumeDefaults
from app.utils import utils
from webui.utils.cache import get_songs_cache
@ -94,12 +95,12 @@ def render_azure_v2_settings(tr):
def render_voice_parameters(tr):
"""渲染语音参数设置"""
# 音量
# 音量 - 使用统一的默认值
voice_volume = st.slider(
tr("Speech Volume"),
min_value=0.0,
max_value=1.0,
value=1.0,
min_value=AudioVolumeDefaults.MIN_VOLUME,
max_value=AudioVolumeDefaults.MAX_VOLUME,
value=AudioVolumeDefaults.VOICE_VOLUME,
step=0.01,
help=tr("Adjust the volume of the original audio")
)
@ -187,12 +188,12 @@ def render_bgm_settings(tr):
if custom_bgm_file and os.path.exists(custom_bgm_file):
st.session_state['bgm_file'] = custom_bgm_file
# 背景音乐音量
# 背景音乐音量 - 使用统一的默认值
bgm_volume = st.slider(
tr("Background Music Volume"),
min_value=0.0,
max_value=1.0,
value=0.3,
min_value=AudioVolumeDefaults.MIN_VOLUME,
max_value=AudioVolumeDefaults.MAX_VOLUME,
value=AudioVolumeDefaults.BGM_VOLUME,
step=0.01,
help=tr("Adjust the volume of the original audio")
)
@ -203,10 +204,10 @@ def get_audio_params():
"""获取音频参数"""
return {
'voice_name': config.ui.get("voice_name", ""),
'voice_volume': st.session_state.get('voice_volume', 1.0),
'voice_volume': st.session_state.get('voice_volume', AudioVolumeDefaults.VOICE_VOLUME),
'voice_rate': st.session_state.get('voice_rate', 1.0),
'voice_pitch': st.session_state.get('voice_pitch', 1.0),
'bgm_type': st.session_state.get('bgm_type', 'random'),
'bgm_file': st.session_state.get('bgm_file', ''),
'bgm_volume': st.session_state.get('bgm_volume', 0.3),
'bgm_volume': st.session_state.get('bgm_volume', AudioVolumeDefaults.BGM_VOLUME),
}

View File

@ -1,5 +1,5 @@
import streamlit as st
from app.models.schema import VideoClipParams, VideoAspect
from app.models.schema import VideoClipParams, VideoAspect, AudioVolumeDefaults
def render_video_panel(tr):
@ -41,12 +41,12 @@ def render_video_config(tr, params):
)
st.session_state['video_quality'] = video_qualities[quality_index][1]
# 原声音量
# 原声音量 - 使用统一的默认值
params.original_volume = st.slider(
tr("Original Volume"),
min_value=0.0,
max_value=1.0,
value=0.7,
min_value=AudioVolumeDefaults.MIN_VOLUME,
max_value=AudioVolumeDefaults.MAX_VOLUME,
value=AudioVolumeDefaults.ORIGINAL_VOLUME,
step=0.01,
help=tr("Adjust the volume of the original audio")
)
@ -58,5 +58,5 @@ def get_video_params():
return {
'video_aspect': st.session_state.get('video_aspect', VideoAspect.portrait.value),
'video_quality': st.session_state.get('video_quality', '1080p'),
'original_volume': st.session_state.get('original_volume', 0.7)
'original_volume': st.session_state.get('original_volume', AudioVolumeDefaults.ORIGINAL_VOLUME)
}