mirror of
https://github.com/linyqh/NarratoAI.git
synced 2025-12-11 02:12:50 +00:00
feat(audio): 统一音量配置并修复原声音量默认值问题
引入AudioVolumeDefaults类集中管理音量配置,确保全局一致性;修复原声音量默认值为0.7以解决短剧解说模式问题;添加音量验证和详细日志便于调试。
This commit is contained in:
parent
7a8de5e791
commit
1792311ef4
3
.gitignore
vendored
3
.gitignore
vendored
@ -34,3 +34,6 @@ resource/srt/*.srt
|
||||
app/models/faster-whisper-large-v2/*
|
||||
app/models/faster-whisper-large-v3/*
|
||||
app/models/bert/*
|
||||
|
||||
bug清单.md
|
||||
task.md
|
||||
@ -1,6 +1,6 @@
|
||||
import warnings
|
||||
from enum import Enum
|
||||
from typing import Any, List, Optional
|
||||
from typing import Any, List, Optional, Union
|
||||
|
||||
import pydantic
|
||||
from pydantic import BaseModel, Field
|
||||
@ -13,6 +13,24 @@ warnings.filterwarnings(
|
||||
)
|
||||
|
||||
|
||||
class AudioVolumeDefaults:
    """Single source of truth for audio volume defaults and limits.

    The attributes are plain class-level constants, read directly as
    ``AudioVolumeDefaults.<NAME>``, so every consumer shares the same
    default volumes and the same permitted range.
    """

    # Inclusive lower and upper bounds of the permitted volume range.
    MIN_VOLUME = 0.0
    MAX_VOLUME = 1.0

    # Default volume of the narration voice, and of the TTS track.
    VOICE_VOLUME = 1.0
    TTS_VOLUME = 1.0

    # Default volume of the video's original sound track. The original
    # comment marks this value as the key part of the bug fix.
    ORIGINAL_VOLUME = 0.7

    # Default volume of the background music.
    BGM_VOLUME = 0.3
|
||||
|
||||
|
||||
class VideoConcatMode(str, Enum):
|
||||
random = "random"
|
||||
sequential = "sequential"
|
||||
@ -101,7 +119,7 @@ class VideoParams(BaseModel):
|
||||
|
||||
video_subject: str
|
||||
video_script: str = "" # 用于生成视频的脚本
|
||||
video_terms: Optional[str | list] = None # 用于生成视频的关键词
|
||||
video_terms: Optional[Union[str, list]] = None # 用于生成视频的关键词
|
||||
video_aspect: Optional[VideoAspect] = VideoAspect.portrait.value
|
||||
video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
|
||||
video_clip_duration: Optional[int] = 5
|
||||
@ -113,11 +131,11 @@ class VideoParams(BaseModel):
|
||||
video_language: Optional[str] = "" # auto detect
|
||||
|
||||
voice_name: Optional[str] = ""
|
||||
voice_volume: Optional[float] = 1.0
|
||||
voice_volume: Optional[float] = AudioVolumeDefaults.VOICE_VOLUME
|
||||
voice_rate: Optional[float] = 1.0
|
||||
bgm_type: Optional[str] = "random"
|
||||
bgm_file: Optional[str] = ""
|
||||
bgm_volume: Optional[float] = 0.2
|
||||
bgm_volume: Optional[float] = AudioVolumeDefaults.BGM_VOLUME
|
||||
|
||||
subtitle_enabled: Optional[bool] = True
|
||||
subtitle_position: Optional[str] = "bottom" # top, bottom, center
|
||||
@ -157,11 +175,11 @@ class AudioRequest(BaseModel):
|
||||
video_script: str
|
||||
video_language: Optional[str] = ""
|
||||
voice_name: Optional[str] = "zh-CN-XiaoxiaoNeural-Female"
|
||||
voice_volume: Optional[float] = 1.0
|
||||
voice_volume: Optional[float] = AudioVolumeDefaults.VOICE_VOLUME
|
||||
voice_rate: Optional[float] = 1.2
|
||||
bgm_type: Optional[str] = "random"
|
||||
bgm_file: Optional[str] = ""
|
||||
bgm_volume: Optional[float] = 0.2
|
||||
bgm_volume: Optional[float] = AudioVolumeDefaults.BGM_VOLUME
|
||||
video_source: Optional[str] = "local"
|
||||
|
||||
|
||||
@ -347,7 +365,7 @@ class VideoClipParams(BaseModel):
|
||||
# video_concat_mode: Optional[VideoConcatMode] = VideoConcatMode.random.value
|
||||
|
||||
voice_name: Optional[str] = Field(default="zh-CN-YunjianNeural", description="语音名称")
|
||||
voice_volume: Optional[float] = Field(default=1.0, description="解说语音音量")
|
||||
voice_volume: Optional[float] = Field(default=AudioVolumeDefaults.VOICE_VOLUME, description="解说语音音量")
|
||||
voice_rate: Optional[float] = Field(default=1.0, description="语速")
|
||||
voice_pitch: Optional[float] = Field(default=1.0, description="语调")
|
||||
|
||||
@ -367,9 +385,9 @@ class VideoClipParams(BaseModel):
|
||||
|
||||
n_threads: Optional[int] = Field(default=16, description="线程数") # 线程数,有助于提升视频处理速度
|
||||
|
||||
tts_volume: Optional[float] = Field(default=1.0, description="解说语音音量(后处理)")
|
||||
original_volume: Optional[float] = Field(default=1.0, description="视频原声音量")
|
||||
bgm_volume: Optional[float] = Field(default=0.3, description="背景音乐音量")
|
||||
tts_volume: Optional[float] = Field(default=AudioVolumeDefaults.TTS_VOLUME, description="解说语音音量(后处理)")
|
||||
original_volume: Optional[float] = Field(default=AudioVolumeDefaults.ORIGINAL_VOLUME, description="视频原声音量")
|
||||
bgm_volume: Optional[float] = Field(default=AudioVolumeDefaults.BGM_VOLUME, description="背景音乐音量")
|
||||
|
||||
|
||||
class VideoTranscriptionRequest(BaseModel):
|
||||
|
||||
@ -24,6 +24,7 @@ from moviepy.video.tools.subtitles import SubtitlesClip
|
||||
from PIL import ImageFont
|
||||
|
||||
from app.utils import utils
|
||||
from app.models.schema import AudioVolumeDefaults
|
||||
|
||||
|
||||
def merge_materials(
|
||||
@ -66,11 +67,12 @@ def merge_materials(
|
||||
if options is None:
|
||||
options = {}
|
||||
|
||||
# 设置默认参数值
|
||||
voice_volume = options.get('voice_volume', 1.0)
|
||||
bgm_volume = options.get('bgm_volume', 0.3)
|
||||
original_audio_volume = options.get('original_audio_volume', 0.0) # 默认为0,即不保留原声
|
||||
keep_original_audio = options.get('keep_original_audio', False) # 是否保留原声
|
||||
# 设置默认参数值 - 使用统一的音量配置
|
||||
voice_volume = options.get('voice_volume', AudioVolumeDefaults.VOICE_VOLUME)
|
||||
bgm_volume = options.get('bgm_volume', AudioVolumeDefaults.BGM_VOLUME)
|
||||
# 修复bug: 将原声音量默认值从0.0改为0.7,确保短剧解说模式下原片音量正常
|
||||
original_audio_volume = options.get('original_audio_volume', AudioVolumeDefaults.ORIGINAL_VOLUME)
|
||||
keep_original_audio = options.get('keep_original_audio', True) # 默认保留原声
|
||||
subtitle_font = options.get('subtitle_font', '')
|
||||
subtitle_font_size = options.get('subtitle_font_size', 40)
|
||||
subtitle_color = options.get('subtitle_color', '#FFFFFF')
|
||||
@ -81,11 +83,29 @@ def merge_materials(
|
||||
stroke_width = options.get('stroke_width', 1)
|
||||
threads = options.get('threads', 2)
|
||||
fps = options.get('fps', 30)
|
||||
|
||||
|
||||
# 音量配置日志 - 便于调试音量问题
|
||||
logger.info(f"音量配置详情:")
|
||||
logger.info(f" - 配音音量: {voice_volume}")
|
||||
logger.info(f" - 背景音乐音量: {bgm_volume}")
|
||||
logger.info(f" - 原声音量: {original_audio_volume}")
|
||||
logger.info(f" - 是否保留原声: {keep_original_audio}")
|
||||
|
||||
# 音量参数验证
|
||||
def validate_volume(volume, name):
    """Return ``volume`` limited to the configured volume range.

    Args:
        volume: The requested volume value.
        name: Label for this volume, used as the prefix of the warning text.

    Returns:
        ``volume`` unchanged when it lies within
        ``[AudioVolumeDefaults.MIN_VOLUME, AudioVolumeDefaults.MAX_VOLUME]``;
        otherwise the nearest bound of that range.
    """
    # Guard clause: an in-range value passes straight through, no logging.
    if AudioVolumeDefaults.MIN_VOLUME <= volume <= AudioVolumeDefaults.MAX_VOLUME:
        return volume

    # Out of range: report it, then clamp to the closest bound.
    logger.warning(f"{name}音量 {volume} 超出有效范围 [{AudioVolumeDefaults.MIN_VOLUME}, {AudioVolumeDefaults.MAX_VOLUME}],将被限制")
    capped_above = min(volume, AudioVolumeDefaults.MAX_VOLUME)
    return max(AudioVolumeDefaults.MIN_VOLUME, capped_above)
|
||||
|
||||
voice_volume = validate_volume(voice_volume, "配音")
|
||||
bgm_volume = validate_volume(bgm_volume, "背景音乐")
|
||||
original_audio_volume = validate_volume(original_audio_volume, "原声")
|
||||
|
||||
# 处理透明背景色问题 - MoviePy 2.1.1不支持'transparent'值
|
||||
if subtitle_bg_color == 'transparent':
|
||||
subtitle_bg_color = None # None在新版MoviePy中表示透明背景
|
||||
|
||||
|
||||
# 创建输出目录(如果不存在)
|
||||
output_dir = os.path.dirname(output_path)
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
@ -314,24 +314,35 @@ def generate_video_v3(
|
||||
audio_clips = []
|
||||
|
||||
# 添加原声(设置音量)
|
||||
logger.debug(f"音量配置: {volume_config}")
|
||||
logger.info(f"音量配置详情: {volume_config}")
|
||||
if video.audio is not None:
|
||||
original_audio = video.audio.volumex(volume_config['original'])
|
||||
original_volume = volume_config['original']
|
||||
logger.info(f"应用原声音量: {original_volume}")
|
||||
original_audio = video.audio.volumex(original_volume)
|
||||
audio_clips.append(original_audio)
|
||||
logger.info("原声音频已添加到合成列表")
|
||||
else:
|
||||
logger.warning("视频没有音轨,无法添加原声")
|
||||
|
||||
# 添加BGM(如果提供)
|
||||
if bgm_path:
|
||||
logger.info(f"添加背景音乐: {bgm_path}")
|
||||
bgm = AudioFileClip(bgm_path)
|
||||
if bgm.duration < video.duration:
|
||||
bgm = loop_audio_clip(bgm, video.duration)
|
||||
else:
|
||||
bgm = bgm.subclip(0, video.duration)
|
||||
bgm = bgm.volumex(volume_config['bgm'])
|
||||
bgm_volume = volume_config['bgm']
|
||||
logger.info(f"应用BGM音量: {bgm_volume}")
|
||||
bgm = bgm.volumex(bgm_volume)
|
||||
audio_clips.append(bgm)
|
||||
|
||||
# 添加解说音频(如果提供)
|
||||
if narration_path:
|
||||
narration = AudioFileClip(narration_path).volumex(volume_config['narration'])
|
||||
logger.info(f"添加解说音频: {narration_path}")
|
||||
narration_volume = volume_config['narration']
|
||||
logger.info(f"应用解说音量: {narration_volume}")
|
||||
narration = AudioFileClip(narration_path).volumex(narration_volume)
|
||||
audio_clips.append(narration)
|
||||
|
||||
# 合成最终视频(包含字幕)
|
||||
@ -342,8 +353,12 @@ def generate_video_v3(
|
||||
final_video = video
|
||||
|
||||
if audio_clips:
|
||||
logger.info(f"合成音频轨道,共 {len(audio_clips)} 个音频片段")
|
||||
final_audio = CompositeAudioClip(audio_clips)
|
||||
final_video = final_video.set_audio(final_audio)
|
||||
logger.info("音频合成完成")
|
||||
else:
|
||||
logger.warning("没有音频轨道需要合成")
|
||||
|
||||
# 导出视频
|
||||
logger.info("开始导出视频...") # 调试信息
|
||||
|
||||
@ -3,6 +3,7 @@ import os
|
||||
from uuid import uuid4
|
||||
from app.config import config
|
||||
from app.services import voice
|
||||
from app.models.schema import AudioVolumeDefaults
|
||||
from app.utils import utils
|
||||
from webui.utils.cache import get_songs_cache
|
||||
|
||||
@ -94,12 +95,12 @@ def render_azure_v2_settings(tr):
|
||||
|
||||
def render_voice_parameters(tr):
|
||||
"""渲染语音参数设置"""
|
||||
# 音量
|
||||
# 音量 - 使用统一的默认值
|
||||
voice_volume = st.slider(
|
||||
tr("Speech Volume"),
|
||||
min_value=0.0,
|
||||
max_value=1.0,
|
||||
value=1.0,
|
||||
min_value=AudioVolumeDefaults.MIN_VOLUME,
|
||||
max_value=AudioVolumeDefaults.MAX_VOLUME,
|
||||
value=AudioVolumeDefaults.VOICE_VOLUME,
|
||||
step=0.01,
|
||||
help=tr("Adjust the volume of the original audio")
|
||||
)
|
||||
@ -187,12 +188,12 @@ def render_bgm_settings(tr):
|
||||
if custom_bgm_file and os.path.exists(custom_bgm_file):
|
||||
st.session_state['bgm_file'] = custom_bgm_file
|
||||
|
||||
# 背景音乐音量
|
||||
# 背景音乐音量 - 使用统一的默认值
|
||||
bgm_volume = st.slider(
|
||||
tr("Background Music Volume"),
|
||||
min_value=0.0,
|
||||
max_value=1.0,
|
||||
value=0.3,
|
||||
min_value=AudioVolumeDefaults.MIN_VOLUME,
|
||||
max_value=AudioVolumeDefaults.MAX_VOLUME,
|
||||
value=AudioVolumeDefaults.BGM_VOLUME,
|
||||
step=0.01,
|
||||
help=tr("Adjust the volume of the original audio")
|
||||
)
|
||||
@ -203,10 +204,10 @@ def get_audio_params():
|
||||
"""获取音频参数"""
|
||||
return {
|
||||
'voice_name': config.ui.get("voice_name", ""),
|
||||
'voice_volume': st.session_state.get('voice_volume', 1.0),
|
||||
'voice_volume': st.session_state.get('voice_volume', AudioVolumeDefaults.VOICE_VOLUME),
|
||||
'voice_rate': st.session_state.get('voice_rate', 1.0),
|
||||
'voice_pitch': st.session_state.get('voice_pitch', 1.0),
|
||||
'bgm_type': st.session_state.get('bgm_type', 'random'),
|
||||
'bgm_file': st.session_state.get('bgm_file', ''),
|
||||
'bgm_volume': st.session_state.get('bgm_volume', 0.3),
|
||||
'bgm_volume': st.session_state.get('bgm_volume', AudioVolumeDefaults.BGM_VOLUME),
|
||||
}
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
import streamlit as st
|
||||
from app.models.schema import VideoClipParams, VideoAspect
|
||||
from app.models.schema import VideoClipParams, VideoAspect, AudioVolumeDefaults
|
||||
|
||||
|
||||
def render_video_panel(tr):
|
||||
@ -41,12 +41,12 @@ def render_video_config(tr, params):
|
||||
)
|
||||
st.session_state['video_quality'] = video_qualities[quality_index][1]
|
||||
|
||||
# 原声音量
|
||||
# 原声音量 - 使用统一的默认值
|
||||
params.original_volume = st.slider(
|
||||
tr("Original Volume"),
|
||||
min_value=0.0,
|
||||
max_value=1.0,
|
||||
value=0.7,
|
||||
min_value=AudioVolumeDefaults.MIN_VOLUME,
|
||||
max_value=AudioVolumeDefaults.MAX_VOLUME,
|
||||
value=AudioVolumeDefaults.ORIGINAL_VOLUME,
|
||||
step=0.01,
|
||||
help=tr("Adjust the volume of the original audio")
|
||||
)
|
||||
@ -58,5 +58,5 @@ def get_video_params():
|
||||
return {
|
||||
'video_aspect': st.session_state.get('video_aspect', VideoAspect.portrait.value),
|
||||
'video_quality': st.session_state.get('video_quality', '1080p'),
|
||||
'original_volume': st.session_state.get('original_volume', 0.7)
|
||||
'original_volume': st.session_state.get('original_volume', AudioVolumeDefaults.ORIGINAL_VOLUME)
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user