NarratoAI/webui/components/audio_settings.py
linyq e1f45db95a feat(tts): 添加 SoulVoice TTS 引擎支持
实现 SoulVoice TTS 引擎集成,包括配置管理、语音选择、API 调用和字幕处理
新增 SoulVoice 配置项和示例配置
修改音频设置面板以支持 SoulVoice 选项
优化音频时长计算和异常处理
更新多语言文案以反映 SoulVoice 支持
2025-08-03 04:26:42 +08:00

317 lines
11 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import streamlit as st
import os
from uuid import uuid4
from app.config import config
from app.services import voice
from app.models.schema import AudioVolumeDefaults
from app.utils import utils
from webui.utils.cache import get_songs_cache
def get_soulvoice_voices():
    """Return the SoulVoice entries to offer in the voice selector.

    Returns:
        ``["soulvoice:custom"]`` when ``config.soulvoice`` holds a non-empty
        ``api_key``; otherwise an empty list.
    """
    # Only one placeholder entry is exposed; the concrete voice is typed
    # into a text field instead of being picked from a list.
    has_api_key = bool(config.soulvoice.get("api_key", ""))
    return ["soulvoice:custom"] if has_api_key else []
def render_audio_panel(tr):
    """Render the bordered audio settings panel.

    Args:
        tr: Callable that receives a UI text key and returns the text to
            display.
    """
    panel = st.container(border=True)
    with panel:
        st.write(tr("Audio Settings"))

        # Text-to-speech controls come first.
        render_tts_settings(tr)

        # Background music controls follow.
        render_bgm_settings(tr)
def render_tts_settings(tr):
    """Render the TTS (text-to-speech) voice selector and its dependent controls.

    Builds the list of selectable voices (Azure voices for the supported
    locales plus the SoulVoice entries), shows them in a select box, stores
    the resulting voice name in ``config.ui["voice_name"]`` and then renders
    the engine-specific settings, the voice parameters and the preview
    button.

    Args:
        tr: Callable that receives a UI text key and returns the text to
            display.
    """
    support_locales = ["zh-CN", "en-US"]
    azure_voices = voice.get_all_azure_voices(filter_locals=support_locales)
    soulvoice_voices = get_soulvoice_voices()

    # Voice id -> label shown in the dropdown. Insertion order (Azure first,
    # SoulVoice last) is also the order of the options.
    female_label = tr("Female")
    male_label = tr("Male")
    friendly_names = {
        v: v.replace("Female", female_label).replace("Male", male_label).replace("Neural", "")
        for v in azure_voices
    }
    for v in soulvoice_voices:
        friendly_names[v] = "SoulVoice (自定义音色)"
    voice_options = list(friendly_names)

    # Work out which option should be preselected.
    saved_voice_name = config.ui.get("voice_name", "")
    saved_voice_name_index = 0
    if saved_voice_name in friendly_names:
        saved_voice_name_index = voice_options.index(saved_voice_name)
    elif (
        isinstance(saved_voice_name, str)
        and saved_voice_name.startswith("soulvoice:")
        and "soulvoice:custom" in friendly_names
    ):
        # The value stored below is the resolved "soulvoice:<uri>" name, which
        # is never an option itself. Map it back to the custom SoulVoice entry
        # so the selection is not replaced by the default Azure voice.
        saved_voice_name_index = voice_options.index("soulvoice:custom")
    else:
        # Nothing usable saved: pick the first non-V2, non-SoulVoice voice
        # whose name starts with the current UI language.
        ui_language = st.session_state["ui_language"].lower()
        for i, v in enumerate(voice_options):
            if (
                v.lower().startswith(ui_language)
                and "V2" not in v
                and not v.startswith("soulvoice:")
            ):
                saved_voice_name_index = i
                break

    # Options are the voice ids themselves; labels are applied for display
    # only, so the selected id never has to be recovered from its label.
    voice_name = st.selectbox(
        tr("Speech Synthesis"),
        options=voice_options,
        index=saved_voice_name_index,
        format_func=lambda v: friendly_names[v],
    )

    # The custom SoulVoice entry stands for the voice URI kept in the config;
    # make sure the resulting name carries the "soulvoice:" prefix.
    if voice_name == "soulvoice:custom":
        custom_voice_uri = config.soulvoice.get("voice_uri", "")
        if custom_voice_uri:
            if custom_voice_uri.startswith("soulvoice:"):
                voice_name = custom_voice_uri
            else:
                voice_name = f"soulvoice:{custom_voice_uri}"

    # Persist the choice.
    config.ui["voice_name"] = voice_name

    # Engine-specific settings.
    if voice.is_soulvoice_voice(voice_name):
        render_soulvoice_settings(tr)
    elif voice.is_azure_v2_voice(voice_name):
        render_azure_v2_settings(tr)

    # Volume / rate / pitch controls.
    render_voice_parameters(tr, voice_name)

    # Preview button.
    render_voice_preview(tr, voice_name)
def render_soulvoice_settings(tr):
    """Render the SoulVoice configuration inputs and store what was entered.

    Shows inputs for the API key and the voice URI, plus the API address and
    model name inside a collapsed "advanced" expander. All four values are
    written back to ``config.soulvoice`` and a status message is displayed.

    Args:
        tr: Translation callable; accepted for signature parity with the
            other renderers and not used here.
    """
    soulvoice_cfg = config.soulvoice

    # API key (masked input).
    api_key = st.text_input(
        "SoulVoice API Key",
        value=soulvoice_cfg.get("api_key", ""),
        type="password",
        help="请输入您的 SoulVoice API 密钥"
    )

    # Voice URI.
    voice_uri = st.text_input(
        "音色 URI",
        value=soulvoice_cfg.get(
            "voice_uri",
            "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr",
        ),
        help="请输入 SoulVoice 音色标识符格式如speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr",
        placeholder="speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr"
    )

    # Optional endpoint and model overrides.
    with st.expander("高级设置", expanded=False):
        api_url = st.text_input(
            "API 地址",
            value=soulvoice_cfg.get("api_url", "https://tts.scsmtech.cn/tts"),
            help="SoulVoice API 接口地址"
        )
        model = st.text_input(
            "模型名称",
            value=soulvoice_cfg.get("model", "FunAudioLLM/CosyVoice2-0.5B"),
            help="使用的 TTS 模型"
        )

    # Write everything back to the configuration.
    entered_values = (
        ("api_key", api_key),
        ("voice_uri", voice_uri),
        ("api_url", api_url),
        ("model", model),
    )
    for cfg_key, cfg_value in entered_values:
        soulvoice_cfg[cfg_key] = cfg_value

    # Status line: a missing API key is reported before a missing voice URI.
    if not api_key:
        st.warning("⚠️ 请配置 SoulVoice API Key")
    elif not voice_uri:
        st.warning("⚠️ 请配置音色 URI")
    else:
        st.success("✅ SoulVoice 配置已设置")
def render_azure_v2_settings(tr):
    """Render the Azure V2 speech credentials and store what was entered.

    Args:
        tr: Callable that receives a UI text key and returns the text to
            display.
    """
    azure_cfg = config.azure
    region_default = azure_cfg.get("speech_region", "")
    key_default = azure_cfg.get("speech_key", "")

    region = st.text_input(tr("Speech Region"), value=region_default)
    # The key is a secret, so the input is masked.
    key = st.text_input(tr("Speech Key"), value=key_default, type="password")

    azure_cfg["speech_region"] = region
    azure_cfg["speech_key"] = key
def render_voice_parameters(tr, voice_name):
    """Render the volume, rate and pitch controls for the chosen voice.

    The selected values are stored in ``st.session_state`` under
    ``voice_volume``, ``voice_rate`` and ``voice_pitch``.

    Args:
        tr: Callable that receives a UI text key and returns the text to
            display.
        voice_name: Name of the selected voice; decides which rate and pitch
            controls are shown.
    """
    # Volume, using the shared defaults.
    st.session_state['voice_volume'] = st.slider(
        tr("Speech Volume"),
        min_value=AudioVolumeDefaults.MIN_VOLUME,
        max_value=AudioVolumeDefaults.MAX_VOLUME,
        value=AudioVolumeDefaults.VOICE_VOLUME,
        step=0.01,
        help=tr("Adjust the volume of the original audio")
    )

    if voice.is_soulvoice_voice(voice_name):
        # SoulVoice: continuous rate slider.
        st.session_state['voice_rate'] = st.slider(
            tr("Speech Rate"),
            min_value=0.5,
            max_value=2.0,
            value=1.0,
            step=0.1,
            help="SoulVoice 语音速度控制"
        )
        # SoulVoice has no pitch control: pin the neutral value and say so.
        st.session_state['voice_pitch'] = 1.0
        st.info(" SoulVoice 引擎不支持音调调节")
        return

    # Other voices: rate and pitch are chosen from the same preset list,
    # with 1.0 (position 2) preselected.
    preset_steps = [0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0]
    st.session_state['voice_rate'] = st.selectbox(
        tr("Speech Rate"),
        options=preset_steps,
        index=2,
    )
    st.session_state['voice_pitch'] = st.selectbox(
        tr("Speech Pitch"),
        options=preset_steps,
        index=2,
    )
def render_voice_preview(tr, voice_name):
    """Render the "Play Voice" button and play a short synthesized sample.

    When the button is pressed, a fixed sample sentence is synthesized with
    the given voice and the rate/pitch stored in ``st.session_state``. If
    that yields no result, synthesis is retried once with an English fallback
    sentence. The audio is written to a uniquely named temporary mp3 that is
    always removed afterwards, including when synthesis raises.

    Args:
        tr: Callable that receives a UI text key and returns the text to
            display.
        voice_name: Name of the voice to synthesize with.
    """
    if not st.button(tr("Play Voice")):
        return

    # The sample text is a fixed literal, so no empty-text fallback is needed.
    play_content = "感谢关注 NarratoAI有任何问题或建议可以关注微信公众号求助或讨论"
    fallback_content = "This is a example voice. if you hear this, the voice synthesis failed with the original content."

    with st.spinner(tr("Synthesizing Voice")):
        temp_dir = utils.storage_dir("temp", create=True)
        audio_file = os.path.join(temp_dir, f"tmp-voice-{uuid4()}.mp3")

        def synthesize(text):
            """Synthesize ``text`` into ``audio_file`` with the current settings."""
            return voice.tts(
                text=text,
                voice_name=voice_name,
                voice_rate=st.session_state.get('voice_rate', 1.0),
                voice_pitch=st.session_state.get('voice_pitch', 1.0),
                voice_file=audio_file,
            )

        try:
            # Retry once with the fallback sentence when the first attempt
            # returns nothing.
            sub_maker = synthesize(play_content) or synthesize(fallback_content)

            if sub_maker and os.path.exists(audio_file):
                # Hand the bytes to the player so playback does not depend on
                # the temporary file, which is deleted right below.
                with open(audio_file, "rb") as audio_fp:
                    st.audio(audio_fp.read(), format="audio/mp3")
        finally:
            # Never leave the temporary file behind, even on failure.
            if os.path.exists(audio_file):
                os.remove(audio_file)
def render_bgm_settings(tr):
    """Render the background music controls and store the selection.

    Writes ``bgm_type``, ``bgm_file`` and ``bgm_volume`` to
    ``st.session_state``. ``bgm_file`` is rewritten on every call: it holds
    the entered path only while the custom mode is selected and the path
    points to an existing file, and is an empty string otherwise, so an
    earlier choice can never linger.

    Args:
        tr: Callable that receives a UI text key and returns the text to
            display.
    """
    # (label, value) pairs; the select box works on positions and shows labels.
    bgm_options = [
        (tr("No Background Music"), ""),
        (tr("Random Background Music"), "random"),
        (tr("Custom Background Music"), "custom"),
    ]

    selected_index = st.selectbox(
        tr("Background Music"),
        index=1,
        options=range(len(bgm_options)),
        format_func=lambda x: bgm_options[x][0],
    )

    bgm_type = bgm_options[selected_index][1]
    st.session_state['bgm_type'] = bgm_type

    # Custom mode: accept the path only if it is an existing file.
    bgm_file = ""
    if bgm_type == "custom":
        custom_bgm_file = (st.text_input(tr("Custom Background Music File")) or "").strip()
        if custom_bgm_file and os.path.isfile(custom_bgm_file):
            bgm_file = custom_bgm_file
    st.session_state['bgm_file'] = bgm_file

    # Background music volume, using the shared defaults.
    bgm_volume = st.slider(
        tr("Background Music Volume"),
        min_value=AudioVolumeDefaults.MIN_VOLUME,
        max_value=AudioVolumeDefaults.MAX_VOLUME,
        value=AudioVolumeDefaults.BGM_VOLUME,
        step=0.01,
        help=tr("Adjust the volume of the original audio")
    )
    st.session_state['bgm_volume'] = bgm_volume
def get_audio_params():
    """Collect the current audio settings into a dict.

    Returns:
        A dict with ``voice_name`` read from ``config.ui`` and
        ``voice_volume``, ``voice_rate``, ``voice_pitch``, ``bgm_type``,
        ``bgm_file`` and ``bgm_volume`` read from ``st.session_state``, each
        falling back to a default when it has not been set.
    """
    state = st.session_state
    # Session key -> value used when the key has not been stored yet.
    session_defaults = (
        ('voice_volume', AudioVolumeDefaults.VOICE_VOLUME),
        ('voice_rate', 1.0),
        ('voice_pitch', 1.0),
        ('bgm_type', 'random'),
        ('bgm_file', ''),
        ('bgm_volume', AudioVolumeDefaults.BGM_VOLUME),
    )

    params = {'voice_name': config.ui.get("voice_name", "")}
    for key, default in session_defaults:
        params[key] = state.get(key, default)
    return params