优化腾讯tts引擎

This commit is contained in:
linyq 2025-09-17 00:08:01 +08:00
parent a1474bed02
commit a39c11e0d5
3 changed files with 15 additions and 12 deletions

View File

@@ -176,7 +176,7 @@ class VideoClipParams(BaseModel):
voice_volume: Optional[float] = Field(default=AudioVolumeDefaults.VOICE_VOLUME, description="解说语音音量") voice_volume: Optional[float] = Field(default=AudioVolumeDefaults.VOICE_VOLUME, description="解说语音音量")
voice_rate: Optional[float] = Field(default=1.0, description="语速") voice_rate: Optional[float] = Field(default=1.0, description="语速")
voice_pitch: Optional[float] = Field(default=1.0, description="语调") voice_pitch: Optional[float] = Field(default=1.0, description="语调")
tts_engine: Optional[str] = Field(default="tencent", description="TTS 引擎") tts_engine: Optional[str] = Field(default="", description="TTS 引擎")
bgm_name: Optional[str] = Field(default="random", description="背景音乐名称") bgm_name: Optional[str] = Field(default="random", description="背景音乐名称")
bgm_type: Optional[str] = Field(default="random", description="背景音乐类型") bgm_type: Optional[str] = Field(default="random", description="背景音乐类型")
bgm_file: Optional[str] = Field(default="", description="背景音乐文件") bgm_file: Optional[str] = Field(default="", description="背景音乐文件")

View File

@@ -1085,7 +1085,7 @@ def tts(
) -> Union[SubMaker, None]: ) -> Union[SubMaker, None]:
logger.info(f"使用 TTS 引擎: '{tts_engine}', 语音: '{voice_name}'") logger.info(f"使用 TTS 引擎: '{tts_engine}', 语音: '{voice_name}'")
if tts_engine == "tencent": if tts_engine == "tencent_tts":
logger.info("分发到腾讯云 TTS") logger.info("分发到腾讯云 TTS")
return tencent_tts(text, voice_name, voice_file, speed=voice_rate) return tencent_tts(text, voice_name, voice_file, speed=voice_rate)
@@ -1093,12 +1093,16 @@ def tts(
logger.info("分发到 SoulVoice TTS") logger.info("分发到 SoulVoice TTS")
return soulvoice_tts(text, voice_name, voice_file, speed=voice_rate) return soulvoice_tts(text, voice_name, voice_file, speed=voice_rate)
if tts_engine == "azure": if tts_engine == "azure_speech":
if should_use_azure_speech_services(voice_name): if should_use_azure_speech_services(voice_name):
logger.info("分发到 Azure Speech Services (V2)") logger.info("分发到 Azure Speech Services (V2)")
return azure_tts_v2(text, voice_name, voice_file) return azure_tts_v2(text, voice_name, voice_file)
logger.info("分发到 Edge TTS (Azure V1)") logger.info("分发到 Edge TTS (Azure V1)")
return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file) return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file)
if tts_engine == "edge_tts":
logger.info("分发到 Edge TTS")
return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file)
# Fallback for unknown engine - default to azure v1 # Fallback for unknown engine - default to azure v1
logger.warning(f"未知的 TTS 引擎: '{tts_engine}', 将默认使用 Edge TTS (Azure V1)。") logger.warning(f"未知的 TTS 引擎: '{tts_engine}', 将默认使用 Edge TTS (Azure V1)。")

View File

@@ -112,6 +112,7 @@ def render_tts_settings(tr):
# 保存TTS引擎选择 # 保存TTS引擎选择
config.ui["tts_engine"] = selected_engine config.ui["tts_engine"] = selected_engine
st.session_state['tts_engine'] = selected_engine
# 2. 显示引擎详细说明 # 2. 显示引擎详细说明
if selected_engine in engine_descriptions: if selected_engine in engine_descriptions:
@@ -490,17 +491,14 @@ def render_soulvoice_engine_settings(tr):
# 音色 URI 输入 # 音色 URI 输入
voice_uri = st.text_input( voice_uri = st.text_input(
"音色URI", "音色URI",
value=config.soulvoice.get("voice_uri", "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr"), value=config.soulvoice.get("voice_uri", "speech:2c2hp73s:clzkyf4vy00e5qr6hywum4u84:itjmezhxyynkyzrhhjav"),
help="请输入 SoulVoice 音色标识符", help="请输入 SoulVoice 音色标识符",
placeholder="speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr" placeholder="speech:2c2hp73s:clzkyf4vy00e5qr6hywum4u84:itjmezhxyynkyzrhhjav"
) )
# 模型名称选择 # 模型名称选择
model_options = [ model_options = [
"FunAudioLLM/CosyVoice2-0.5B", "FunAudioLLM/CosyVoice2-0.5B"
"FunAudioLLM/CosyVoice-300M",
"FunAudioLLM/CosyVoice-300M-SFT",
"FunAudioLLM/CosyVoice-300M-Instruct"
] ]
saved_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B") saved_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B")
@@ -636,7 +634,7 @@ def render_soulvoice_settings(tr):
saved_api_key = config.soulvoice.get("api_key", "") saved_api_key = config.soulvoice.get("api_key", "")
saved_api_url = config.soulvoice.get("api_url", "https://tts.scsmtech.cn/tts") saved_api_url = config.soulvoice.get("api_url", "https://tts.scsmtech.cn/tts")
saved_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B") saved_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B")
saved_voice_uri = config.soulvoice.get("voice_uri", "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr") saved_voice_uri = config.soulvoice.get("voice_uri", "speech:2c2hp73s:clzkyf4vy00e5qr6hywum4u84:itjmezhxyynkyzrhhjav")
# API Key 输入 # API Key 输入
api_key = st.text_input( api_key = st.text_input(
@@ -650,8 +648,8 @@ def render_soulvoice_settings(tr):
voice_uri = st.text_input( voice_uri = st.text_input(
"音色 URI", "音色 URI",
value=saved_voice_uri, value=saved_voice_uri,
help="请输入 SoulVoice 音色标识符格式如speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr", help="请输入 SoulVoice 音色标识符格式如speech:2c2hp73s:clzkyf4vy00e5qr6hywum4u84:itjmezhxyynkyzrhhjav",
placeholder="speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr" placeholder="speech:2c2hp73s:clzkyf4vy00e5qr6hywum4u84:itjmezhxyynkyzrhhjav"
) )
# API URL 输入(可选) # API URL 输入(可选)
@@ -822,4 +820,5 @@ def get_audio_params():
'bgm_type': st.session_state.get('bgm_type', 'random'), 'bgm_type': st.session_state.get('bgm_type', 'random'),
'bgm_file': st.session_state.get('bgm_file', ''), 'bgm_file': st.session_state.get('bgm_file', ''),
'bgm_volume': st.session_state.get('bgm_volume', AudioVolumeDefaults.BGM_VOLUME), 'bgm_volume': st.session_state.get('bgm_volume', AudioVolumeDefaults.BGM_VOLUME),
'tts_engine': st.session_state.get('tts_engine', "edge_tts"),
} }