From a39c11e0d5d5fca1f7789f1b9ed1eea5d1a0d67a Mon Sep 17 00:00:00 2001 From: linyq Date: Wed, 17 Sep 2025 00:08:01 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E8=85=BE=E8=AE=AFtts?= =?UTF-8?q?=E5=BC=95=E6=93=8E?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/models/schema.py | 2 +- app/services/voice.py | 8 ++++++-- webui/components/audio_settings.py | 17 ++++++++--------- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/app/models/schema.py b/app/models/schema.py index e0447b7..52e9aef 100644 --- a/app/models/schema.py +++ b/app/models/schema.py @@ -176,7 +176,7 @@ class VideoClipParams(BaseModel): voice_volume: Optional[float] = Field(default=AudioVolumeDefaults.VOICE_VOLUME, description="解说语音音量") voice_rate: Optional[float] = Field(default=1.0, description="语速") voice_pitch: Optional[float] = Field(default=1.0, description="语调") - tts_engine: Optional[str] = Field(default="tencent", description="TTS 引擎") + tts_engine: Optional[str] = Field(default="", description="TTS 引擎") bgm_name: Optional[str] = Field(default="random", description="背景音乐名称") bgm_type: Optional[str] = Field(default="random", description="背景音乐类型") bgm_file: Optional[str] = Field(default="", description="背景音乐文件") diff --git a/app/services/voice.py b/app/services/voice.py index a114534..355dfcf 100644 --- a/app/services/voice.py +++ b/app/services/voice.py @@ -1085,7 +1085,7 @@ def tts( ) -> Union[SubMaker, None]: logger.info(f"使用 TTS 引擎: '{tts_engine}', 语音: '{voice_name}'") - if tts_engine == "tencent": + if tts_engine == "tencent_tts": logger.info("分发到腾讯云 TTS") return tencent_tts(text, voice_name, voice_file, speed=voice_rate) @@ -1093,12 +1093,16 @@ def tts( logger.info("分发到 SoulVoice TTS") return soulvoice_tts(text, voice_name, voice_file, speed=voice_rate) - if tts_engine == "azure": + if tts_engine == "azure_speech": if should_use_azure_speech_services(voice_name): logger.info("分发到 Azure Speech Services (V2)") return azure_tts_v2(text, voice_name, voice_file) logger.info("分发到 Edge TTS (Azure V1)") return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file) + + if tts_engine == "edge_tts": + logger.info("分发到 Edge TTS") + return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file) # Fallback for unknown engine - default to azure v1 logger.warning(f"未知的 TTS 引擎: '{tts_engine}', 将默认使用 Edge TTS (Azure V1)。") diff --git a/webui/components/audio_settings.py b/webui/components/audio_settings.py index 368ce2e..d83a88d 100644 --- a/webui/components/audio_settings.py +++ b/webui/components/audio_settings.py @@ -112,6 +112,7 @@ def render_tts_settings(tr): # 保存TTS引擎选择 config.ui["tts_engine"] = selected_engine + st.session_state['tts_engine'] = selected_engine # 2. 显示引擎详细说明 if selected_engine in engine_descriptions: @@ -490,17 +491,14 @@ def render_soulvoice_engine_settings(tr): # 音色 URI 输入 voice_uri = st.text_input( "音色URI", - value=config.soulvoice.get("voice_uri", "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr"), + value=config.soulvoice.get("voice_uri", "speech:2c2hp73s:clzkyf4vy00e5qr6hywum4u84:itjmezhxyynkyzrhhjav"), help="请输入 SoulVoice 音色标识符", - placeholder="speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr" + placeholder="speech:2c2hp73s:clzkyf4vy00e5qr6hywum4u84:itjmezhxyynkyzrhhjav" ) # 模型名称选择 model_options = [ - "FunAudioLLM/CosyVoice2-0.5B", - "FunAudioLLM/CosyVoice-300M", - "FunAudioLLM/CosyVoice-300M-SFT", - "FunAudioLLM/CosyVoice-300M-Instruct" + "FunAudioLLM/CosyVoice2-0.5B" ] saved_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B") @@ -636,7 +634,7 @@ def render_soulvoice_settings(tr): saved_api_key = config.soulvoice.get("api_key", "") saved_api_url = config.soulvoice.get("api_url", "https://tts.scsmtech.cn/tts") saved_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B") - saved_voice_uri = config.soulvoice.get("voice_uri", "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr") + saved_voice_uri = config.soulvoice.get("voice_uri", "speech:2c2hp73s:clzkyf4vy00e5qr6hywum4u84:itjmezhxyynkyzrhhjav") # API Key 输入 api_key = st.text_input( @@ -650,8 +648,8 @@ def render_soulvoice_settings(tr): voice_uri = st.text_input( "音色 URI", value=saved_voice_uri, - help="请输入 SoulVoice 音色标识符,格式如:speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr", - placeholder="speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr" + help="请输入 SoulVoice 音色标识符,格式如:speech:2c2hp73s:clzkyf4vy00e5qr6hywum4u84:itjmezhxyynkyzrhhjav", + placeholder="speech:2c2hp73s:clzkyf4vy00e5qr6hywum4u84:itjmezhxyynkyzrhhjav" ) # API URL 输入(可选) @@ -822,4 +820,5 @@ def get_audio_params(): 'bgm_type': st.session_state.get('bgm_type', 'random'), 'bgm_file': st.session_state.get('bgm_file', ''), 'bgm_volume': st.session_state.get('bgm_volume', AudioVolumeDefaults.BGM_VOLUME), + 'tts_engine': st.session_state.get('tts_engine', "edge_tts"), }