feat(tts): 添加多引擎TTS支持并重构语音设置界面

- 新增Azure Speech Services和Edge TTS引擎支持
- 重构语音设置界面，支持不同引擎的独立配置
- 添加引擎选择器和详细说明
- 更新requirements.txt添加azure-cognitiveservices-speech依赖
- 改进音色名称验证逻辑
2026-03-13 23:31:15 +00:00 · 2025-08-03 18:45:33 +08:00 · 2025-08-03 18:45:33 +08:00 · 062d317261
commit 062d317261
parent e59fd6030c
4 changed files with 507 additions and 78 deletions
--- a/app/services/voice.py
+++ b/app/services/voice.py
@ -1058,6 +1058,27 @@ def is_azure_v2_voice(voice_name: str):
    return ""


+def should_use_azure_speech_services(voice_name: str) -> bool:
+    """Return True when ``voice_name`` should be routed to Azure Speech Services.
+
+    Empty names and names accepted by ``is_soulvoice_voice`` yield False.
+    Otherwise the stripped name qualifies if it carries the ``-V2`` suffix
+    or has the official ``<lang>-<REGION>-<Name>Neural`` shape
+    (e.g. ``zh-CN-YunzeNeural``).
+    """
+    import re
+
+    if not voice_name or is_soulvoice_voice(voice_name):
+        return False
+
+    candidate = voice_name.strip()
+    if candidate.endswith("-V2"):
+        # A -V2 suffix always denotes an Azure Speech Services voice.
+        return True
+
+    return re.match(r'^[a-z]{2}-[A-Z]{2}-\w+Neural$', candidate) is not None
+
+
 def tts(
    text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str
 ) -> Union[SubMaker, None]:
@ -1065,11 +1086,11 @@ def tts(
    if is_soulvoice_voice(voice_name):
        return soulvoice_tts(text, voice_name, voice_file, speed=voice_rate)

-    # 检查是否为 Azure V2 引擎
-    if is_azure_v2_voice(voice_name):
+    # 检查是否应该使用 Azure Speech Services
+    if should_use_azure_speech_services(voice_name):
        return azure_tts_v2(text, voice_name, voice_file)

-    # 默认使用 Azure V1 引擎
+    # 默认使用 Edge TTS (Azure V1)
    return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file)


@ -1140,12 +1161,22 @@ def azure_tts_v1(


 def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker, None]:
-    voice_name = is_azure_v2_voice(voice_name)
-    if not voice_name:
-        logger.error(f"invalid voice name: {voice_name}")
-        raise ValueError(f"invalid voice name: {voice_name}")
+    # 直接使用官方音色名称，不需要V2后缀验证
+    # Azure Speech Services 的音色名称如: zh-CN-YunzeNeural, en-US-AvaMultilingualNeural
+    processed_voice_name = voice_name.strip()
+    if not processed_voice_name:
+        logger.error(f"invalid voice name: {voice_name} (empty)")
+        raise ValueError(f"invalid voice name: {voice_name} (empty)")
    text = text.strip()

+    # 检查Azure Speech SDK是否可用
+    try:
+        import azure.cognitiveservices.speech as speechsdk
+    except ImportError as e:
+        logger.error("Azure Speech SDK 未安装。请运行: pip install azure-cognitiveservices-speech")
+        logger.error("或者使用 Edge TTS 引擎作为替代方案")
+        return None
+
    def _format_duration_to_offset(duration) -> int:
        if isinstance(duration, str):
            time_obj = datetime.strptime(duration, "%H:%M:%S.%f")
@ -1164,9 +1195,7 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker,

    for i in range(3):
        try:
-            logger.info(f"start, voice name: {voice_name}, try: {i + 1}")
-
-            import azure.cognitiveservices.speech as speechsdk
+            logger.info(f"start, voice name: {processed_voice_name}, try: {i + 1}")

            sub_maker = SubMaker()

@ -1185,7 +1214,7 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker,
            speech_config = speechsdk.SpeechConfig(
                subscription=speech_key, region=service_region
            )
-            speech_config.speech_synthesis_voice_name = voice_name
+            speech_config.speech_synthesis_voice_name = processed_voice_name
            # speech_config.set_property(property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestSentenceBoundary,
            #                            value='true')
            speech_config.set_property(
--- a/config.example.toml
+++ b/config.example.toml
@ -92,6 +92,22 @@
    # 默认模型（可选）
    model = "FunAudioLLM/CosyVoice2-0.5B"

+[ui]
+    # TTS engine selection (edge_tts, azure_speech, soulvoice)
+    tts_engine = "edge_tts"
+
+    # Edge TTS settings
+    edge_voice_name = "zh-CN-XiaoyiNeural-Female"
+    edge_volume = 80
+    edge_rate = 1.0
+    edge_pitch = 0
+
+    # Azure Speech Services settings (official voice name, no -Female/-Male suffix)
+    azure_voice_name = "zh-CN-XiaoxiaoMultilingualNeural"
+    azure_volume = 80
+    azure_rate = 1.0
+    azure_pitch = 0
+
 [proxy]
    # clash 默认地址：http://127.0.0.1:7890
    http = ""
--- a/requirements.txt
+++ b/requirements.txt
@ -29,7 +29,7 @@ google-generativeai>=0.8.5
 # python-multipart~=0.0.9
 # redis==5.0.3
 # opencv-python~=4.10.0.84
-# azure-cognitiveservices-speech~=1.37.0
+azure-cognitiveservices-speech~=1.37.0
 # git-changelog~=2.5.2
 # watchdog==5.0.2
 # pydub==0.25.1
--- a/webui/components/audio_settings.py
+++ b/webui/components/audio_settings.py
@ -19,6 +19,53 @@ def get_soulvoice_voices():
    return ["soulvoice:custom"]


+def get_tts_engine_options():
+    """Map each supported TTS engine key to its display label."""
+    return dict(
+        edge_tts="Edge TTS",
+        azure_speech="Azure Speech Services",
+        soulvoice="SoulVoice",
+    )
+
+
+def get_tts_engine_descriptions():
+    """Return each TTS engine's info card: title, features, use_case, registration."""
+    return dict(
+        edge_tts=dict(
+            title="Edge TTS",
+            features="完全免费，但服务稳定性一般，不支持语音克隆功能",
+            use_case="测试和轻量级使用",
+            registration=None,
+        ),
+        azure_speech=dict(
+            title="Azure Speech Services",
+            features="提供一定免费额度，超出后按量付费，需要绑定海外信用卡",
+            use_case="企业级应用，需要稳定服务",
+            registration="https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices",
+        ),
+        soulvoice=dict(
+            title="SoulVoice",
+            features="提供免费额度，支持语音克隆，支持微信购买额度，无需信用卡，性价比极高",
+            use_case="个人用户和中小企业，需要语音克隆功能",
+            registration="https://soulvoice.scsmtech.cn/",
+        ),
+    )
+
+
+def is_valid_azure_voice_name(voice_name: str) -> bool:
+    """Tell whether ``voice_name`` looks like an official Azure voice name.
+
+    Accepted shape after stripping whitespace: ``<lang>-<REGION>-<Name>Neural``,
+    e.g. ``zh-CN-YunzeNeural`` or ``en-US-AvaMultilingualNeural``.
+    Non-string and empty values are rejected.
+    """
+    import re
+
+    if isinstance(voice_name, str) and voice_name:
+        return re.match(r'^[a-z]{2}-[A-Z]{2}-\w+Neural$', voice_name.strip()) is not None
+    return False
+
+
 def render_audio_panel(tr):
    """渲染音频设置面板"""
    with st.container(border=True):
@ -33,46 +80,91 @@ def render_audio_panel(tr):

 def render_tts_settings(tr):
    """渲染TTS(文本转语音)设置"""
+
+    # Layout, top to bottom: engine picker, engine description card,
+    # engine-specific settings form, then the shared preview button.
+
+    # 1. Engine picker.
+    engine_options = get_tts_engine_options()
+    engine_descriptions = get_tts_engine_descriptions()
+    engine_keys = list(engine_options)
+
+    # Restore the persisted engine; an unknown value falls back to Edge TTS.
+    saved_tts_engine = config.ui.get("tts_engine", "edge_tts")
+    if saved_tts_engine not in engine_options:
+        saved_tts_engine = "edge_tts"
+
+    # Dropdown over the engine keys, displayed with their labels.
+    selected_engine = st.selectbox(
+        "选择TTS引擎",
+        options=engine_keys,
+        format_func=lambda key: engine_options[key],
+        index=engine_keys.index(saved_tts_engine),
+        help="选择您要使用的文本转语音引擎",
+    )
+
+    # Remember the choice in the UI config.
+    config.ui["tts_engine"] = selected_engine
+
+    # 2. Description card of the chosen engine.
+    desc = engine_descriptions.get(selected_engine)
+    if desc is not None:
+        with st.expander(f"📋 {desc['title']} 详细说明", expanded=True):
+            st.markdown(f"**特点：** {desc['features']}")
+            st.markdown(f"**适用场景：** {desc['use_case']}")
+
+            # Skip the link when the card has no registration URL.
+            if desc['registration']:
+                st.markdown(f"**注册地址：** [{desc['registration']}]({desc['registration']})")
+
+    # 3. Settings form of the chosen engine, looked up in a dispatch table.
+    engine_renderers = {
+        "edge_tts": render_edge_tts_settings,
+        "azure_speech": render_azure_speech_settings,
+        "soulvoice": render_soulvoice_engine_settings,
+    }
+    render_engine_settings = engine_renderers.get(selected_engine)
+    if render_engine_settings is not None:
+        render_engine_settings(tr)
+
+    # 4. Preview button shared by every engine.
+    render_voice_preview_new(tr, selected_engine)
+
+
+def render_edge_tts_settings(tr):
+    """渲染 Edge TTS 引擎设置"""
    # 获取支持的语音列表
    support_locales = ["zh-CN", "en-US"]
-    azure_voices = voice.get_all_azure_voices(filter_locals=support_locales)
+    all_voices = voice.get_all_azure_voices(filter_locals=support_locales)

-    # 添加 SoulVoice 语音选项
-    soulvoice_voices = get_soulvoice_voices()
-
-    # 合并所有语音选项
-    all_voices = azure_voices + soulvoice_voices
+    # 只保留标准版本的语音（Edge TTS专用，不包含V2）
+    edge_voices = [v for v in all_voices if "-V2" not in v]

    # 创建友好的显示名称
    friendly_names = {}
-
-    # Azure 语音的友好名称
-    for v in azure_voices:
+    for v in edge_voices:
        friendly_names[v] = v.replace("Female", tr("Female")).replace("Male", tr("Male")).replace("Neural", "")

-    # SoulVoice 语音的友好名称
-    for v in soulvoice_voices:
-        friendly_names[v] = "SoulVoice (自定义音色)"
-
    # 获取保存的语音设置
-    saved_voice_name = config.ui.get("voice_name", "")
-    saved_voice_name_index = 0
+    saved_voice_name = config.ui.get("edge_voice_name", "zh-CN-XiaoxiaoNeural-Female")

-    if saved_voice_name in friendly_names:
-        saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name)
-    else:
-        # 如果没有保存的设置，选择与UI语言匹配的第一个语音
-        for i, v in enumerate(all_voices):
-            if (v.lower().startswith(st.session_state["ui_language"].lower())
-                    and "V2" not in v and not v.startswith("soulvoice:")):
-                saved_voice_name_index = i
+    # 确保保存的音色在可用列表中
+    if saved_voice_name not in friendly_names:
+        # 选择与UI语言匹配的第一个语音
+        for v in edge_voices:
+            if v.lower().startswith(st.session_state.get("ui_language", "zh-CN").lower()):
+                saved_voice_name = v
                break
+        else:
+            # 如果没找到匹配的，使用第一个
+            saved_voice_name = edge_voices[0] if edge_voices else ""

-    # 语音选择下拉框
+    # 音色选择下拉框（Edge TTS音色相对较少，保留下拉框）
    selected_friendly_name = st.selectbox(
-        tr("Speech Synthesis"),
+        "音色选择",
        options=list(friendly_names.values()),
-        index=saved_voice_name_index,
+        index=list(friendly_names.keys()).index(saved_voice_name) if saved_voice_name in friendly_names else 0,
+        help="选择Edge TTS音色"
    )

    # 获取实际的语音名称
@ -80,34 +172,342 @@ def render_tts_settings(tr):
        list(friendly_names.values()).index(selected_friendly_name)
    ]

-    # 如果选择的是 SoulVoice 自定义选项，使用配置的音色 URI
-    if voice_name == "soulvoice:custom":
-        custom_voice_uri = config.soulvoice.get("voice_uri", "")
-        if custom_voice_uri:
-            # 确保音色 URI 有正确的前缀
-            if not custom_voice_uri.startswith("soulvoice:") and not custom_voice_uri.startswith("speech:"):
-                voice_name = f"soulvoice:{custom_voice_uri}"
+    # 显示音色信息
+    with st.expander("💡 Edge TTS 音色说明", expanded=False):
+        st.write("**中文音色：**")
+        zh_voices = [v for v in edge_voices if v.startswith("zh-CN")]
+        for v in zh_voices:
+            gender = "女声" if "Female" in v else "男声"
+            name = v.replace("-Female", "").replace("-Male", "").replace("zh-CN-", "").replace("Neural", "")
+            st.write(f"• {name} ({gender})")
+
+        st.write("")
+        st.write("**英文音色：**")
+        en_voices = [v for v in edge_voices if v.startswith("en-US")][:5]  # 只显示前5个
+        for v in en_voices:
+            gender = "女声" if "Female" in v else "男声"
+            name = v.replace("-Female", "").replace("-Male", "").replace("en-US-", "").replace("Neural", "")
+            st.write(f"• {name} ({gender})")
+
+        if len([v for v in edge_voices if v.startswith("en-US")]) > 5:
+            st.write("• ... 更多英文音色")
+
+    config.ui["edge_voice_name"] = voice_name
+    config.ui["voice_name"] = voice_name  # 兼容性
+
+    # 音量调节
+    voice_volume = st.slider(
+        "音量调节",
+        min_value=0,
+        max_value=100,
+        value=int(config.ui.get("edge_volume", 80)),
+        step=1,
+        help="调节语音音量 (0-100)"
+    )
+    config.ui["edge_volume"] = voice_volume
+    st.session_state['voice_volume'] = voice_volume / 100.0
+
+    # 语速调节
+    voice_rate = st.slider(
+        "语速调节",
+        min_value=0.5,
+        max_value=2.0,
+        value=config.ui.get("edge_rate", 1.0),
+        step=0.1,
+        help="调节语音速度 (0.5-2.0倍速)"
+    )
+    config.ui["edge_rate"] = voice_rate
+    st.session_state['voice_rate'] = voice_rate
+
+    # 语调调节
+    voice_pitch = st.slider(
+        "语调调节",
+        min_value=-50,
+        max_value=50,
+        value=int(config.ui.get("edge_pitch", 0)),
+        step=5,
+        help="调节语音音调 (-50%到+50%)"
+    )
+    config.ui["edge_pitch"] = voice_pitch
+    # 转换为比例值
+    st.session_state['voice_pitch'] = 1.0 + (voice_pitch / 100.0)
+
+
+def render_azure_speech_settings(tr):
+    """Render the Azure Speech Services form and persist its values.
+
+    Region/key go to ``config.azure``; voice name, volume, rate and pitch go to
+    ``config.ui`` (the last three also to ``st.session_state``). ``tr`` is unused.
+    """
+    # Service region.
+    azure_speech_region = st.text_input(
+        "服务区域",
+        value=config.azure.get("speech_region", ""),
+        placeholder="例如：eastus",
+        help="Azure Speech Services 服务区域，如：eastus, westus2, eastasia 等"
+    )
+
+    # API key (masked input).
+    azure_speech_key = st.text_input(
+        "API Key",
+        value=config.azure.get("speech_key", ""),
+        type="password",
+        help="Azure Speech Services API 密钥"
+    )
+
+    # Persist the Azure credentials.
+    config.azure["speech_region"] = azure_speech_region
+    config.azure["speech_key"] = azure_speech_key
+
+    # Voice name: free-text input pre-filled with the saved value.
+    saved_voice_name = config.ui.get("azure_voice_name", "zh-CN-XiaoxiaoMultilingualNeural")
+    voice_name = st.text_input(
+        "音色名称",
+        value=saved_voice_name,
+        help="输入Azure Speech Services音色名称，直接使用官方音色名称即可。例如：zh-CN-YunzeNeural",
+        placeholder="zh-CN-YunzeNeural"
+    )
+
+    # Reference list of commonly used voices.
+    with st.expander("💡 常用音色参考", expanded=False):
+        st.write("**中文音色：**")
+        st.write("• zh-CN-XiaoxiaoMultilingualNeural (女声，多语言)")
+        st.write("• zh-CN-YunzeNeural (男声)")
+        st.write("• zh-CN-YunxiNeural (男声)")
+        st.write("• zh-CN-XiaochenNeural (女声)")
+        st.write("")
+        st.write("**英文音色：**")
+        st.write("• en-US-AndrewMultilingualNeural (男声，多语言)")
+        st.write("• en-US-AvaMultilingualNeural (女声，多语言)")
+        st.write("• en-US-BrianMultilingualNeural (男声，多语言)")
+        st.write("• en-US-EmmaMultilingualNeural (女声，多语言)")
+        st.write("")
+        st.info("💡 更多音色请参考 [Azure Speech Services 官方文档](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support)")
+
+    # Quick-pick buttons. st.rerun() aborts the current script run, so the
+    # choice must be stored in config *before* rerunning; a plain local
+    # assignment would be discarded and the text box would never change.
+    st.write("**快速选择：**")
+    quick_picks = (
+        ("中文女声", "zh-CN-XiaoxiaoMultilingualNeural"),
+        ("中文男声", "zh-CN-YunzeNeural"),
+        ("英文女声", "en-US-AvaMultilingualNeural"),
+    )
+    for col, (label, preset) in zip(st.columns(3), quick_picks):
+        with col:
+            if st.button(label, help=preset):
+                config.ui["azure_voice_name"] = preset
+                config.ui["voice_name"] = preset  # kept for backward compatibility
+                st.rerun()
+
+    # Check the voice-name format and report the result (warning only, not blocking).
+    if voice_name.strip():
+        if is_valid_azure_voice_name(voice_name):
+            st.success(f"✅ 音色名称有效: {voice_name}")
+        else:
+            st.warning(f"⚠️ 音色名称格式可能不正确: {voice_name}")
+            st.info("💡 Azure音色名称通常格式为: [语言]-[地区]-[名称]Neural")
+
+    # Persist the voice name.
+    config.ui["azure_voice_name"] = voice_name
+    config.ui["voice_name"] = voice_name  # kept for backward compatibility
+
+    # Volume (0-100 in the UI, 0.0-1.0 in session state).
+    voice_volume = st.slider(
+        "音量调节",
+        min_value=0,
+        max_value=100,
+        value=int(config.ui.get("azure_volume", 80)),
+        step=1,
+        help="调节语音音量 (0-100)"
+    )
+    config.ui["azure_volume"] = voice_volume
+    st.session_state['voice_volume'] = voice_volume / 100.0
+
+    # Speech rate. float() guards against an integer in the config (e.g. 1),
+    # which st.slider rejects when the bounds are floats.
+    voice_rate = st.slider(
+        "语速调节",
+        min_value=0.5,
+        max_value=2.0,
+        value=float(config.ui.get("azure_rate", 1.0)),
+        step=0.1,
+        help="调节语音速度 (0.5-2.0倍速)"
+    )
+    config.ui["azure_rate"] = voice_rate
+    st.session_state['voice_rate'] = voice_rate
+
+    # Pitch, as a percentage offset in the UI.
+    voice_pitch = st.slider(
+        "语调调节",
+        min_value=-50,
+        max_value=50,
+        value=int(config.ui.get("azure_pitch", 0)),
+        step=5,
+        help="调节语音音调 (-50%到+50%)"
+    )
+    config.ui["azure_pitch"] = voice_pitch
+    st.session_state['voice_pitch'] = 1.0 + (voice_pitch / 100.0)  # percent -> ratio
+
+    # Configuration status summary.
+    if azure_speech_region and azure_speech_key:
+        st.success("✅ Azure Speech Services 配置已设置")
+    elif not azure_speech_region:
+        st.warning("⚠️ 请配置服务区域")
+    elif not azure_speech_key:
+        st.warning("⚠️ 请配置 API Key")
+
+
+def render_soulvoice_engine_settings(tr):
+    """Render the SoulVoice engine form and persist its values.
+
+    Saves API key, voice URI, model and API URL to ``config.soulvoice`` and mirrors
+    a non-empty URI into ``config.ui["voice_name"]``. ``tr`` is unused here.
+    """
+    default_voice_uri = "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr"
+
+    # Credentials (masked input).
+    api_key = st.text_input(
+        "API Key",
+        value=config.soulvoice.get("api_key", ""),
+        type="password",
+        help="请输入您的 SoulVoice API 密钥",
+    )
+
+    # Voice identifier.
+    voice_uri = st.text_input(
+        "音色URI",
+        value=config.soulvoice.get("voice_uri", default_voice_uri),
+        help="请输入 SoulVoice 音色标识符",
+        placeholder=default_voice_uri,
+    )
+
+    # Model choice; a saved model outside the presets is appended so it stays selectable.
+    model_options = [
+        "FunAudioLLM/CosyVoice2-0.5B",
+        "FunAudioLLM/CosyVoice-300M",
+        "FunAudioLLM/CosyVoice-300M-SFT",
+        "FunAudioLLM/CosyVoice-300M-Instruct",
+    ]
+    saved_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B")
+    if saved_model not in model_options:
+        model_options = model_options + [saved_model]
+
+    model = st.selectbox(
+        "模型名称",
+        options=model_options,
+        index=model_options.index(saved_model),
+        help="选择使用的 TTS 模型",
+    )
+
+    # Advanced: API endpoint, collapsed by default.
+    with st.expander("高级设置", expanded=False):
+        api_url = st.text_input(
+            "API 地址",
+            value=config.soulvoice.get("api_url", "https://tts.scsmtech.cn/tts"),
+            help="SoulVoice API 接口地址",
+        )
+
+    # Persist everything the form collected.
+    collected = {"api_key": api_key, "voice_uri": voice_uri, "model": model, "api_url": api_url}
+    for field, value in collected.items():
+        config.soulvoice[field] = value
+
+    # Legacy key: mirror a non-empty URI into ui.voice_name, "soulvoice:"-prefixed.
+    if voice_uri:
+        already_prefixed = voice_uri.startswith("soulvoice:")
+        config.ui["voice_name"] = voice_uri if already_prefixed else f"soulvoice:{voice_uri}"
+
+    # Report what is still missing, API key first.
+    if not api_key:
+        st.warning("⚠️ 请配置 SoulVoice API Key")
+    elif not voice_uri:
+        st.warning("⚠️ 请配置音色 URI")
+    else:
+        st.success("✅ SoulVoice 配置已设置")
+
+
+def render_voice_preview_new(tr, selected_engine):
+    """Render the preview button; a click synthesizes a sample with ``selected_engine`` and plays it."""
+    if st.button("🎵 试听语音合成", use_container_width=True):
+        play_content = "感谢关注 NarratoAI，有任何问题或建议，可以关注微信公众号，求助或讨论"
+
+        # Resolve voice name, rate and pitch from the selected engine's saved settings.
+        voice_name = ""
+        voice_rate = 1.0
+        voice_pitch = 1.0
+
+        if selected_engine == "edge_tts":
+            voice_name = config.ui.get("edge_voice_name", "zh-CN-XiaoyiNeural-Female")
+            voice_rate = config.ui.get("edge_rate", 1.0)
+            voice_pitch = 1.0 + (config.ui.get("edge_pitch", 0) / 100.0)
+        elif selected_engine == "azure_speech":
+            voice_name = config.ui.get("azure_voice_name", "zh-CN-XiaoxiaoMultilingualNeural")
+            voice_rate = config.ui.get("azure_rate", 1.0)
+            voice_pitch = 1.0 + (config.ui.get("azure_pitch", 0) / 100.0)
+        elif selected_engine == "soulvoice":
+            voice_uri = config.soulvoice.get("voice_uri", "")
+            if voice_uri:
+                # Add the "soulvoice:" prefix unless it is already present.
+                already_prefixed = voice_uri.startswith("soulvoice:")
+                voice_name = voice_uri if already_prefixed else f"soulvoice:{voice_uri}"
+            voice_rate = 1.0  # SoulVoice uses the default speed
+            voice_pitch = 1.0  # SoulVoice does not support pitch adjustment
+
+        if not voice_name:
+            st.error("请先配置语音设置")
+            return
+
+        with st.spinner("正在合成语音..."):
+            temp_dir = utils.storage_dir("temp", create=True)
+            audio_file = os.path.join(temp_dir, f"tmp-voice-{str(uuid4())}.mp3")
+
+            sub_maker = voice.tts(
+                text=play_content,
+                voice_name=voice_name,
+                voice_rate=voice_rate,
+                voice_pitch=voice_pitch,
+                voice_file=audio_file,
+            )
+
+            if sub_maker and os.path.exists(audio_file):
+                st.success("✅ 语音合成成功！")
+
+                # Load the audio into memory so the temp file can be deleted right away.
+                with open(audio_file, 'rb') as audio_file_obj:
+                    audio_bytes = audio_file_obj.read()
+                    st.audio(audio_bytes, format='audio/mp3')
+
+                # Best-effort cleanup: a failed delete is ignored, but only OSError
+                # is caught so KeyboardInterrupt/SystemExit still propagate.
+                try:
+                    os.remove(audio_file)
+                except OSError:
+                    pass
            else:
-                voice_name = custom_voice_uri if custom_voice_uri.startswith("soulvoice:") else f"soulvoice:{custom_voice_uri}"
+                st.error("❌ 语音合成失败，请检查配置")

-    # 保存设置
-    config.ui["voice_name"] = voice_name

-    # 根据语音类型渲染不同的设置
-    if voice.is_soulvoice_voice(voice_name):
-        render_soulvoice_settings(tr)
-    elif voice.is_azure_v2_voice(voice_name):
-        render_azure_v2_settings(tr)
+def render_azure_v2_settings(tr):
+    """Render the Azure V2 region/key inputs and save them to config.azure (kept for compatibility)."""
+    saved_azure_speech_region = config.azure.get("speech_region", "")
+    saved_azure_speech_key = config.azure.get("speech_key", "")

-    # 语音参数设置
-    render_voice_parameters(tr, voice_name)
+    azure_speech_region = st.text_input(
+        tr("Speech Region"),
+        value=saved_azure_speech_region
+    )
+    azure_speech_key = st.text_input(
+        tr("Speech Key"),
+        value=saved_azure_speech_key,
+        type="password"
+    )

-    # 试听按钮
-    render_voice_preview(tr, voice_name)
+    config.azure["speech_region"] = azure_speech_region
+    config.azure["speech_key"] = azure_speech_key


 def render_soulvoice_settings(tr):
-    """渲染 SoulVoice 语音设置"""
+    """渲染 SoulVoice 语音设置（保留兼容性）"""
    saved_api_key = config.soulvoice.get("api_key", "")
    saved_api_url = config.soulvoice.get("api_url", "https://tts.scsmtech.cn/tts")
    saved_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B")
@ -158,27 +558,8 @@ def render_soulvoice_settings(tr):
        st.warning("⚠️ 请配置音色 URI")


-def render_azure_v2_settings(tr):
-    """渲染Azure V2语音设置"""
-    saved_azure_speech_region = config.azure.get("speech_region", "")
-    saved_azure_speech_key = config.azure.get("speech_key", "")
-
-    azure_speech_region = st.text_input(
-        tr("Speech Region"),
-        value=saved_azure_speech_region
-    )
-    azure_speech_key = st.text_input(
-        tr("Speech Key"),
-        value=saved_azure_speech_key,
-        type="password"
-    )
-
-    config.azure["speech_region"] = azure_speech_region
-    config.azure["speech_key"] = azure_speech_key
-
-
 def render_voice_parameters(tr, voice_name):
-    """渲染语音参数设置"""
+    """渲染语音参数设置（保留兼容性）"""
    # 音量 - 使用统一的默认值
    voice_volume = st.slider(
        tr("Speech Volume"),
@ -260,9 +641,12 @@ def render_voice_preview(tr, voice_name):
                )

            if sub_maker and os.path.exists(audio_file):
+                st.success(tr("Voice synthesis successful"))
                st.audio(audio_file, format="audio/mp3")
                if os.path.exists(audio_file):
                    os.remove(audio_file)
+            else:
+                st.error(tr("Voice synthesis failed"))


 def render_bgm_settings(tr):