def parse_qwen3_voice(voice_name: str) -> str:
    """
    Strip the ``qwen3:`` prefix from a voice identifier.

    Args:
        voice_name: Voice identifier, e.g. ``"qwen3:Cherry"`` or ``"Cherry"``.

    Returns:
        The identifier without the ``qwen3:`` prefix. Values that are not
        strings, or that do not carry the prefix, are returned unchanged.
    """
    prefix = "qwen3:"
    if isinstance(voice_name, str) and voice_name.startswith(prefix):
        return voice_name[len(prefix):]
    return voice_name


def _download_qwen3_audio(result) -> Union[bytes, None]:
    """Download the audio referenced by a DashScope response; None when it has no audio URL."""
    audio_url = None
    if result.output and result.output.audio:
        audio_url = result.output.audio.url

    if not audio_url:
        logger.warning("API响应中未找到音频URL")
        return None

    response = requests.get(audio_url, timeout=30)
    response.raise_for_status()
    return response.content


def qwen3_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union["SubMaker", None]:
    """
    Synthesize speech with Qwen3 TTS through the DashScope SDK.

    The API key and model name are read from ``config.tts_qwen``. The SDK
    response carries an audio URL; the audio is downloaded and written to
    ``voice_file``. Up to three attempts are made, one second apart.

    Args:
        text: Text to synthesize. Surrounding whitespace is stripped.
        voice_name: Voice identifier, optionally prefixed with ``qwen3:``.
            An empty voice falls back to ``"Cherry"``.
        voice_file: Path the downloaded audio bytes are written to.
        speed: Accepted so the signature matches the other engine functions;
            it is not forwarded to the SDK call made here.

    Returns:
        A SubMaker holding a single estimated subtitle entry on success, or
        None when the API key is missing, the text is empty, the dashscope
        package cannot be imported, or every attempt fails.
    """
    default_model = "qwen3-tts-flash"
    max_attempts = 3

    # Read configuration.
    tts_qwen_cfg = getattr(config, "tts_qwen", {}) or {}
    api_key = tts_qwen_cfg.get("api_key", "")
    model_name = tts_qwen_cfg.get("model_name", default_model) or default_model
    if not api_key:
        logger.error("Qwen3 TTS API key 未配置")
        return None

    # Nothing to synthesize: bail out before spending API calls on retries.
    text = (text or "").strip()
    if not text:
        logger.error("Qwen3 TTS 文本为空")
        return None

    # Imported lazily so the module still loads when dashscope is absent.
    try:
        import dashscope
    except ImportError:
        logger.error("未安装 dashscope SDK,请执行: pip install dashscope")
        return None
    except Exception as e:
        logger.error(f"DashScope SDK 初始化失败: {e}")
        return None

    # The voice parameter is passed through as-is; no name mapping is applied.
    mapped_voice = parse_qwen3_voice(voice_name) or "Cherry"

    for attempt in range(1, max_attempts + 1):
        try:
            logger.info(f"=== Qwen3 TTS 请求参数 (第 {attempt} 次调用) ===")

            result = dashscope.MultiModalConversation.call(
                model=model_name,
                api_key=api_key,
                text=text,
                voice=mapped_voice,
            )
            logger.info(f"Qwen3 TTS API 响应: {result}")

            # A malformed response or a failed download is logged and then
            # treated like an empty result, so it goes through the retry path.
            audio_bytes = None
            try:
                audio_bytes = _download_qwen3_audio(result)
            except Exception as e:
                logger.error(f"解析API响应失败: {str(e)}")

            if audio_bytes:
                with open(voice_file, "wb") as f:
                    f.write(audio_bytes)

                # No timing data is read from the response, so a single entry
                # is estimated from the text length.
                # NOTE(review): the unit SubMaker.create_sub expects for this
                # tuple is not visible here — confirm against the other engines.
                sub = SubMaker()
                est_ms = max(800, int(len(text) * 180))
                sub.create_sub((0, est_ms), text)

                logger.info(f"Qwen3 TTS 生成成功(DashScope SDK),文件大小: {len(audio_bytes)} 字节")
                return sub

            logger.warning("DashScope SDK 返回空音频数据,重试")
        except Exception as e:
            logger.error(f"DashScope SDK 合成失败: {e}")

        # Back off before the next attempt, but not after the last one.
        if attempt < max_attempts:
            time.sleep(1)

    logger.error(f"Qwen3 TTS 生成失败,已尝试 {max_attempts} 次")
    return None
def is_qwen_engine(tts_engine: str) -> bool:
    """
    Report whether the given engine identifier selects Qwen3 TTS.

    Args:
        tts_engine: Engine identifier to check.

    Returns:
        True only when ``tts_engine`` equals ``"qwen3_tts"``.
    """
    qwen_engine_id = "qwen3_tts"
    if tts_engine == qwen_engine_id:
        return True
    return False
def render_qwen3_tts_settings(tr):
    """
    Render the Qwen3 TTS settings widgets and store their values in ``config``.

    Shows inputs for the API key, model name, voice and speaking rate, each
    pre-filled from the saved configuration. The chosen values are written
    to ``config.tts_qwen`` (``api_key``, ``model_name``) and ``config.ui``
    (``qwen_voice_type``, ``qwen3_rate``, ``voice_name``).

    Args:
        tr: Translation callable passed to every settings renderer; it is
            not used by this function.
    """
    default_voice = "Cherry"
    min_rate, max_rate, default_rate = 0.5, 2.0, 1.0

    api_key = st.text_input(
        "API Key",
        value=config.tts_qwen.get("api_key", ""),
        type="password",
        help="通义千问 DashScope API Key"
    )

    model_name = st.text_input(
        "模型名称",
        value=config.tts_qwen.get("model_name", "qwen3-tts-flash"),
        help="Qwen TTS 模型名,例如 qwen3-tts-flash"
    )

    # Voice options: display name -> voice parameter.
    voice_options = {
        "芊悦": "Cherry",
        "晨煦": "Ethan",
        "不吃鱼": "Nofish",
        "詹妮弗": "Jennifer",
        "甜茶": "Ryan",
        "卡捷琳娜": "Katerina",
        "墨讲师": "Elias",
        "上海-阿珍": "Jada",
        "北京-晓东": "Dylan",
        "四川-晴儿": "Sunny",
        "南京-老李": "Li",
        "陕西-秦川": "Marcus",
        "闽南-阿杰": "Roy",
        "天津-李彼得": "Peter",
        "粤语-阿强": "Rocky",
        "粤语-阿清": "Kiki",
        "四川-程川": "Eric"
    }

    saved_voice_param = config.ui.get("qwen_voice_type", default_voice) or default_voice
    display_by_param = {param: name for name, param in voice_options.items()}

    if saved_voice_param in display_by_param:
        saved_display_name = display_by_param[saved_voice_param]
    elif saved_voice_param in voice_options:
        # A display name was stored instead of the voice parameter.
        saved_display_name = saved_voice_param
    else:
        # Unknown voice saved in the configuration: offer it as a custom
        # option rather than silently replacing it with the default.
        saved_display_name = str(saved_voice_param)
        voice_options[saved_display_name] = saved_voice_param

    display_names = list(voice_options.keys())

    selected_display_name = st.selectbox(
        "音色选择",
        options=display_names,
        index=display_names.index(saved_display_name),
        help="选择Qwen3 TTS音色"
    )

    # Map the selected display name back to its voice parameter.
    voice_type = voice_options.get(selected_display_name, default_voice)

    # Start the slider from the saved rate, clamped to the slider bounds,
    # so a previously chosen speed is not overwritten by the default.
    try:
        saved_rate = float(config.ui.get("qwen3_rate", default_rate))
    except (TypeError, ValueError):
        saved_rate = default_rate
    saved_rate = min(max_rate, max(min_rate, saved_rate))

    voice_rate = st.slider(
        "语速调节",
        min_value=min_rate,
        max_value=max_rate,
        value=saved_rate,
        step=0.1,
        help="调节语音速度 (0.5-2.0)"
    )

    # Persist the settings.
    config.tts_qwen["api_key"] = api_key
    config.tts_qwen["model_name"] = model_name
    config.ui["qwen_voice_type"] = voice_type
    config.ui["qwen3_rate"] = voice_rate
    config.ui["voice_name"] = voice_type  # kept for compatibility
def is_disabled_subtitle_settings(tts_engine: str) -> bool:
    """
    Tell whether subtitle settings are disabled for the given TTS engine.

    Args:
        tts_engine: Engine identifier to check.

    Returns:
        True when ``tts_engine`` is ``"soulvoice"`` or ``"qwen3_tts"``,
        False otherwise.
    """
    for engine_id in ("soulvoice", "qwen3_tts"):
        if tts_engine == engine_id:
            return True
    return False