mirror of
https://github.com/linyqh/NarratoAI.git
synced 2025-12-11 02:12:50 +00:00
新增qwen3 tts服务
This commit is contained in:
parent
debf1f95b1
commit
01c8c8097e
@ -51,6 +51,7 @@ def save_config():
|
||||
_cfg["tencent"] = tencent
|
||||
_cfg["soulvoice"] = soulvoice
|
||||
_cfg["ui"] = ui
|
||||
_cfg["tts_qwen"] = tts_qwen
|
||||
f.write(toml.dumps(_cfg))
|
||||
|
||||
|
||||
@ -63,6 +64,7 @@ tencent = _cfg.get("tencent", {})
|
||||
soulvoice = _cfg.get("soulvoice", {})
|
||||
ui = _cfg.get("ui", {})
|
||||
frames = _cfg.get("frames", {})
|
||||
tts_qwen = _cfg.get("tts_qwen", {})
|
||||
|
||||
hostname = socket.gethostname()
|
||||
|
||||
|
||||
@ -1089,6 +1089,10 @@ def tts(
|
||||
logger.info("分发到腾讯云 TTS")
|
||||
return tencent_tts(text, voice_name, voice_file, speed=voice_rate)
|
||||
|
||||
if tts_engine == "qwen3_tts":
|
||||
logger.info("分发到 Qwen3 TTS", voice_name)
|
||||
return qwen3_tts(text, voice_name, voice_file, speed=voice_rate)
|
||||
|
||||
if tts_engine == "soulvoice":
|
||||
logger.info("分发到 SoulVoice TTS")
|
||||
return soulvoice_tts(text, voice_name, voice_file, speed=voice_rate)
|
||||
@ -1538,7 +1542,7 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
|
||||
continue
|
||||
else:
|
||||
# SoulVoice 引擎不生成字幕文件
|
||||
if is_soulvoice_voice(voice_name):
|
||||
if is_soulvoice_voice(voice_name) or is_qwen_engine(tts_engine):
|
||||
# 获取实际音频文件的时长
|
||||
duration = get_audio_duration_from_file(audio_file)
|
||||
if duration <= 0:
|
||||
@ -1619,6 +1623,111 @@ def parse_tencent_voice(voice_name: str) -> str:
|
||||
return voice_name
|
||||
|
||||
|
||||
def parse_qwen3_voice(voice_name: str) -> str:
    """
    Strip the ``qwen3:`` engine prefix from a Qwen3 voice identifier.

    Args:
        voice_name: Voice identifier, optionally prefixed with ``qwen3:``.

    Returns:
        The identifier without the prefix. Values that are not strings, or
        that do not start with the prefix, are returned unchanged.
    """
    prefix = "qwen3:"
    if isinstance(voice_name, str) and voice_name.startswith(prefix):
        # Slice by the prefix length so the offset cannot drift from the literal.
        return voice_name[len(prefix):]
    return voice_name
|
||||
|
||||
|
||||
def _download_qwen3_audio(result) -> Union[bytes, None]:
    """
    Fetch the audio referenced by a DashScope TTS response.

    Reads ``result.output.audio.url`` and downloads it with ``requests``.

    Args:
        result: Object returned by ``dashscope.MultiModalConversation.call``.

    Returns:
        The downloaded bytes, or ``None`` when the response carries no audio
        URL or when reading the response / downloading raises.
    """
    try:
        audio_url = None
        if result.output and result.output.audio:
            audio_url = result.output.audio.url

        if not audio_url:
            logger.warning("API响应中未找到音频URL")
            return None

        response = requests.get(audio_url, timeout=30)
        response.raise_for_status()
        return response.content
    except Exception as e:
        # Best effort: the caller treats a missing payload as a retryable failure.
        logger.error(f"解析API响应失败: {str(e)}")
        return None


def qwen3_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]:
    """
    Synthesize speech with Qwen3 TTS through the DashScope SDK.

    Reads ``api_key`` and ``model_name`` from ``config.tts_qwen``, calls
    ``dashscope.MultiModalConversation.call`` (up to three attempts, one
    second apart), downloads the audio from the URL in the response and
    writes it to ``voice_file``.

    Args:
        text: Text to synthesize; surrounding whitespace is stripped.
        voice_name: Voice identifier, optionally prefixed with ``qwen3:``.
            Falls back to ``"Cherry"`` when empty.
        voice_file: Path the downloaded audio bytes are written to.
        speed: Accepted for signature parity with the other engine functions.
            It is not forwarded: the DashScope call below receives only
            model, api_key, text and voice.

    Returns:
        A ``SubMaker`` holding one estimated subtitle entry for the whole
        text, or ``None`` when the API key is missing, the text is empty,
        the SDK is not installed, or every attempt fails.
    """
    # Load configuration.
    tts_qwen_cfg = getattr(config, "tts_qwen", {}) or {}
    api_key = tts_qwen_cfg.get("api_key", "")
    model_name = tts_qwen_cfg.get("model_name", "qwen3-tts-flash")
    if not api_key:
        logger.error("Qwen3 TTS API key 未配置")
        return None

    # Nothing to synthesize: bail out instead of spending three API attempts.
    text = (text or "").strip()
    if not text:
        logger.error("Qwen3 TTS 文本为空,跳过合成")
        return None

    # Import lazily so a missing SDK only affects this engine.
    try:
        import dashscope
    except ImportError:
        logger.error("未安装 dashscope SDK,请执行: pip install dashscope")
        return None
    except Exception as e:
        logger.error(f"DashScope SDK 初始化失败: {e}")
        return None

    # The voice parameter is passed through as-is (no name mapping).
    mapped_voice = parse_qwen3_voice(voice_name) or "Cherry"

    max_attempts = 3
    for attempt in range(max_attempts):
        is_last_attempt = attempt == max_attempts - 1
        try:
            logger.info(f"=== Qwen3 TTS 请求参数 (第 {attempt+1} 次调用) ===")

            result = dashscope.MultiModalConversation.call(
                model=(model_name or "qwen3-tts-flash"),
                api_key=api_key,
                text=text,
                voice=mapped_voice,
            )
            logger.info(f"Qwen3 TTS API 响应: {result}")

            audio_bytes = _download_qwen3_audio(result)
            if not audio_bytes:
                logger.warning("DashScope SDK 返回空音频数据,重试")
                if not is_last_attempt:
                    time.sleep(1)
                # Never fall through to the file write without audio data.
                continue

            with open(voice_file, "wb") as f:
                f.write(audio_bytes)

            # The API returns no timing data, so emit a single estimated cue:
            # 180 per character with a floor of 800.
            # NOTE(review): the variable name suggests milliseconds — confirm
            # the unit SubMaker.create_sub expects.
            sub = SubMaker()
            est_ms = max(800, int(len(text) * 180))
            sub.create_sub((0, est_ms), text)

            logger.info(f"Qwen3 TTS 生成成功(DashScope SDK),文件大小: {len(audio_bytes)} 字节")
            return sub

        except Exception as e:
            logger.error(f"DashScope SDK 合成失败: {e}")
            if not is_last_attempt:
                time.sleep(1)

    return None
|
||||
|
||||
|
||||
def tencent_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]:
|
||||
"""
|
||||
使用腾讯云 TTS 生成语音
|
||||
@ -1819,6 +1928,8 @@ def is_soulvoice_voice(voice_name: str) -> bool:
|
||||
"""
|
||||
return voice_name.startswith("soulvoice:") or voice_name.startswith("speech:")
|
||||
|
||||
def is_qwen_engine(tts_engine: str) -> bool:
    """Return True when *tts_engine* is the Qwen3 TTS engine id ``"qwen3_tts"``."""
    return tts_engine == "qwen3_tts"
|
||||
|
||||
def parse_soulvoice_voice(voice_name: str) -> str:
|
||||
"""
|
||||
|
||||
@ -114,8 +114,14 @@
|
||||
# 默认模型(可选)
|
||||
model = "FunAudioLLM/CosyVoice2-0.5B"
|
||||
|
||||
[tts_qwen]
|
||||
# 通义千问 Qwen3 TTS 配置
|
||||
# 访问 https://bailian.console.aliyun.com/?tab=model#/api-key 获取你的 API 密钥
|
||||
api_key = ""
|
||||
model_name = "qwen3-tts-flash"
|
||||
|
||||
[ui]
|
||||
# TTS引擎选择 (edge_tts, azure_speech, soulvoice, tencent_tts)
|
||||
# TTS引擎选择 (edge_tts, azure_speech, soulvoice, tencent_tts, qwen3_tts)
|
||||
tts_engine = "edge_tts"
|
||||
|
||||
# Edge TTS 配置
|
||||
|
||||
@ -15,6 +15,7 @@ openai>=1.77.0
|
||||
google-generativeai>=0.8.5
|
||||
azure-cognitiveservices-speech>=1.37.0
|
||||
tencentcloud-sdk-python>=3.0.1200
|
||||
dashscope>=1.24.6
|
||||
|
||||
# 图像处理依赖
|
||||
Pillow>=10.3.0
|
||||
|
||||
@ -1,3 +1,4 @@
|
||||
from venv import logger
|
||||
import streamlit as st
|
||||
import os
|
||||
from uuid import uuid4
|
||||
@ -24,7 +25,8 @@ def get_tts_engine_options():
|
||||
return {
|
||||
"edge_tts": "Edge TTS",
|
||||
"azure_speech": "Azure Speech Services",
|
||||
"tencent_tts": "腾讯云 TTS"
|
||||
"tencent_tts": "腾讯云 TTS",
|
||||
"qwen3_tts": "通义千问 Qwen3 TTS"
|
||||
}
|
||||
|
||||
|
||||
@ -48,6 +50,12 @@ def get_tts_engine_descriptions():
|
||||
"features": "提供免费额度,音质优秀,支持多种音色,国内访问速度快",
|
||||
"use_case": "个人和企业用户,需要稳定的中文语音合成",
|
||||
"registration": "https://console.cloud.tencent.com/tts"
|
||||
},
|
||||
"qwen3_tts": {
|
||||
"title": "通义千问 Qwen3 TTS",
|
||||
"features": "阿里云通义千问语音合成,音质优秀,支持多种音色",
|
||||
"use_case": "需要高质量中文语音合成的用户",
|
||||
"registration": "https://dashscope.aliyuncs.com/"
|
||||
}
|
||||
}
|
||||
|
||||
@ -129,6 +137,8 @@ def render_tts_settings(tr):
|
||||
render_soulvoice_engine_settings(tr)
|
||||
elif selected_engine == "tencent_tts":
|
||||
render_tencent_tts_settings(tr)
|
||||
elif selected_engine == "qwen3_tts":
|
||||
render_qwen3_tts_settings(tr)
|
||||
|
||||
# 4. 试听功能
|
||||
render_voice_preview_new(tr, selected_engine)
|
||||
@ -469,8 +479,87 @@ def render_tencent_tts_settings(tr):
|
||||
config.tencent["region"] = region
|
||||
config.ui["tencent_voice_type"] = voice_type
|
||||
config.ui["tencent_rate"] = voice_rate
|
||||
config.ui["voice_name"] = saved_voice_type #兼容性
|
||||
|
||||
|
||||
def render_qwen3_tts_settings(tr):
    """
    Render the Qwen3 TTS settings widgets and store the chosen values.

    Shows inputs for the API key, model name, voice and speech rate, then
    writes them to ``config.tts_qwen`` (``api_key``, ``model_name``) and
    ``config.ui`` (``qwen_voice_type``, ``qwen3_rate``, ``voice_name``).

    Args:
        tr: Translation callable passed to every settings renderer; not used
            here because the labels are hard-coded.
    """
    api_key = st.text_input(
        "API Key",
        value=config.tts_qwen.get("api_key", ""),
        type="password",
        help="通义千问 DashScope API Key"
    )

    model_name = st.text_input(
        "模型名称",
        value=config.tts_qwen.get("model_name", "qwen3-tts-flash"),
        help="Qwen TTS 模型名,例如 qwen3-tts-flash"
    )

    # Voice options: display name shown to the user -> API voice parameter.
    voice_options = {
        "芊悦": "Cherry",
        "晨煦": "Ethan",
        "不吃鱼": "Nofish",
        "詹妮弗": "Jennifer",
        "甜茶": "Ryan",
        "卡捷琳娜": "Katerina",
        "墨讲师": "Elias",
        "上海-阿珍": "Jada",
        "北京-晓东": "Dylan",
        "四川-晴儿": "Sunny",
        "南京-老李": "Li",
        "陕西-秦川": "Marcus",
        "闽南-阿杰": "Roy",
        "天津-李彼得": "Peter",
        "粤语-阿强": "Rocky",
        "粤语-阿清": "Kiki",
        "四川-程川": "Eric"
    }

    saved_voice_param = config.ui.get("qwen_voice_type", "Cherry")

    # Reverse lookup: find the display name of the saved voice parameter.
    saved_display_name = next(
        (name for name, param in voice_options.items() if param == saved_voice_param),
        None,
    )
    if saved_display_name is None:
        if saved_voice_param:
            # Unknown saved voice: keep it as a custom entry instead of
            # silently resetting it to the default. setdefault leaves a
            # built-in mapping intact if the names collide.
            saved_display_name = str(saved_voice_param)
            voice_options.setdefault(saved_display_name, saved_voice_param)
        else:
            saved_display_name = "芊悦"  # default voice

    display_names = list(voice_options)

    selected_display_name = st.selectbox(
        "音色选择",
        options=display_names,
        index=display_names.index(saved_display_name) if saved_display_name in display_names else 0,
        help="选择Qwen3 TTS音色"
    )

    # Map the selection back to the API voice parameter.
    voice_type = voice_options.get(selected_display_name, "Cherry")

    # Start the slider from the saved rate, coerced to float and clamped to
    # the slider bounds so a hand-edited config cannot break the widget.
    try:
        saved_rate = float(config.ui.get("qwen3_rate", 1.0))
    except (TypeError, ValueError):
        saved_rate = 1.0
    saved_rate = max(0.5, min(2.0, saved_rate))

    voice_rate = st.slider(
        "语速调节",
        min_value=0.5,
        max_value=2.0,
        value=saved_rate,
        step=0.1,
        help="调节语音速度 (0.5-2.0)"
    )

    # Store the settings.
    config.tts_qwen["api_key"] = api_key
    config.tts_qwen["model_name"] = model_name
    config.ui["qwen_voice_type"] = voice_type
    config.ui["qwen3_rate"] = voice_rate
    config.ui["voice_name"] = voice_type  # kept for compatibility
|
||||
|
||||
def render_voice_preview_new(tr, selected_engine):
|
||||
"""渲染新的语音试听功能"""
|
||||
if st.button("🎵 试听语音合成", use_container_width=True):
|
||||
@ -503,6 +592,11 @@ def render_voice_preview_new(tr, selected_engine):
|
||||
voice_name = f"tencent:{voice_type}"
|
||||
voice_rate = config.ui.get("tencent_rate", 1.0)
|
||||
voice_pitch = 1.0 # 腾讯云 TTS 不支持音调调节
|
||||
elif selected_engine == "qwen3_tts":
|
||||
vt = config.ui.get("qwen_voice_type", "Cherry")
|
||||
voice_name = f"qwen3:{vt}"
|
||||
voice_rate = config.ui.get("qwen3_rate", 1.0)
|
||||
voice_pitch = 1.0 # Qwen3 TTS 不支持音调调节
|
||||
|
||||
if not voice_name:
|
||||
st.error("请先配置语音设置")
|
||||
|
||||
@ -1,3 +1,5 @@
|
||||
|
||||
from loguru import logger
|
||||
import streamlit as st
|
||||
from app.config import config
|
||||
from webui.utils.cache import get_fonts_cache
|
||||
@ -9,14 +11,15 @@ def render_subtitle_panel(tr):
|
||||
with st.container(border=True):
|
||||
st.write(tr("Subtitle Settings"))
|
||||
|
||||
# 检查是否选择了 SoulVoice 引擎
|
||||
# 检查是否选择了 SoulVoice qwen3_tts引擎
|
||||
from app.services import voice
|
||||
current_voice = st.session_state.get('voice_name', '')
|
||||
is_soulvoice = voice.is_soulvoice_voice(current_voice)
|
||||
# current_voice = st.session_state.get('voice_name', '')
|
||||
tts_engine = config.ui.get('tts_engine', '')
|
||||
is_disabled_subtitle = is_disabled_subtitle_settings(tts_engine)
|
||||
|
||||
if is_soulvoice:
|
||||
if is_disabled_subtitle:
|
||||
# SoulVoice 引擎时显示禁用提示
|
||||
st.warning("⚠️ SoulVoice TTS 不支持精确字幕生成")
|
||||
st.warning(f"⚠️ {tts_engine}不支持精确字幕生成")
|
||||
st.info("💡 建议使用专业剪辑工具(如剪映、PR等)手动添加字幕")
|
||||
|
||||
# 强制禁用字幕
|
||||
@ -84,6 +87,10 @@ def render_font_settings(tr):
|
||||
st.session_state['font_size'] = font_size
|
||||
|
||||
|
||||
def is_disabled_subtitle_settings(tts_engine: str) -> bool:
    """
    Tell whether the subtitle settings are disabled for a TTS engine.

    Args:
        tts_engine: Engine id, e.g. the value of ``config.ui["tts_engine"]``.

    Returns:
        True for ``"soulvoice"`` and ``"qwen3_tts"``, False otherwise.
    """
    # Tuple membership compares with ==, same as the chained form it replaces.
    return tts_engine in ("soulvoice", "qwen3_tts")
|
||||
|
||||
def render_position_settings(tr):
|
||||
"""渲染位置设置"""
|
||||
subtitle_positions = [
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user