新增腾讯云 TTS 服务

This commit is contained in:
Emily-LMH 2025-09-16 14:40:08 +08:00 committed by linyq
parent da27d8d8a1
commit a1474bed02
11 changed files with 348 additions and 44 deletions

View File

@ -22,10 +22,9 @@ RUN python -m pip install --upgrade pip setuptools wheel && \
# 激活虚拟环境 # 激活虚拟环境
ENV PATH="/opt/venv/bin:$PATH" ENV PATH="/opt/venv/bin:$PATH"
# 复制 requirements.txt 并安装 Python 依赖 # 复制 requirements.txt 并使用镜像安装 Python 依赖
COPY requirements.txt . COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip && \ RUN pip install --no-cache-dir -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt
pip install --no-cache-dir -r requirements.txt
# 运行阶段 # 运行阶段
FROM python:3.12-slim-bookworm FROM python:3.12-slim-bookworm
@ -48,7 +47,7 @@ ENV PATH="/opt/venv/bin:$PATH" \
LANG=C.UTF-8 \ LANG=C.UTF-8 \
LC_ALL=C.UTF-8 LC_ALL=C.UTF-8
# 安装运行时系统依赖 # 一次性安装所有依赖、创建用户、配置系统,减少层级
RUN apt-get update && apt-get install -y --no-install-recommends \ RUN apt-get update && apt-get install -y --no-install-recommends \
imagemagick \ imagemagick \
ffmpeg \ ffmpeg \
@ -56,32 +55,25 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
curl \ curl \
git-lfs \ git-lfs \
ca-certificates \ ca-certificates \
dos2unix \
&& sed -i 's/<policy domain="path" rights="none" pattern="@\*"/<policy domain="path" rights="read|write" pattern="@\*"/' /etc/ImageMagick-6/policy.xml || true \
&& git lfs install \
&& groupadd -r narratoai && useradd -r -g narratoai -d /NarratoAI -s /bin/bash narratoai \
&& rm -rf /var/lib/apt/lists/* && rm -rf /var/lib/apt/lists/*
# 配置 ImageMagick 策略(允许处理更多格式) # 复制入口脚本并修复换行符问题
RUN sed -i 's/<policy domain="path" rights="none" pattern="@\*"/<policy domain="path" rights="read|write" pattern="@\*"/' /etc/ImageMagick-6/policy.xml || true COPY --chown=narratoai:narratoai docker-entrypoint.sh /usr/local/bin/
RUN dos2unix /usr/local/bin/docker-entrypoint.sh && chmod +x /usr/local/bin/docker-entrypoint.sh
# 初始化 git-lfs # 复制其余的应用代码
RUN git lfs install
# 创建非 root 用户(安全最佳实践)
RUN groupadd -r narratoai && useradd -r -g narratoai -d /NarratoAI -s /bin/bash narratoai
# 复制应用代码
COPY --chown=narratoai:narratoai . . COPY --chown=narratoai:narratoai . .
# 确保配置文件存在 # 创建目录、复制配置、设置权限
RUN if [ ! -f config.toml ]; then cp config.example.toml config.toml; fi
# 创建必要的目录并设置权限
RUN mkdir -p storage/temp storage/tasks storage/json storage/narration_scripts storage/drama_analysis && \ RUN mkdir -p storage/temp storage/tasks storage/json storage/narration_scripts storage/drama_analysis && \
if [ ! -f config.toml ]; then cp config.example.toml config.toml; fi && \
chown -R narratoai:narratoai /NarratoAI && \ chown -R narratoai:narratoai /NarratoAI && \
chmod -R 755 /NarratoAI chmod -R 755 /NarratoAI
# 复制并设置入口点脚本
COPY --chown=narratoai:narratoai docker-entrypoint.sh /usr/local/bin/
RUN chmod +x /usr/local/bin/docker-entrypoint.sh
# 切换到非 root 用户 # 切换到非 root 用户
USER narratoai USER narratoai
@ -93,5 +85,5 @@ HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
CMD curl -f http://localhost:8501/_stcore/health || exit 1 CMD curl -f http://localhost:8501/_stcore/health || exit 1
# 设置入口点 # 设置入口点
ENTRYPOINT ["docker-entrypoint.sh"] ENTRYPOINT ["/usr/local/bin/docker-entrypoint.sh"]
CMD ["webui"] CMD ["webui"]

View File

@ -48,6 +48,7 @@ def save_config():
with open(config_file, "w", encoding="utf-8") as f: with open(config_file, "w", encoding="utf-8") as f:
_cfg["app"] = app _cfg["app"] = app
_cfg["azure"] = azure _cfg["azure"] = azure
_cfg["tencent"] = tencent
_cfg["soulvoice"] = soulvoice _cfg["soulvoice"] = soulvoice
_cfg["ui"] = ui _cfg["ui"] = ui
f.write(toml.dumps(_cfg)) f.write(toml.dumps(_cfg))
@ -58,6 +59,7 @@ app = _cfg.get("app", {})
whisper = _cfg.get("whisper", {}) whisper = _cfg.get("whisper", {})
proxy = _cfg.get("proxy", {}) proxy = _cfg.get("proxy", {})
azure = _cfg.get("azure", {}) azure = _cfg.get("azure", {})
tencent = _cfg.get("tencent", {})
soulvoice = _cfg.get("soulvoice", {}) soulvoice = _cfg.get("soulvoice", {})
ui = _cfg.get("ui", {}) ui = _cfg.get("ui", {})
frames = _cfg.get("frames", {}) frames = _cfg.get("frames", {})

View File

@ -176,7 +176,7 @@ class VideoClipParams(BaseModel):
voice_volume: Optional[float] = Field(default=AudioVolumeDefaults.VOICE_VOLUME, description="解说语音音量") voice_volume: Optional[float] = Field(default=AudioVolumeDefaults.VOICE_VOLUME, description="解说语音音量")
voice_rate: Optional[float] = Field(default=1.0, description="语速") voice_rate: Optional[float] = Field(default=1.0, description="语速")
voice_pitch: Optional[float] = Field(default=1.0, description="语调") voice_pitch: Optional[float] = Field(default=1.0, description="语调")
tts_engine: Optional[str] = Field(default="tencent", description="TTS 引擎")
bgm_name: Optional[str] = Field(default="random", description="背景音乐名称") bgm_name: Optional[str] = Field(default="random", description="背景音乐名称")
bgm_type: Optional[str] = Field(default="random", description="背景音乐类型") bgm_type: Optional[str] = Field(default="random", description="背景音乐类型")
bgm_file: Optional[str] = Field(default="", description="背景音乐文件") bgm_file: Optional[str] = Field(default="", description="背景音乐文件")

View File

@ -18,7 +18,6 @@ from pathlib import Path
from app.utils import ffmpeg_utils from app.utils import ffmpeg_utils
def parse_timestamp(timestamp: str) -> tuple: def parse_timestamp(timestamp: str) -> tuple:
""" """
解析时间戳字符串返回开始和结束时间 解析时间戳字符串返回开始和结束时间

View File

@ -43,5 +43,5 @@ __all__ = [
'QwenTextProvider', 'QwenTextProvider',
'DeepSeekTextProvider', 'DeepSeekTextProvider',
'SiliconflowVisionProvider', 'SiliconflowVisionProvider',
'SiliconflowTextProvider' 'SiliconflowTextProvider',
] ]

View File

@ -73,6 +73,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
tts_results = voice.tts_multiple( tts_results = voice.tts_multiple(
task_id=task_id, task_id=task_id,
list_script=tts_segments, # 只传入需要TTS的片段 list_script=tts_segments, # 只传入需要TTS的片段
tts_engine=params.tts_engine,
voice_name=params.voice_name, voice_name=params.voice_name,
voice_rate=params.voice_rate, voice_rate=params.voice_rate,
voice_pitch=params.voice_pitch, voice_pitch=params.voice_pitch,
@ -317,6 +318,7 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
tts_results = voice.tts_multiple( tts_results = voice.tts_multiple(
task_id=task_id, task_id=task_id,
list_script=tts_segments, # 只传入需要TTS的片段 list_script=tts_segments, # 只传入需要TTS的片段
tts_engine=params.tts_engine,
voice_name=params.voice_name, voice_name=params.voice_name,
voice_rate=params.voice_rate, voice_rate=params.voice_rate,
voice_pitch=params.voice_pitch, voice_pitch=params.voice_pitch,

View File

@ -5,6 +5,7 @@ import traceback
import edge_tts import edge_tts
import asyncio import asyncio
import requests import requests
import uuid
from loguru import logger from loguru import logger
from typing import List, Union, Tuple from typing import List, Union, Tuple
from datetime import datetime from datetime import datetime
@ -1080,17 +1081,27 @@ def should_use_azure_speech_services(voice_name: str) -> bool:
def _resolve_tts_engine(tts_engine, voice_name):
    """
    Map a requested engine id onto one of the ids the dispatcher understands.

    :param tts_engine: engine id as passed by the caller (may be None or empty)
    :param voice_name: voice name, used to infer the engine from its prefix
    :return: "tencent", "soulvoice", "azure", or the lower-cased unknown id
    """
    # The web UI option list uses "edge_tts", "azure_speech" and "tencent_tts";
    # accept those ids alongside the short ones used by this dispatcher.
    aliases = {
        "tencent_tts": "tencent",
        "azure_speech": "azure",
        "edge_tts": "azure",
    }
    engine = (tts_engine or "azure").strip().lower()
    engine = aliases.get(engine, engine)

    # Callers that do not pass an engine get the default ("azure"). A prefixed
    # voice name can never be a valid Azure/Edge voice, so honour the prefix
    # instead of sending it to the wrong backend.
    if engine == "azure":
        name = voice_name or ""
        if name.startswith("tencent:"):
            return "tencent"
        if name.startswith(("soulvoice:", "speech:")):
            return "soulvoice"
    return engine


def tts(
    text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str, tts_engine: str = "azure"
) -> Union[SubMaker, None]:
    """
    Dispatch a single text-to-speech request to the selected engine.

    :param text: text to synthesize
    :param voice_name: voice name; may carry an engine prefix such as "tencent:" or "soulvoice:"
    :param voice_rate: speech rate, forwarded to the engine
    :param voice_pitch: pitch, only forwarded to Edge TTS (Azure V1)
    :param voice_file: path of the audio file to write
    :param tts_engine: engine id; short ids ("tencent", "soulvoice", "azure") and
        UI ids ("tencent_tts", "azure_speech", "edge_tts") are accepted
    :return: whatever the selected engine function returns (a SubMaker, or None on failure)
    """
    logger.info(f"使用 TTS 引擎: '{tts_engine}', 语音: '{voice_name}'")

    engine = _resolve_tts_engine(tts_engine, voice_name)

    if engine == "tencent":
        logger.info("分发到腾讯云 TTS")
        return tencent_tts(text, voice_name, voice_file, speed=voice_rate)

    if engine == "soulvoice":
        logger.info("分发到 SoulVoice TTS")
        return soulvoice_tts(text, voice_name, voice_file, speed=voice_rate)

    if engine == "azure":
        if should_use_azure_speech_services(voice_name):
            logger.info("分发到 Azure Speech Services (V2)")
            return azure_tts_v2(text, voice_name, voice_file)
        logger.info("分发到 Edge TTS (Azure V1)")
        return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file)

    # Fallback for an unknown engine: default to Edge TTS (Azure V1).
    logger.warning(f"未知的 TTS 引擎: '{tts_engine}', 将默认使用 Edge TTS (Azure V1)。")
    return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file)
@ -1483,7 +1494,7 @@ def get_audio_duration(sub_maker: submaker.SubMaker):
return sub_maker.offset[-1][1] / 10000000 return sub_maker.offset[-1][1] / 10000000
def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: float, voice_pitch: float): def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: float, voice_pitch: float, tts_engine: str = "azure"):
""" """
根据JSON文件中的多段文本进行TTS转换 根据JSON文件中的多段文本进行TTS转换
@ -1491,6 +1502,7 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
:param list_script: 脚本列表 :param list_script: 脚本列表
:param voice_name: 语音名称 :param voice_name: 语音名称
:param voice_rate: 语音速率 :param voice_rate: 语音速率
:param tts_engine: TTS 引擎
:return: 生成的音频文件列表 :return: 生成的音频文件列表
""" """
voice_name = parse_voice_name(voice_name) voice_name = parse_voice_name(voice_name)
@ -1512,6 +1524,7 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
voice_rate=voice_rate, voice_rate=voice_rate,
voice_pitch=voice_pitch, voice_pitch=voice_pitch,
voice_file=audio_file, voice_file=audio_file,
tts_engine=tts_engine,
) )
if sub_maker is None: if sub_maker is None:
@ -1581,14 +1594,6 @@ def get_audio_duration_from_file(audio_file: str) -> float:
# 如果所有方法都失败,返回一个基于文本长度的估算 # 如果所有方法都失败,返回一个基于文本长度的估算
return 3.0 # 默认3秒避免返回0 return 3.0 # 默认3秒避免返回0
def is_soulvoice_voice(voice_name: str) -> bool:
"""
检查是否为 SoulVoice 语音
"""
return voice_name.startswith("soulvoice:") or voice_name.startswith("speech:")
def parse_soulvoice_voice(voice_name: str) -> str: def parse_soulvoice_voice(voice_name: str) -> str:
""" """
解析 SoulVoice 语音名称 解析 SoulVoice 语音名称
@ -1600,6 +1605,118 @@ def parse_soulvoice_voice(voice_name: str) -> str:
return voice_name[10:] # 移除 "soulvoice:" 前缀 return voice_name[10:] # 移除 "soulvoice:" 前缀
return voice_name return voice_name
def parse_tencent_voice(voice_name: str) -> str:
    """
    Extract the Tencent Cloud TTS voice ID from a voice name.

    Accepts the prefixed form ``tencent:101001`` as well as a bare ID.
    Surrounding whitespace is ignored, and ``None`` or an empty string
    yields an empty string instead of raising.

    :param voice_name: voice name, optionally prefixed with "tencent:"
    :return: the voice name without the "tencent:" prefix
    """
    prefix = "tencent:"
    name = (voice_name or "").strip()
    if name.startswith(prefix):
        # Slice by the prefix length rather than a hard-coded index.
        name = name[len(prefix):].strip()
    return name
def tencent_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]:
    """
    Generate speech with Tencent Cloud TTS and write the audio to ``voice_file``.

    The request is attempted up to three times with a one-second pause between
    attempts. Credentials and region are read from ``config.tencent``.

    :param text: text to synthesize
    :param voice_name: voice ID, optionally prefixed with "tencent:"; a
        non-numeric value falls back to voice 101001
    :param voice_file: path the decoded audio bytes are written to
    :param speed: speed multiplier where 1.0 means normal speed
    :return: a SubMaker holding the subtitle timings, or None when the SDK is
        missing, the text or credentials are empty, or every attempt failed
    """
    # Stdlib modules are imported locally so the retry back-off and the
    # decoding do not rely on module-level imports.
    import base64
    import time

    try:
        # Import the Tencent Cloud SDK lazily so the module loads without it.
        from tencentcloud.common import credential
        from tencentcloud.common.profile.client_profile import ClientProfile
        from tencentcloud.common.profile.http_profile import HttpProfile
        from tencentcloud.tts.v20190823 import tts_client, models
    except ImportError as e:
        logger.error(f"腾讯云 SDK 未安装: {e}")
        return None

    # Nothing to synthesize: skip the three remote attempts entirely.
    if not text or not text.strip():
        logger.error("腾讯云 TTS 输入文本为空")
        return None

    # Read the Tencent Cloud configuration.
    tencent_config = config.tencent
    secret_id = tencent_config.get("secret_id")
    secret_key = tencent_config.get("secret_key")
    region = tencent_config.get("region", "ap-beijing")

    if not secret_id or not secret_key:
        logger.error("腾讯云 TTS 配置不完整,请检查 secret_id 和 secret_key")
        return None

    # Resolve the voice ID from the (possibly prefixed) voice name.
    voice_type = parse_tencent_voice(voice_name)

    # Map the multiplier onto the API's Speed value, clamped to -2..2
    # (1.0 -> 0, 2.0 -> 2, 0.5 -> -1).
    speed_value = max(-2.0, min(2.0, (speed - 1.0) * 2))

    max_attempts = 3
    for attempt in range(max_attempts):
        is_last_attempt = attempt == max_attempts - 1
        try:
            logger.info(f"{attempt + 1} 次使用腾讯云 TTS 生成音频")

            # Credentials object.
            cred = credential.Credential(secret_id, secret_key)

            # HTTP profile pointing at the TTS endpoint.
            http_profile = HttpProfile()
            http_profile.endpoint = "tts.tencentcloudapi.com"

            # Client profile wrapping the HTTP profile.
            client_profile = ClientProfile()
            client_profile.httpProfile = http_profile

            client = tts_client.TtsClient(cred, region, client_profile)

            # Build the request; a fresh session id is used for every attempt.
            req = models.TextToVoiceRequest()
            req.Text = text
            req.SessionId = str(uuid.uuid4())
            req.VoiceType = int(voice_type) if voice_type.isdigit() else 101001
            req.Speed = speed_value
            req.SampleRate = 16000
            req.Codec = "mp3"
            req.ProjectId = 0
            req.ModelType = 1
            req.PrimaryLanguage = 1
            req.EnableSubtitle = True

            resp = client.TextToVoice(req)

            # An empty payload is treated as a failed attempt.
            if not resp.Audio:
                logger.warning("腾讯云 TTS 返回空音频数据")
                if not is_last_attempt:
                    time.sleep(1)
                continue

            # The audio is returned base64-encoded.
            audio_data = base64.b64decode(resp.Audio)

            with open(voice_file, "wb") as f:
                f.write(audio_data)

            # Build the subtitle object.
            sub_maker = SubMaker()
            if resp.Subtitles:
                for sub in resp.Subtitles:
                    # Do not rebind ``text`` here: a retry after a failure in
                    # this loop must still send the full original text.
                    # Timings are multiplied by 10000 (ms -> 100ns units).
                    sub_maker.create_sub(
                        (sub.BeginTime * 10000, sub.EndTime * 10000), sub.Text
                    )
            else:
                # No subtitles returned: estimate 200 ms per character.
                duration_ms = len(text) * 200
                sub_maker.create_sub((0, duration_ms * 10000), text)

            logger.info(f"腾讯云 TTS 生成成功,文件大小: {len(audio_data)} 字节")
            return sub_maker

        except Exception as e:
            logger.error(f"腾讯云 TTS 生成音频时出错: {str(e)}")
            if not is_last_attempt:
                time.sleep(1)

    return None
def soulvoice_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]: def soulvoice_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]:
""" """

View File

@ -96,6 +96,14 @@
speech_key = "" speech_key = ""
speech_region = "" speech_region = ""
[tencent]
# 腾讯云 TTS 配置
# 访问 https://console.cloud.tencent.com/cam/capi 获取你的密钥
secret_id = ""
secret_key = ""
# 地域配置,默认为 ap-beijing
region = "ap-beijing"
[soulvoice] [soulvoice]
# SoulVoice TTS API 密钥 # SoulVoice TTS API 密钥
api_key = "" api_key = ""
@ -107,7 +115,7 @@
model = "FunAudioLLM/CosyVoice2-0.5B" model = "FunAudioLLM/CosyVoice2-0.5B"
[ui] [ui]
# TTS引擎选择 (edge_tts, azure_speech, soulvoice) # TTS引擎选择 (edge_tts, azure_speech, soulvoice, tencent_tts)
tts_engine = "edge_tts" tts_engine = "edge_tts"
# Edge TTS 配置 # Edge TTS 配置

View File

@ -6,6 +6,61 @@ log() {
echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1" echo "[$(date +'%Y-%m-%d %H:%M:%S')] $1"
} }
# Function: install Python dependencies at start-up.
# Runs pip only when requirements.txt is newer than the stamp file written
# after the last successful install. Failures are logged, never fatal.
install_runtime_dependencies() {
    log "检查并安装运行时依赖..."

    local requirements_file="requirements.txt"
    local installed_packages_file="/tmp/installed_packages.txt"
    local line

    if [ ! -f "$requirements_file" ]; then
        log "未找到 requirements.txt 文件"
        return 0
    fi

    # Skip when the stamp exists and requirements.txt has not changed since.
    if [ -f "$installed_packages_file" ] && [ ! "$requirements_file" -nt "$installed_packages_file" ]; then
        log "依赖已是最新版本,跳过安装"
        return 0
    fi

    log "发现新的依赖需求,开始安装..."

    # Try a sudo install first; fall back to a user-level install otherwise.
    if command -v sudo >/dev/null 2>&1 && sudo -n true 2>/dev/null; then
        log "尝试使用sudo安装依赖..."
        sudo pip install --no-cache-dir -r "$requirements_file" 2>&1 | while IFS= read -r line; do
            log "pip: $line"
        done
        # PIPESTATUS[0] is pip's exit code, not the logging loop's.
        INSTALL_RESULT=${PIPESTATUS[0]}
    else
        INSTALL_RESULT=1 # mark as failed to trigger the user-level install
    fi

    if [ "$INSTALL_RESULT" -ne 0 ]; then
        log "尝试用户级安装依赖..."
        pip install --user --no-cache-dir -r "$requirements_file" 2>&1 | while IFS= read -r line; do
            log "pip: $line"
        done
        INSTALL_RESULT=${PIPESTATUS[0]}

        # Make sure user-level console scripts are on PATH.
        export PATH="$HOME/.local/bin:$PATH"
    fi

    # Install the Tencent Cloud SDK separately to make sure it is present.
    log "确保腾讯云SDK已安装..."
    if pip show tencentcloud-sdk-python >/dev/null 2>&1; then
        log "腾讯云SDK已安装"
    else
        log "安装腾讯云SDK..."
        # The specifier must be quoted: an unquoted ">=" is a shell redirect
        # that drops the version constraint and creates a file "=3.0.1200".
        if ! pip install --user "tencentcloud-sdk-python>=3.0.1200"; then
            INSTALL_RESULT=1
        fi
    fi

    # Only record the install when everything succeeded, so a failed run is
    # retried on the next start instead of being skipped.
    if [ "$INSTALL_RESULT" -eq 0 ]; then
        touch "$installed_packages_file"
        log "依赖安装完成"
    else
        log "依赖安装失败,下次启动时将重试"
    fi
}
# 函数:检查必要的文件和目录 # 函数:检查必要的文件和目录
check_requirements() { check_requirements() {
log "检查应用环境..." log "检查应用环境..."
@ -27,6 +82,9 @@ check_requirements() {
mkdir -p "$dir" mkdir -p "$dir"
fi fi
done done
# 安装运行时依赖
install_runtime_dependencies
log "环境检查完成" log "环境检查完成"
} }

View File

@ -14,6 +14,7 @@ pysrt==1.1.2
openai>=1.77.0 openai>=1.77.0
google-generativeai>=0.8.5 google-generativeai>=0.8.5
azure-cognitiveservices-speech>=1.37.0 azure-cognitiveservices-speech>=1.37.0
tencentcloud-sdk-python>=3.0.1200
# 图像处理依赖 # 图像处理依赖
Pillow>=10.3.0 Pillow>=10.3.0

View File

@ -24,7 +24,8 @@ def get_tts_engine_options():
return { return {
"edge_tts": "Edge TTS", "edge_tts": "Edge TTS",
"azure_speech": "Azure Speech Services", "azure_speech": "Azure Speech Services",
"soulvoice": "SoulVoice" "soulvoice": "SoulVoice",
"tencent_tts": "腾讯云 TTS"
} }
@ -48,6 +49,12 @@ def get_tts_engine_descriptions():
"features": "提供免费额度,支持语音克隆,支持微信购买额度,无需信用卡,性价比极高", "features": "提供免费额度,支持语音克隆,支持微信购买额度,无需信用卡,性价比极高",
"use_case": "个人用户和中小企业,需要语音克隆功能", "use_case": "个人用户和中小企业,需要语音克隆功能",
"registration": "https://soulvoice.scsmtech.cn/" "registration": "https://soulvoice.scsmtech.cn/"
},
"tencent_tts": {
"title": "腾讯云 TTS",
"features": "提供免费额度,音质优秀,支持多种音色,国内访问速度快",
"use_case": "个人和企业用户,需要稳定的中文语音合成",
"registration": "https://console.cloud.tencent.com/tts"
} }
} }
@ -126,6 +133,8 @@ def render_tts_settings(tr):
render_azure_speech_settings(tr) render_azure_speech_settings(tr)
elif selected_engine == "soulvoice": elif selected_engine == "soulvoice":
render_soulvoice_engine_settings(tr) render_soulvoice_engine_settings(tr)
elif selected_engine == "tencent_tts":
render_tencent_tts_settings(tr)
# 4. 试听功能 # 4. 试听功能
render_voice_preview_new(tr, selected_engine) render_voice_preview_new(tr, selected_engine)
@ -357,6 +366,117 @@ def render_azure_speech_settings(tr):
st.warning("⚠️ 请配置 API Key") st.warning("⚠️ 请配置 API Key")
def render_tencent_tts_settings(tr):
    """
    Render the Tencent Cloud TTS settings and store the chosen values.

    Shows inputs for the secret id/key, region, voice and speech rate, then
    writes them to ``config.tencent`` and ``config.ui``.

    :param tr: translation helper passed by the caller (not used here)
    """
    # Secret ID input.
    secret_id = st.text_input(
        "Secret ID",
        value=config.tencent.get("secret_id", ""),
        help="请输入您的腾讯云 Secret ID"
    )

    # Secret Key input (masked).
    secret_key = st.text_input(
        "Secret Key",
        value=config.tencent.get("secret_key", ""),
        type="password",
        help="请输入您的腾讯云 Secret Key"
    )

    # Region selection.
    region_options = [
        "ap-beijing",
        "ap-shanghai",
        "ap-guangzhou",
        "ap-chengdu",
        "ap-nanjing",
        "ap-singapore",
        "ap-hongkong"
    ]

    # An empty saved value falls back to the default region.
    saved_region = config.tencent.get("region") or "ap-beijing"
    if saved_region not in region_options:
        # Keep a custom region from the config file selectable.
        region_options.append(saved_region)

    region = st.selectbox(
        "服务地域",
        options=region_options,
        index=region_options.index(saved_region),
        help="选择腾讯云 TTS 服务地域"
    )

    # Voice selection: voice ID -> display label.
    # NOTE(review): confirm the gender labels for 101006, 101014 and 101015
    # against the official Tencent Cloud voice list.
    voice_type_options = {
        "101001": "智瑜 - 女声(推荐)",
        "101002": "智聆 - 女声",
        "101003": "智美 - 女声",
        "101004": "智云 - 男声",
        "101005": "智莉 - 女声",
        "101006": "智言 - 男声",
        "101007": "智娜 - 女声",
        "101008": "智琪 - 女声",
        "101009": "智芸 - 女声",
        "101010": "智华 - 男声",
        "101011": "智燕 - 女声",
        "101012": "智丹 - 女声",
        "101013": "智辉 - 男声",
        "101014": "智宁 - 女声",
        "101015": "智萌 - 女声",
        "101016": "智甜 - 女声",
        "101017": "智蓉 - 女声",
        "101018": "智靖 - 男声"
    }

    # Coerce to str so a numeric value in the config still matches a key.
    saved_voice_type = str(config.ui.get("tencent_voice_type", "101001"))
    if saved_voice_type not in voice_type_options:
        voice_type_options[saved_voice_type] = f"自定义音色 ({saved_voice_type})"

    # Select on the ID and only format the label, so the chosen ID never has
    # to be recovered by searching the display strings.
    voice_ids = list(voice_type_options)
    voice_type = st.selectbox(
        "音色选择",
        options=voice_ids,
        index=voice_ids.index(saved_voice_type),
        format_func=lambda voice_id: voice_type_options[voice_id],
        help="选择腾讯云 TTS 音色"
    )

    # Speech rate. The slider needs a float inside its bounds, so sanitise
    # whatever was persisted.
    try:
        saved_rate = float(config.ui.get("tencent_rate", 1.0))
    except (TypeError, ValueError):
        saved_rate = 1.0
    saved_rate = min(2.0, max(0.5, saved_rate))

    voice_rate = st.slider(
        "语速调节",
        min_value=0.5,
        max_value=2.0,
        value=saved_rate,
        step=0.1,
        help="调节语音速度 (0.5-2.0)"
    )

    # Voice overview.
    with st.expander("💡 腾讯云 TTS 音色说明", expanded=False):
        st.write("**女声音色:**")
        female_voices = [(k, v) for k, v in voice_type_options.items() if "女声" in v]
        for voice_id, voice_desc in female_voices[:6]:  # show the first six only
            st.write(f"{voice_desc} (ID: {voice_id})")

        st.write("")
        st.write("**男声音色:**")
        male_voices = [(k, v) for k, v in voice_type_options.items() if "男声" in v]
        for voice_id, voice_desc in male_voices:
            st.write(f"{voice_desc} (ID: {voice_id})")

        st.write("")
        st.info("💡 更多音色请参考腾讯云官方文档")

    # Store the chosen values in the in-memory configuration.
    config.tencent["secret_id"] = secret_id
    config.tencent["secret_key"] = secret_key
    config.tencent["region"] = region
    config.ui["tencent_voice_type"] = voice_type
    config.ui["tencent_rate"] = voice_rate
def render_soulvoice_engine_settings(tr): def render_soulvoice_engine_settings(tr):
"""渲染 SoulVoice 引擎设置""" """渲染 SoulVoice 引擎设置"""
# API Key 输入 # API Key 输入
@ -453,6 +573,11 @@ def render_voice_preview_new(tr, selected_engine):
voice_name = voice_uri if voice_uri.startswith("soulvoice:") else f"soulvoice:{voice_uri}" voice_name = voice_uri if voice_uri.startswith("soulvoice:") else f"soulvoice:{voice_uri}"
voice_rate = 1.0 # SoulVoice 使用默认语速 voice_rate = 1.0 # SoulVoice 使用默认语速
voice_pitch = 1.0 # SoulVoice 不支持音调调节 voice_pitch = 1.0 # SoulVoice 不支持音调调节
elif selected_engine == "tencent_tts":
voice_type = config.ui.get("tencent_voice_type", "101001")
voice_name = f"tencent:{voice_type}"
voice_rate = config.ui.get("tencent_rate", 1.0)
voice_pitch = 1.0 # 腾讯云 TTS 不支持音调调节
if not voice_name: if not voice_name:
st.error("请先配置语音设置") st.error("请先配置语音设置")