diff --git a/Dockerfile b/Dockerfile index ee70617..fc0f316 100644 --- a/Dockerfile +++ b/Dockerfile @@ -22,10 +22,9 @@ RUN python -m pip install --upgrade pip setuptools wheel && \ # 激活虚拟环境 ENV PATH="/opt/venv/bin:$PATH" -# 复制 requirements.txt 并安装 Python 依赖 +# 复制 requirements.txt 并使用镜像安装 Python 依赖 COPY requirements.txt . -RUN pip install --no-cache-dir --upgrade pip && \ - pip install --no-cache-dir -r requirements.txt +RUN pip install --no-cache-dir -i https://pypi.tuna.tsinghua.edu.cn/simple -r requirements.txt # 运行阶段 FROM python:3.12-slim-bookworm @@ -48,7 +47,7 @@ ENV PATH="/opt/venv/bin:$PATH" \ LANG=C.UTF-8 \ LC_ALL=C.UTF-8 -# 安装运行时系统依赖 +# 一次性安装所有依赖、创建用户、配置系统,减少层级 RUN apt-get update && apt-get install -y --no-install-recommends \ imagemagick \ ffmpeg \ @@ -56,32 +55,25 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ curl \ git-lfs \ ca-certificates \ + dos2unix \ + && sed -i 's/ tuple: """ 解析时间戳字符串,返回开始和结束时间 diff --git a/app/services/llm/providers/__init__.py b/app/services/llm/providers/__init__.py index ea1509d..16b764d 100644 --- a/app/services/llm/providers/__init__.py +++ b/app/services/llm/providers/__init__.py @@ -43,5 +43,5 @@ __all__ = [ 'QwenTextProvider', 'DeepSeekTextProvider', 'SiliconflowVisionProvider', - 'SiliconflowTextProvider' + 'SiliconflowTextProvider', ] diff --git a/app/services/task.py b/app/services/task.py index 3914df5..c3702af 100644 --- a/app/services/task.py +++ b/app/services/task.py @@ -73,6 +73,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di tts_results = voice.tts_multiple( task_id=task_id, list_script=tts_segments, # 只传入需要TTS的片段 + tts_engine=params.tts_engine, voice_name=params.voice_name, voice_rate=params.voice_rate, voice_pitch=params.voice_pitch, @@ -317,6 +318,7 @@ def start_subclip_unified(task_id: str, params: VideoClipParams): tts_results = voice.tts_multiple( task_id=task_id, list_script=tts_segments, # 只传入需要TTS的片段 + tts_engine=params.tts_engine, 
def tts(
    text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str, tts_engine: str = "azure"
) -> Union[SubMaker, None]:
    """
    Synthesize ``text`` into ``voice_file`` using the selected TTS backend.

    The backend is resolved in this order:

    1. An explicit engine name. Both the short names (``"tencent"``,
       ``"soulvoice"``, ``"azure"``) and the identifiers used by the web UI
       and ``config.toml`` (``"tencent_tts"``, ``"azure_speech"``,
       ``"edge_tts"``) are accepted, case-insensitively.
    2. The voice-name prefix (``tencent:``, ``soulvoice:``, ``speech:``), so
       callers that rely on the default ``tts_engine`` still reach the
       right backend for prefixed voices.
    3. For the Azure family, ``should_use_azure_speech_services`` picks
       between Azure Speech Services (V2) and Edge TTS (V1).

    An unrecognised engine falls back to Edge TTS (V1) with a warning.

    :param text: text to synthesize
    :param voice_name: voice identifier, optionally carrying an engine prefix
    :param voice_rate: speech rate; forwarded as ``speed`` to Tencent/SoulVoice
    :param voice_pitch: pitch; only forwarded to Edge TTS (V1)
    :param voice_file: path of the audio file to write
    :param tts_engine: engine name, see above
    :return: whatever the selected backend returns (a SubMaker, or None on failure)
    """
    logger.info(f"使用 TTS 引擎: '{tts_engine}', 语音: '{voice_name}'")

    engine = (tts_engine or "").strip().lower()
    voice = voice_name or ""

    # Explicit engine names win; prefixes are only a fallback so that an
    # explicit choice is never overridden by the voice name.
    if engine in ("tencent", "tencent_tts"):
        backend = "tencent"
    elif engine == "soulvoice":
        backend = "soulvoice"
    elif voice.startswith("tencent:"):
        backend = "tencent"
    elif voice.startswith(("soulvoice:", "speech:")):
        backend = "soulvoice"
    elif engine in ("azure", "azure_speech", "edge_tts"):
        backend = "azure"
    else:
        backend = None

    if backend == "tencent":
        logger.info("分发到腾讯云 TTS")
        return tencent_tts(text, voice_name, voice_file, speed=voice_rate)

    if backend == "soulvoice":
        logger.info("分发到 SoulVoice TTS")
        return soulvoice_tts(text, voice_name, voice_file, speed=voice_rate)

    if backend == "azure":
        if should_use_azure_speech_services(voice_name):
            logger.info("分发到 Azure Speech Services (V2)")
            # The V2 call takes neither rate nor pitch.
            return azure_tts_v2(text, voice_name, voice_file)
        logger.info("分发到 Edge TTS (Azure V1)")
        return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file)

    # Unknown engine and no recognisable voice prefix: default to Edge TTS (V1).
    logger.warning(f"未知的 TTS 引擎: '{tts_engine}', 将默认使用 Edge TTS (Azure V1)。")
    return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file)
def parse_tencent_voice(voice_name: str) -> str:
    """
    Extract the Tencent Cloud voice ID from a voice name.

    Accepts the prefixed form ``tencent:101001`` and returns the part after
    the prefix; any other value is returned unchanged.
    """
    prefix = "tencent:"
    if voice_name.startswith(prefix):
        return voice_name[len(prefix):]
    return voice_name


def tencent_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]:
    """
    Generate speech with Tencent Cloud TTS and write it to ``voice_file``.

    Credentials and region are read from ``config.tencent`` (``secret_id``,
    ``secret_key``, ``region``; region defaults to ``ap-beijing``). The
    request is attempted up to three times, sleeping one second between
    attempts, and the MP3 audio from the first successful response is written
    to ``voice_file``.

    :param text: text to synthesize
    :param voice_name: voice ID, optionally prefixed with ``tencent:``;
        a non-numeric ID falls back to voice 101001
    :param voice_file: path of the audio file to write
    :param speed: speed multiplier where 1.0 is normal; mapped to the SDK's
        ``Speed`` field as ``(speed - 1.0) * 2`` and clamped to [-2, 2]
    :return: a SubMaker with one entry per returned subtitle (or a single
        estimated entry when the response has none); None if the SDK is
        missing, the credentials are incomplete, or all attempts fail
    """
    # Stdlib modules are imported locally so the function does not depend on
    # what the rest of the module happens to import.
    import base64
    import time

    try:
        # The Tencent Cloud SDK is imported lazily; a missing SDK disables
        # only this engine.
        from tencentcloud.common import credential
        from tencentcloud.common.profile.client_profile import ClientProfile
        from tencentcloud.common.profile.http_profile import HttpProfile
        from tencentcloud.tts.v20190823 import tts_client, models
    except ImportError as e:
        logger.error(f"腾讯云 SDK 未安装: {e}")
        return None

    # Tolerate a config that lacks the tencent section altogether.
    tencent_config = getattr(config, "tencent", None) or {}
    secret_id = tencent_config.get("secret_id")
    secret_key = tencent_config.get("secret_key")
    region = tencent_config.get("region", "ap-beijing")

    if not secret_id or not secret_key:
        logger.error("腾讯云 TTS 配置不完整,请检查 secret_id 和 secret_key")
        return None

    voice_type = parse_tencent_voice(voice_name)
    if voice_type.isdigit():
        voice_type_id = int(voice_type)
    else:
        # Keep the original fallback voice, but make the substitution visible.
        logger.warning(f"无效的腾讯云音色 ID: '{voice_type}', 将使用默认音色 101001")
        voice_type_id = 101001

    # Map the multiplier (1.0 == normal) onto the SDK's Speed scale and clamp.
    speed_value = max(-2.0, min(2.0, (speed - 1.0) * 2))

    max_attempts = 3
    for attempt in range(1, max_attempts + 1):
        try:
            logger.info(f"第 {attempt} 次使用腾讯云 TTS 生成音频")

            cred = credential.Credential(secret_id, secret_key)

            http_profile = HttpProfile()
            http_profile.endpoint = "tts.tencentcloudapi.com"

            client_profile = ClientProfile()
            client_profile.httpProfile = http_profile

            client = tts_client.TtsClient(cred, region, client_profile)

            req = models.TextToVoiceRequest()
            req.Text = text
            req.SessionId = str(uuid.uuid4())
            req.VoiceType = voice_type_id
            req.Speed = speed_value
            req.SampleRate = 16000
            req.Codec = "mp3"
            req.ProjectId = 0
            req.ModelType = 1
            req.PrimaryLanguage = 1
            req.EnableSubtitle = True

            resp = client.TextToVoice(req)

            if not resp.Audio:
                logger.warning("腾讯云 TTS 返回空音频数据")
                if attempt < max_attempts:
                    time.sleep(1)
                continue

            # The response carries the audio base64-encoded.
            audio_data = base64.b64decode(resp.Audio)

            with open(voice_file, "wb") as f:
                f.write(audio_data)

            sub_maker = SubMaker()
            if resp.Subtitles:
                for sub in resp.Subtitles:
                    # Milliseconds -> 100 ns units.
                    begin = sub.BeginTime * 10000
                    end = sub.EndTime * 10000
                    # create_sub takes (offset, duration), not (start, end).
                    # The subtitle text is passed directly so the `text`
                    # parameter stays intact for any retry.
                    sub_maker.create_sub((begin, end - begin), sub.Text)
            else:
                # No subtitles returned: estimate 200 ms per character.
                duration_ms = len(text) * 200
                sub_maker.create_sub((0, duration_ms * 10000), text)

            logger.info(f"腾讯云 TTS 生成成功,文件大小: {len(audio_data)} 字节")
            return sub_maker

        except Exception as e:
            # Best-effort retry: log the failure and try again.
            logger.error(f"腾讯云 TTS 生成音频时出错: {str(e)}")
            if attempt < max_attempts:
                time.sleep(1)

    return None
# Install Python dependencies at container start when requirements.txt is
# newer than the marker file left by the previous run (or no marker exists).
#
# Tries a passwordless-sudo system install first and falls back to a
# user-level install. Installation is best-effort: failures are logged and the
# marker file is still refreshed, so startup continues either way.
install_runtime_dependencies() {
    log "检查并安装运行时依赖..."

    local requirements_file="requirements.txt"
    local installed_packages_file="/tmp/installed_packages.txt"
    local line

    if [ -f "$requirements_file" ]; then
        # Reinstall when the marker is missing or older than requirements.txt.
        if [ ! -f "$installed_packages_file" ] || [ "$requirements_file" -nt "$installed_packages_file" ]; then
            log "发现新的依赖需求,开始安装..."

            # Use sudo only when it exists and needs no password.
            if command -v sudo >/dev/null 2>&1 && sudo -n true 2>/dev/null; then
                log "尝试使用sudo安装依赖..."
                sudo pip install --no-cache-dir -r "$requirements_file" 2>&1 | while IFS= read -r line; do
                    log "pip: $line"
                done
                # PIPESTATUS[0] is pip's exit status, not the logging loop's.
                INSTALL_RESULT=${PIPESTATUS[0]}
            else
                INSTALL_RESULT=1  # mark as failed so the user-level install runs
            fi

            # Fall back to a user-level install when the sudo path did not succeed.
            if [ "$INSTALL_RESULT" -ne 0 ]; then
                log "尝试用户级安装依赖..."
                pip install --user --no-cache-dir -r "$requirements_file" 2>&1 | while IFS= read -r line; do
                    log "pip: $line"
                done
                INSTALL_RESULT=${PIPESTATUS[0]}
                if [ "$INSTALL_RESULT" -ne 0 ]; then
                    log "用户级依赖安装失败 (退出码: $INSTALL_RESULT)"
                fi

                # Make sure user-level console scripts are on PATH.
                export PATH="$HOME/.local/bin:$PATH"
            fi

            # Install the Tencent Cloud SDK separately in case the bulk install missed it.
            log "确保腾讯云SDK已安装..."
            if ! pip list | grep -q "tencentcloud-sdk-python"; then
                log "安装腾讯云SDK..."
                # The requirement must be quoted: unquoted, the shell treats
                # ">=3.0.1200" as a redirect to a file named "=3.0.1200" and
                # pip never sees the version constraint.
                pip install --user "tencentcloud-sdk-python>=3.0.1200" || log "腾讯云SDK安装失败"
            else
                log "腾讯云SDK已安装"
            fi

            # Record when this install ran.
            touch "$installed_packages_file"
            log "依赖安装完成"
        else
            log "依赖已是最新版本,跳过安装"
        fi
    else
        log "未找到 requirements.txt 文件"
    fi
}
def render_tencent_tts_settings(tr):
    """
    Render the Tencent Cloud TTS settings panel and store the chosen values.

    Draws inputs for Secret ID, Secret Key, region, voice and speech rate,
    plus an expander listing the voices, then writes the current widget
    values to ``config.tencent`` (``secret_id``, ``secret_key``, ``region``)
    and ``config.ui`` (``tencent_voice_type``, ``tencent_rate``) on every call.

    :param tr: translation callable passed to every settings renderer; this
        panel uses hard-coded labels and does not call it
    """
    secret_id = st.text_input(
        "Secret ID",
        value=config.tencent.get("secret_id", ""),
        help="请输入您的腾讯云 Secret ID"
    )

    secret_key = st.text_input(
        "Secret Key",
        value=config.tencent.get("secret_key", ""),
        type="password",
        help="请输入您的腾讯云 Secret Key"
    )

    region_options = [
        "ap-beijing",
        "ap-shanghai",
        "ap-guangzhou",
        "ap-chengdu",
        "ap-nanjing",
        "ap-singapore",
        "ap-hongkong"
    ]

    # Keep a saved region that is not in the preset list selectable.
    saved_region = config.tencent.get("region", "ap-beijing")
    if saved_region not in region_options:
        region_options.append(saved_region)

    region = st.selectbox(
        "服务地域",
        options=region_options,
        index=region_options.index(saved_region),
        help="选择腾讯云 TTS 服务地域"
    )

    # Voice ID -> display label.
    voice_type_options = {
        "101001": "智瑜 - 女声(推荐)",
        "101002": "智聆 - 女声",
        "101003": "智美 - 女声",
        "101004": "智云 - 男声",
        "101005": "智莉 - 女声",
        "101006": "智言 - 男声",
        "101007": "智娜 - 女声",
        "101008": "智琪 - 女声",
        "101009": "智芸 - 女声",
        "101010": "智华 - 男声",
        "101011": "智燕 - 女声",
        "101012": "智丹 - 女声",
        "101013": "智辉 - 男声",
        "101014": "智宁 - 女声",
        "101015": "智萌 - 女声",
        "101016": "智甜 - 女声",
        "101017": "智蓉 - 女声",
        "101018": "智靖 - 男声"
    }

    # The saved ID may have been written as an integer; the option keys are
    # strings, so normalise before looking it up.
    saved_voice_type = str(config.ui.get("tencent_voice_type", "101001"))
    if saved_voice_type not in voice_type_options:
        voice_type_options[saved_voice_type] = f"自定义音色 ({saved_voice_type})"

    # Select on the ID and only format it for display, so the chosen ID never
    # has to be recovered by a reverse lookup on the label.
    voice_ids = list(voice_type_options)
    voice_type = st.selectbox(
        "音色选择",
        options=voice_ids,
        index=voice_ids.index(saved_voice_type),
        format_func=lambda voice_id: voice_type_options[voice_id],
        help="选择腾讯云 TTS 音色"
    )

    # The slider bounds are floats, so the initial value must be a float
    # inside [0.5, 2.0]; fall back to 1.0 for an unusable saved value.
    try:
        saved_rate = float(config.ui.get("tencent_rate", 1.0))
    except (TypeError, ValueError):
        saved_rate = 1.0
    saved_rate = min(2.0, max(0.5, saved_rate))

    voice_rate = st.slider(
        "语速调节",
        min_value=0.5,
        max_value=2.0,
        value=saved_rate,
        step=0.1,
        help="调节语音速度 (0.5-2.0)"
    )

    with st.expander("💡 腾讯云 TTS 音色说明", expanded=False):
        st.write("**女声音色:**")
        female_voices = [(k, v) for k, v in voice_type_options.items() if "女声" in v]
        for voice_id, voice_desc in female_voices[:6]:  # only the first six are listed
            st.write(f"• {voice_desc} (ID: {voice_id})")

        st.write("")
        st.write("**男声音色:**")
        male_voices = [(k, v) for k, v in voice_type_options.items() if "男声" in v]
        for voice_id, voice_desc in male_voices:
            st.write(f"• {voice_desc} (ID: {voice_id})")

        st.write("")
        st.info("💡 更多音色请参考腾讯云官方文档")

    # Store the current widget values in the in-memory config.
    config.tencent["secret_id"] = secret_id
    config.tencent["secret_key"] = secret_key
    config.tencent["region"] = region
    config.ui["tencent_voice_type"] = voice_type
    config.ui["tencent_rate"] = voice_rate