From e1f45db95a330549bedd1e1d0dd6c537b9aab847 Mon Sep 17 00:00:00 2001 From: linyq Date: Sun, 3 Aug 2025 04:26:42 +0800 Subject: [PATCH] =?UTF-8?q?feat(tts):=20=E6=B7=BB=E5=8A=A0=20SoulVoice=20T?= =?UTF-8?q?TS=20=E5=BC=95=E6=93=8E=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 实现 SoulVoice TTS 引擎集成,包括配置管理、语音选择、API 调用和字幕处理 新增 SoulVoice 配置项和示例配置 修改音频设置面板以支持 SoulVoice 选项 优化音频时长计算和异常处理 更新多语言文案以反映 SoulVoice 支持 --- app/config/config.py | 2 + app/services/clip_video.py | 43 +++++ app/services/voice.py | 228 +++++++++++++++++++++++++- config.example.toml | 15 ++ webui/components/audio_settings.py | 153 ++++++++++++++--- webui/components/subtitle_settings.py | 35 +++- webui/i18n/en.json | 2 +- webui/i18n/zh.json | 2 +- 8 files changed, 440 insertions(+), 40 deletions(-) diff --git a/app/config/config.py b/app/config/config.py index 4b2b0b4..ceb8f11 100644 --- a/app/config/config.py +++ b/app/config/config.py @@ -48,6 +48,7 @@ def save_config(): with open(config_file, "w", encoding="utf-8") as f: _cfg["app"] = app _cfg["azure"] = azure + _cfg["soulvoice"] = soulvoice _cfg["ui"] = ui f.write(toml.dumps(_cfg)) @@ -57,6 +58,7 @@ app = _cfg.get("app", {}) whisper = _cfg.get("whisper", {}) proxy = _cfg.get("proxy", {}) azure = _cfg.get("azure", {}) +soulvoice = _cfg.get("soulvoice", {}) ui = _cfg.get("ui", {}) frames = _cfg.get("frames", {}) diff --git a/app/services/clip_video.py b/app/services/clip_video.py index 1a0e8e1..1c5fddf 100644 --- a/app/services/clip_video.py +++ b/app/services/clip_video.py @@ -613,6 +613,49 @@ def clip_video( # 根据持续时间计算真正的结束时间(加上1秒余量) duration = item["duration"] + + # 时长合理性检查和修正 + if duration <= 0 or duration > 300: # 超过5分钟认为不合理 + logger.warning(f"检测到异常时长 {duration}秒,片段: {timestamp}") + + # 尝试从时间戳计算实际时长 + try: + start_time_str, end_time_str = timestamp.split('-') + + # 解析开始时间 + if ',' in start_time_str: + time_part, ms_part = start_time_str.split(',') + h1, m1, s1 = map(int, time_part.split(':')) + ms1 = int(ms_part) + else: + h1, m1, s1 = map(int, start_time_str.split(':')) + ms1 = 0 + + # 解析结束时间 + if ',' in end_time_str: + time_part, ms_part = end_time_str.split(',') + h2, m2, s2 = map(int, time_part.split(':')) + ms2 = int(ms_part) + else: + h2, m2, s2 = map(int, end_time_str.split(':')) + ms2 = 0 + + # 计算实际时长 + start_total_ms = (h1 * 3600 + m1 * 60 + s1) * 1000 + ms1 + end_total_ms = (h2 * 3600 + m2 * 60 + s2) * 1000 + ms2 + actual_duration = (end_total_ms - start_total_ms) / 1000.0 + + if actual_duration > 0 and actual_duration <= 300: + duration = actual_duration + logger.info(f"使用时间戳计算的实际时长: {duration:.3f}秒") + else: + duration = 5.0 # 默认5秒 + logger.warning(f"时间戳计算也异常,使用默认时长: {duration}秒") + + except Exception as e: + duration = 5.0 # 默认5秒 + logger.warning(f"时长修正失败,使用默认时长: {duration}秒, 错误: {str(e)}") + calculated_end_time = calculate_end_time(start_time, duration) # 转换为FFmpeg兼容的时间格式(逗号替换为点) diff --git a/app/services/voice.py b/app/services/voice.py index 31f6d66..d45db75 100644 --- a/app/services/voice.py +++ b/app/services/voice.py @@ -4,19 +4,42 @@ import json import traceback import edge_tts import asyncio +import requests from loguru import logger -from typing import List, Union +from typing import List, Union, Tuple from datetime import datetime from xml.sax.saxutils import unescape from edge_tts import submaker, SubMaker -from edge_tts.submaker import mktimestamp +# from edge_tts.submaker import mktimestamp # 函数可能不存在,我们自己实现 from moviepy.video.tools import subtitles +try: + from moviepy import AudioFileClip + MOVIEPY_AVAILABLE = True +except ImportError: + MOVIEPY_AVAILABLE = False + logger.warning("moviepy 未安装,将使用估算方法计算音频时长") import time from app.config import config from app.utils import utils +def mktimestamp(time_seconds: float) -> str: + """ + 将秒数转换为 SRT 时间戳格式 + + Args: + time_seconds: 时间(秒) + + Returns: + str: SRT 格式的时间戳,如 "00:01:23.456" + """ + hours = int(time_seconds // 3600) + minutes = int((time_seconds % 3600) // 60) + seconds = time_seconds % 60 + return f"{hours:02d}:{minutes:02d}:{seconds:06.3f}" + + def get_all_azure_voices(filter_locals=None) -> list[str]: if filter_locals is None: filter_locals = ["zh-CN", "en-US", "zh-HK", "zh-TW", "vi-VN"] @@ -1038,8 +1061,15 @@ def is_azure_v2_voice(voice_name: str): def tts( text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str ) -> Union[SubMaker, None]: + # 检查是否为 SoulVoice 引擎 + if is_soulvoice_voice(voice_name): + return soulvoice_tts(text, voice_name, voice_file, speed=voice_rate) + + # 检查是否为 Azure V2 引擎 if is_azure_v2_voice(voice_name): return azure_tts_v2(text, voice_name, voice_file) + + # 默认使用 Azure V1 引擎 return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file) @@ -1368,6 +1398,10 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str) if start_time < 0: start_time = _start_time + # 将 100纳秒单位转换为秒 + start_time_seconds = start_time / 10000000 + end_time_seconds = end_time / 10000000 + sub = unescape(sub) sub_line += sub sub_text = match_line(sub_line, sub_index) @@ -1375,8 +1409,8 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str) sub_index += 1 line = formatter( idx=sub_index, - start_time=start_time, - end_time=end_time, + start_time=start_time_seconds, + end_time=end_time_seconds, sub_text=sub_text, ) sub_items.append(line) @@ -1402,9 +1436,13 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str) f"\nsub_items:{json.dumps(sub_items, indent=4, ensure_ascii=False)}" f"\nscript_lines:{json.dumps(script_lines, indent=4, ensure_ascii=False)}" ) + # 返回默认值,避免 None 错误 + return subtitle_file, 3.0 except Exception as e: logger.error(f"failed, error: {str(e)}") + # 返回默认值,避免 None 错误 + return subtitle_file, 3.0 def get_audio_duration(sub_maker: submaker.SubMaker): @@ -1453,8 +1491,21 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f f"或者使用其他 tts 引擎") continue else: - # 为当前片段生成字幕文件 - _, duration = create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file) + # SoulVoice 引擎不生成字幕文件 + if is_soulvoice_voice(voice_name): + # 获取实际音频文件的时长 + duration = get_audio_duration_from_file(audio_file) + if duration <= 0: + # 如果无法获取文件时长,尝试从 SubMaker 获取 + duration = get_audio_duration(sub_maker) + if duration <= 0: + # 最后的 fallback,基于文本长度估算 + duration = max(1.0, len(text) / 3.0) + logger.warning(f"无法获取音频时长,使用文本估算: {duration:.2f}秒") + # 不创建字幕文件 + subtitle_file = "" + else: + _, duration = create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file) tts_results.append({ "_id": item['_id'], @@ -1467,3 +1518,168 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f logger.info(f"已生成音频文件: {audio_file}") return tts_results + + +def get_audio_duration_from_file(audio_file: str) -> float: + """ + 获取音频文件的时长(秒) + """ + if MOVIEPY_AVAILABLE: + try: + audio_clip = AudioFileClip(audio_file) + duration = audio_clip.duration + audio_clip.close() + return duration + except Exception as e: + logger.error(f"使用 moviepy 获取音频时长失败: {str(e)}") + + # Fallback: 使用更准确的估算方法 + try: + import os + file_size = os.path.getsize(audio_file) + + # 更准确的 MP3 时长估算 + # 假设 MP3 平均比特率为 128kbps = 16KB/s + # 但实际文件还包含头部信息,所以调整系数 + estimated_duration = max(1.0, file_size / 20000) # 调整为更保守的估算 + + # 对于中文语音,根据文本长度进行二次校正 + # 一般中文语音速度约为 3-4 字/秒 + logger.warning(f"使用文件大小估算音频时长: {estimated_duration:.2f}秒") + return estimated_duration + except Exception as e: + logger.error(f"获取音频时长失败: {str(e)}") + # 如果所有方法都失败,返回一个基于文本长度的估算 + return 3.0 # 默认3秒,避免返回0 + + +def is_soulvoice_voice(voice_name: str) -> bool: + """ + 检查是否为 SoulVoice 语音 + """ + return voice_name.startswith("soulvoice:") or voice_name.startswith("speech:") + + +def parse_soulvoice_voice(voice_name: str) -> str: + """ + 解析 SoulVoice 语音名称 + 支持格式: + - soulvoice:speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr + - speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr + """ + if voice_name.startswith("soulvoice:"): + return voice_name[10:] # 移除 "soulvoice:" 前缀 + return voice_name + + +def soulvoice_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]: + """ + 使用 SoulVoice API 进行文本转语音 + + Args: + text: 要转换的文本 + voice_name: 语音名称 + voice_file: 输出音频文件路径 + speed: 语音速度 + + Returns: + SubMaker: 包含时间戳信息的字幕制作器,失败时返回 None + """ + # 获取配置 + api_key = config.soulvoice.get("api_key", "") + api_url = config.soulvoice.get("api_url", "https://tts.scsmtech.cn/tts") + default_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B") + + if not api_key: + logger.error("SoulVoice API key 未配置") + return None + + # 解析语音名称 + parsed_voice = parse_soulvoice_voice(voice_name) + + # 准备请求数据 + headers = { + 'Authorization': f'Bearer {api_key}', + 'Content-Type': 'application/json' + } + + data = { + 'text': text.strip(), + 'model': default_model, + 'voice': parsed_voice, + 'speed': speed + } + + # 重试机制 + for attempt in range(3): + try: + logger.info(f"第 {attempt + 1} 次调用 SoulVoice API") + + # 设置代理 + proxies = {} + if config.proxy.get("http"): + proxies = { + 'http': config.proxy.get("http"), + 'https': config.proxy.get("https", config.proxy.get("http")) + } + + # 调用 API + response = requests.post( + api_url, + headers=headers, + json=data, + proxies=proxies, + timeout=60 + ) + + if response.status_code == 200: + # 保存音频文件 + with open(voice_file, 'wb') as f: + f.write(response.content) + + logger.info(f"SoulVoice TTS 成功生成音频: {voice_file}") + + # SoulVoice 不支持精确字幕生成,返回简单的 SubMaker 对象 + sub_maker = SubMaker() + sub_maker.subs = [text] # 整个文本作为一个段落 + sub_maker.offset = [(0, 0)] # 占位时间戳 + + return sub_maker + + else: + logger.error(f"SoulVoice API 调用失败: {response.status_code} - {response.text}") + + except requests.exceptions.Timeout: + logger.error(f"SoulVoice API 调用超时 (尝试 {attempt + 1}/3)") + except requests.exceptions.RequestException as e: + logger.error(f"SoulVoice API 网络错误: {str(e)} (尝试 {attempt + 1}/3)") + except Exception as e: + logger.error(f"SoulVoice TTS 处理错误: {str(e)} (尝试 {attempt + 1}/3)") + + if attempt < 2: # 不是最后一次尝试 + time.sleep(2) # 等待2秒后重试 + + logger.error("SoulVoice TTS 生成失败,已达到最大重试次数") + return None + + +def is_soulvoice_voice(voice_name: str) -> bool: + """ + 检查是否为 SoulVoice 语音 + """ + return voice_name.startswith("soulvoice:") or voice_name.startswith("speech:") + + +def parse_soulvoice_voice(voice_name: str) -> str: + """ + 解析 SoulVoice 语音名称 + 支持格式: + - soulvoice:speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr + - speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr + """ + if voice_name.startswith("soulvoice:"): + return voice_name[10:] # 移除 "soulvoice:" 前缀 + return voice_name + + + diff --git a/config.example.toml b/config.example.toml index c9ca75f..ddf529a 100644 --- a/config.example.toml +++ b/config.example.toml @@ -77,6 +77,21 @@ # webui界面是否显示配置项 hide_config = true +[azure] + # Azure TTS 配置 + speech_key = "" + speech_region = "" + +[soulvoice] + # SoulVoice TTS API 密钥 + api_key = "" + # 音色 URI(必需) + voice_uri = "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr" + # API 接口地址(可选,默认值如下) + api_url = "https://tts.scsmtech.cn/tts" + # 默认模型(可选) + model = "FunAudioLLM/CosyVoice2-0.5B" + [proxy] # clash 默认地址:http://127.0.0.1:7890 http = "" diff --git a/webui/components/audio_settings.py b/webui/components/audio_settings.py index e422d48..b194e81 100644 --- a/webui/components/audio_settings.py +++ b/webui/components/audio_settings.py @@ -8,6 +8,17 @@ from app.utils import utils from webui.utils.cache import get_songs_cache +def get_soulvoice_voices(): + """获取 SoulVoice 语音列表""" + # 检查是否配置了 SoulVoice API key + api_key = config.soulvoice.get("api_key", "") + if not api_key: + return [] + + # 只返回一个 SoulVoice 选项,音色通过输入框自定义 + return ["soulvoice:custom"] + + def render_audio_panel(tr): """渲染音频设置面板""" with st.container(border=True): @@ -24,15 +35,24 @@ def render_tts_settings(tr): """渲染TTS(文本转语音)设置""" # 获取支持的语音列表 support_locales = ["zh-CN", "en-US"] - voices = voice.get_all_azure_voices(filter_locals=support_locales) + azure_voices = voice.get_all_azure_voices(filter_locals=support_locales) + + # 添加 SoulVoice 语音选项 + soulvoice_voices = get_soulvoice_voices() + + # 合并所有语音选项 + all_voices = azure_voices + soulvoice_voices # 创建友好的显示名称 - friendly_names = { - v: v.replace("Female", tr("Female")) - .replace("Male", tr("Male")) - .replace("Neural", "") - for v in voices - } + friendly_names = {} + + # Azure 语音的友好名称 + for v in azure_voices: + friendly_names[v] = v.replace("Female", tr("Female")).replace("Male", tr("Male")).replace("Neural", "") + + # SoulVoice 语音的友好名称 + for v in soulvoice_voices: + friendly_names[v] = "SoulVoice (自定义音色)" # 获取保存的语音设置 saved_voice_name = config.ui.get("voice_name", "") @@ -42,9 +62,9 @@ def render_tts_settings(tr): saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name) else: # 如果没有保存的设置,选择与UI语言匹配的第一个语音 - for i, v in enumerate(voices): + for i, v in enumerate(all_voices): if (v.lower().startswith(st.session_state["ui_language"].lower()) - and "V2" not in v): + and "V2" not in v and not v.startswith("soulvoice:")): saved_voice_name_index = i break @@ -60,20 +80,84 @@ def render_tts_settings(tr): list(friendly_names.values()).index(selected_friendly_name) ] + # 如果选择的是 SoulVoice 自定义选项,使用配置的音色 URI + if voice_name == "soulvoice:custom": + custom_voice_uri = config.soulvoice.get("voice_uri", "") + if custom_voice_uri: + # 确保音色 URI 有正确的前缀 + if not custom_voice_uri.startswith("soulvoice:") and not custom_voice_uri.startswith("speech:"): + voice_name = f"soulvoice:{custom_voice_uri}" + else: + voice_name = custom_voice_uri if custom_voice_uri.startswith("soulvoice:") else f"soulvoice:{custom_voice_uri}" + # 保存设置 config.ui["voice_name"] = voice_name - # Azure V2语音特殊处理 - if voice.is_azure_v2_voice(voice_name): + # 根据语音类型渲染不同的设置 + if voice.is_soulvoice_voice(voice_name): + render_soulvoice_settings(tr) + elif voice.is_azure_v2_voice(voice_name): render_azure_v2_settings(tr) # 语音参数设置 - render_voice_parameters(tr) + render_voice_parameters(tr, voice_name) # 试听按钮 render_voice_preview(tr, voice_name) +def render_soulvoice_settings(tr): + """渲染 SoulVoice 语音设置""" + saved_api_key = config.soulvoice.get("api_key", "") + saved_api_url = config.soulvoice.get("api_url", "https://tts.scsmtech.cn/tts") + saved_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B") + saved_voice_uri = config.soulvoice.get("voice_uri", "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr") + + # API Key 输入 + api_key = st.text_input( + "SoulVoice API Key", + value=saved_api_key, + type="password", + help="请输入您的 SoulVoice API 密钥" + ) + + # 音色 URI 输入 + voice_uri = st.text_input( + "音色 URI", + value=saved_voice_uri, + help="请输入 SoulVoice 音色标识符,格式如:speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr", + placeholder="speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr" + ) + + # API URL 输入(可选) + with st.expander("高级设置", expanded=False): + api_url = st.text_input( + "API 地址", + value=saved_api_url, + help="SoulVoice API 接口地址" + ) + + model = st.text_input( + "模型名称", + value=saved_model, + help="使用的 TTS 模型" + ) + + # 保存配置 + config.soulvoice["api_key"] = api_key + config.soulvoice["voice_uri"] = voice_uri + config.soulvoice["api_url"] = api_url + config.soulvoice["model"] = model + + # 显示配置状态 + if api_key and voice_uri: + st.success("✅ SoulVoice 配置已设置") + elif not api_key: + st.warning("⚠️ 请配置 SoulVoice API Key") + elif not voice_uri: + st.warning("⚠️ 请配置音色 URI") + + def render_azure_v2_settings(tr): """渲染Azure V2语音设置""" saved_azure_speech_region = config.azure.get("speech_region", "") @@ -93,7 +177,7 @@ def render_azure_v2_settings(tr): config.azure["speech_key"] = azure_speech_key -def render_voice_parameters(tr): +def render_voice_parameters(tr, voice_name): """渲染语音参数设置""" # 音量 - 使用统一的默认值 voice_volume = st.slider( @@ -106,22 +190,41 @@ def render_voice_parameters(tr): ) st.session_state['voice_volume'] = voice_volume + # 检查是否为 SoulVoice 引擎 + is_soulvoice = voice.is_soulvoice_voice(voice_name) # 语速 - voice_rate = st.selectbox( - tr("Speech Rate"), - options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0], - index=2, - ) + if is_soulvoice: + # SoulVoice 支持更精细的语速控制 + voice_rate = st.slider( + tr("Speech Rate"), + min_value=0.5, + max_value=2.0, + value=1.0, + step=0.1, + help="SoulVoice 语音速度控制" + ) + else: + # Azure TTS 使用预设选项 + voice_rate = st.selectbox( + tr("Speech Rate"), + options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0], + index=2, + ) st.session_state['voice_rate'] = voice_rate - # 音调 - voice_pitch = st.selectbox( - tr("Speech Pitch"), - options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0], - index=2, - ) - st.session_state['voice_pitch'] = voice_pitch + # 音调 - SoulVoice 不支持音调调节 + if not is_soulvoice: + voice_pitch = st.selectbox( + tr("Speech Pitch"), + options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0], + index=2, + ) + st.session_state['voice_pitch'] = voice_pitch + else: + # SoulVoice 不支持音调调节,设置默认值 + st.session_state['voice_pitch'] = 1.0 + st.info("ℹ️ SoulVoice 引擎不支持音调调节") def render_voice_preview(tr, voice_name): diff --git a/webui/components/subtitle_settings.py b/webui/components/subtitle_settings.py index ee27985..53b98c7 100644 --- a/webui/components/subtitle_settings.py +++ b/webui/components/subtitle_settings.py @@ -9,14 +9,35 @@ def render_subtitle_panel(tr): with st.container(border=True): st.write(tr("Subtitle Settings")) - # 启用字幕选项 - enable_subtitles = st.checkbox(tr("Enable Subtitles"), value=True) - st.session_state['subtitle_enabled'] = enable_subtitles + # 检查是否选择了 SoulVoice 引擎 + from app.services import voice + current_voice = st.session_state.get('voice_name', '') + is_soulvoice = voice.is_soulvoice_voice(current_voice) - if enable_subtitles: - render_font_settings(tr) - render_position_settings(tr) - render_style_settings(tr) + if is_soulvoice: + # SoulVoice 引擎时显示禁用提示 + st.warning("⚠️ SoulVoice TTS 不支持精确字幕生成") + st.info("💡 建议使用专业剪辑工具(如剪映、PR等)手动添加字幕") + + # 强制禁用字幕 + st.session_state['subtitle_enabled'] = False + + # 显示禁用状态的复选框 + st.checkbox( + tr("Enable Subtitles"), + value=False, + disabled=True, + help="SoulVoice 引擎不支持字幕生成,请使用其他 TTS 引擎" + ) + else: + # 其他引擎正常显示字幕选项 + enable_subtitles = st.checkbox(tr("Enable Subtitles"), value=True) + st.session_state['subtitle_enabled'] = enable_subtitles + + if enable_subtitles: + render_font_settings(tr) + render_position_settings(tr) + render_style_settings(tr) def render_font_settings(tr): diff --git a/webui/i18n/en.json b/webui/i18n/en.json index 63a2c36..3a69807 100644 --- a/webui/i18n/en.json +++ b/webui/i18n/en.json @@ -29,7 +29,7 @@ "Clip Duration": "Maximum Clip Duration (Seconds) (**Not the total length of the video**, refers to the length of each **composite segment**)", "Number of Videos Generated Simultaneously": "Number of Videos Generated Simultaneously", "Audio Settings": "**Audio Settings**", - "Speech Synthesis": "Speech Synthesis Voice (:red[**Keep consistent with the script language**. Note: V2 version performs better, but requires an API KEY])", + "Speech Synthesis": "Speech Synthesis Voice (:red[**Keep consistent with the script language**. Note: V2 version performs better, but requires an API KEY; SoulVoice provides high-quality Chinese voices])", "Speech Region": "Service Region (:red[Required, [Click to Get](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", "Speech Key": "API Key (:red[Required, either Key 1 or Key 2 is acceptable [Click to Get](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", "Speech Volume": "Speech Volume (1.0 represents 100%)", diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json index e028c9e..aad77e8 100644 --- a/webui/i18n/zh.json +++ b/webui/i18n/zh.json @@ -29,7 +29,7 @@ "Clip Duration": "视频片段最大时长(秒)(**不是视频总长度**,是指每个**合成片段**的长度)", "Number of Videos Generated Simultaneously": "同时生成视频数量", "Audio Settings": "**音频设置**", - "Speech Synthesis": "朗读声音(:red[**与文案语言保持一致**。注意:V2版效果更好,但是需要API KEY])", + "Speech Synthesis": "朗读声音(:red[**与文案语言保持一致**。注意:V2版效果更好,但是需要API KEY;SoulVoice 提供高质量中文语音])", "Speech Region": "服务区域 (:red[必填,[点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", "Speech Key": "API Key (:red[必填,密钥1 或 密钥2 均可 [点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", "Speech Volume": "朗读音量(1.0表示100%)",