feat(tts): 添加 SoulVoice TTS 引擎支持

实现 SoulVoice TTS 引擎集成,包括配置管理、语音选择、API 调用和字幕处理
新增 SoulVoice 配置项和示例配置
修改音频设置面板以支持 SoulVoice 选项
优化音频时长计算和异常处理
更新多语言文案以反映 SoulVoice 支持
This commit is contained in:
linyq 2025-08-03 04:26:42 +08:00
parent 06cbee0654
commit e1f45db95a
8 changed files with 440 additions and 40 deletions

View File

@ -48,6 +48,7 @@ def save_config():
with open(config_file, "w", encoding="utf-8") as f: with open(config_file, "w", encoding="utf-8") as f:
_cfg["app"] = app _cfg["app"] = app
_cfg["azure"] = azure _cfg["azure"] = azure
_cfg["soulvoice"] = soulvoice
_cfg["ui"] = ui _cfg["ui"] = ui
f.write(toml.dumps(_cfg)) f.write(toml.dumps(_cfg))
@ -57,6 +58,7 @@ app = _cfg.get("app", {})
whisper = _cfg.get("whisper", {}) whisper = _cfg.get("whisper", {})
proxy = _cfg.get("proxy", {}) proxy = _cfg.get("proxy", {})
azure = _cfg.get("azure", {}) azure = _cfg.get("azure", {})
soulvoice = _cfg.get("soulvoice", {})
ui = _cfg.get("ui", {}) ui = _cfg.get("ui", {})
frames = _cfg.get("frames", {}) frames = _cfg.get("frames", {})

View File

@ -613,6 +613,49 @@ def clip_video(
# 根据持续时间计算真正的结束时间加上1秒余量 # 根据持续时间计算真正的结束时间加上1秒余量
duration = item["duration"] duration = item["duration"]
# 时长合理性检查和修正
if duration <= 0 or duration > 300: # 超过5分钟认为不合理
logger.warning(f"检测到异常时长 {duration}秒,片段: {timestamp}")
# 尝试从时间戳计算实际时长
try:
start_time_str, end_time_str = timestamp.split('-')
# 解析开始时间
if ',' in start_time_str:
time_part, ms_part = start_time_str.split(',')
h1, m1, s1 = map(int, time_part.split(':'))
ms1 = int(ms_part)
else:
h1, m1, s1 = map(int, start_time_str.split(':'))
ms1 = 0
# 解析结束时间
if ',' in end_time_str:
time_part, ms_part = end_time_str.split(',')
h2, m2, s2 = map(int, time_part.split(':'))
ms2 = int(ms_part)
else:
h2, m2, s2 = map(int, end_time_str.split(':'))
ms2 = 0
# 计算实际时长
start_total_ms = (h1 * 3600 + m1 * 60 + s1) * 1000 + ms1
end_total_ms = (h2 * 3600 + m2 * 60 + s2) * 1000 + ms2
actual_duration = (end_total_ms - start_total_ms) / 1000.0
if actual_duration > 0 and actual_duration <= 300:
duration = actual_duration
logger.info(f"使用时间戳计算的实际时长: {duration:.3f}")
else:
duration = 5.0 # 默认5秒
logger.warning(f"时间戳计算也异常,使用默认时长: {duration}")
except Exception as e:
duration = 5.0 # 默认5秒
logger.warning(f"时长修正失败,使用默认时长: {duration}秒, 错误: {str(e)}")
calculated_end_time = calculate_end_time(start_time, duration) calculated_end_time = calculate_end_time(start_time, duration)
# 转换为FFmpeg兼容的时间格式逗号替换为点 # 转换为FFmpeg兼容的时间格式逗号替换为点

View File

@ -4,19 +4,42 @@ import json
import traceback import traceback
import edge_tts import edge_tts
import asyncio import asyncio
import requests
from loguru import logger from loguru import logger
from typing import List, Union from typing import List, Union, Tuple
from datetime import datetime from datetime import datetime
from xml.sax.saxutils import unescape from xml.sax.saxutils import unescape
from edge_tts import submaker, SubMaker from edge_tts import submaker, SubMaker
from edge_tts.submaker import mktimestamp # from edge_tts.submaker import mktimestamp # 函数可能不存在,我们自己实现
from moviepy.video.tools import subtitles from moviepy.video.tools import subtitles
try:
from moviepy import AudioFileClip
MOVIEPY_AVAILABLE = True
except ImportError:
MOVIEPY_AVAILABLE = False
logger.warning("moviepy 未安装,将使用估算方法计算音频时长")
import time import time
from app.config import config from app.config import config
from app.utils import utils from app.utils import utils
def mktimestamp(time_seconds: float) -> str:
    """
    Convert a time offset in seconds to an ``HH:MM:SS.mmm`` timestamp string.

    The value is rounded to whole milliseconds *before* it is split into
    hour/minute/second fields, so a rounding carry propagates correctly
    (59.9996 s becomes ``00:01:00.000`` instead of ``00:00:60.000``).

    Args:
        time_seconds: Time offset in seconds. Negative values are clamped to 0.

    Returns:
        str: Timestamp such as ``"00:01:23.456"`` (a dot precedes the
        milliseconds).
    """
    # Work in integer milliseconds to avoid per-field float rounding.
    total_ms = max(0, round(time_seconds * 1000))
    total_seconds, millis = divmod(total_ms, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}"
def get_all_azure_voices(filter_locals=None) -> list[str]: def get_all_azure_voices(filter_locals=None) -> list[str]:
if filter_locals is None: if filter_locals is None:
filter_locals = ["zh-CN", "en-US", "zh-HK", "zh-TW", "vi-VN"] filter_locals = ["zh-CN", "en-US", "zh-HK", "zh-TW", "vi-VN"]
@ -1038,8 +1061,15 @@ def is_azure_v2_voice(voice_name: str):
def tts( def tts(
text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str
) -> Union[SubMaker, None]: ) -> Union[SubMaker, None]:
# 检查是否为 SoulVoice 引擎
if is_soulvoice_voice(voice_name):
return soulvoice_tts(text, voice_name, voice_file, speed=voice_rate)
# 检查是否为 Azure V2 引擎
if is_azure_v2_voice(voice_name): if is_azure_v2_voice(voice_name):
return azure_tts_v2(text, voice_name, voice_file) return azure_tts_v2(text, voice_name, voice_file)
# 默认使用 Azure V1 引擎
return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file) return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file)
@ -1368,6 +1398,10 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
if start_time < 0: if start_time < 0:
start_time = _start_time start_time = _start_time
# 将 100纳秒单位转换为秒
start_time_seconds = start_time / 10000000
end_time_seconds = end_time / 10000000
sub = unescape(sub) sub = unescape(sub)
sub_line += sub sub_line += sub
sub_text = match_line(sub_line, sub_index) sub_text = match_line(sub_line, sub_index)
@ -1375,8 +1409,8 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
sub_index += 1 sub_index += 1
line = formatter( line = formatter(
idx=sub_index, idx=sub_index,
start_time=start_time, start_time=start_time_seconds,
end_time=end_time, end_time=end_time_seconds,
sub_text=sub_text, sub_text=sub_text,
) )
sub_items.append(line) sub_items.append(line)
@ -1402,9 +1436,13 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
f"\nsub_items:{json.dumps(sub_items, indent=4, ensure_ascii=False)}" f"\nsub_items:{json.dumps(sub_items, indent=4, ensure_ascii=False)}"
f"\nscript_lines:{json.dumps(script_lines, indent=4, ensure_ascii=False)}" f"\nscript_lines:{json.dumps(script_lines, indent=4, ensure_ascii=False)}"
) )
# 返回默认值,避免 None 错误
return subtitle_file, 3.0
except Exception as e: except Exception as e:
logger.error(f"failed, error: {str(e)}") logger.error(f"failed, error: {str(e)}")
# 返回默认值,避免 None 错误
return subtitle_file, 3.0
def get_audio_duration(sub_maker: submaker.SubMaker): def get_audio_duration(sub_maker: submaker.SubMaker):
@ -1453,8 +1491,21 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
f"或者使用其他 tts 引擎") f"或者使用其他 tts 引擎")
continue continue
else: else:
# 为当前片段生成字幕文件 # SoulVoice 引擎不生成字幕文件
_, duration = create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file) if is_soulvoice_voice(voice_name):
# 获取实际音频文件的时长
duration = get_audio_duration_from_file(audio_file)
if duration <= 0:
# 如果无法获取文件时长,尝试从 SubMaker 获取
duration = get_audio_duration(sub_maker)
if duration <= 0:
# 最后的 fallback基于文本长度估算
duration = max(1.0, len(text) / 3.0)
logger.warning(f"无法获取音频时长,使用文本估算: {duration:.2f}")
# 不创建字幕文件
subtitle_file = ""
else:
_, duration = create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)
tts_results.append({ tts_results.append({
"_id": item['_id'], "_id": item['_id'],
@ -1467,3 +1518,168 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
logger.info(f"已生成音频文件: {audio_file}") logger.info(f"已生成音频文件: {audio_file}")
return tts_results return tts_results
def get_audio_duration_from_file(audio_file: str) -> float:
    """
    Return the duration of an audio file in seconds.

    Strategy, in order:
      1. If moviepy was importable (``MOVIEPY_AVAILABLE``), open the file with
         ``AudioFileClip`` and read its ``duration``.
      2. Otherwise, or if step 1 fails, estimate the duration from the file
         size.
      3. If the file size cannot be read either, return a fixed 3.0 seconds.

    Args:
        audio_file: Path of the audio file to inspect.

    Returns:
        float: Measured or estimated duration in seconds. The estimation
        paths never return less than 1.0.
    """
    if MOVIEPY_AVAILABLE:
        try:
            audio_clip = AudioFileClip(audio_file)
            try:
                duration = audio_clip.duration
            finally:
                # Always release the reader, even if reading the duration fails.
                audio_clip.close()
            # A missing duration falls through to the size-based estimate
            # instead of handing None back to callers that compare with 0.
            if duration is not None:
                return duration
        except Exception as e:
            logger.error(f"使用 moviepy 获取音频时长失败: {str(e)}")

    # Fallback: estimate from the file size.
    try:
        import os

        file_size = os.path.getsize(audio_file)
        # Assumes an MP3 at roughly 128 kbps (about 16 KB/s); the divisor is
        # raised to 20000 bytes/s to allow for header overhead, giving a more
        # conservative estimate.
        estimated_duration = max(1.0, file_size / 20000)
        logger.warning(f"使用文件大小估算音频时长: {estimated_duration:.2f}")
        return estimated_duration
    except Exception as e:
        logger.error(f"获取音频时长失败: {str(e)}")
        # Last resort: fixed default so callers never receive 0.
        return 3.0
def is_soulvoice_voice(voice_name: str) -> bool:
    """
    Tell whether a voice name refers to the SoulVoice engine.

    A SoulVoice voice name starts with ``"soulvoice:"`` or ``"speech:"``.

    Args:
        voice_name: Voice identifier to test. ``None`` or any other non-string
            value is treated as "not SoulVoice" instead of raising.

    Returns:
        bool: True if the name carries one of the SoulVoice prefixes.
    """
    if not isinstance(voice_name, str):
        return False
    return voice_name.startswith(("soulvoice:", "speech:"))
def parse_soulvoice_voice(voice_name: str) -> str:
    """
    Strip the optional ``"soulvoice:"`` prefix from a voice name.

    Supported input formats:
      - soulvoice:speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr
      - speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr

    Args:
        voice_name: Voice identifier, with or without the ``"soulvoice:"``
            prefix.

    Returns:
        str: The identifier without the ``"soulvoice:"`` prefix; names that do
        not start with it are returned unchanged.
    """
    prefix = "soulvoice:"
    if voice_name.startswith(prefix):
        # Slice by the prefix length rather than a hard-coded index.
        return voice_name[len(prefix):]
    return voice_name
def soulvoice_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]:
    """
    Synthesize speech through the SoulVoice HTTP API and save it to a file.

    The API key, endpoint URL and model are read from ``config.soulvoice``;
    an optional HTTP(S) proxy is read from ``config.proxy``. The request is
    attempted up to three times with a two-second pause between attempts.

    Args:
        text: Text to synthesize. Surrounding whitespace is stripped before
            sending; blank text is rejected without calling the API.
        voice_name: Voice identifier, with or without the "soulvoice:" prefix.
        voice_file: Path the returned audio bytes are written to.
        speed: Speech speed passed to the API as ``speed``.

    Returns:
        SubMaker with a single placeholder entry (the whole text, offset
        ``(0, 0)``) on success, because no timing information is produced
        here; ``None`` if the API key is missing, the text is blank, or all
        attempts fail.
    """
    max_attempts = 3
    retry_delay_seconds = 2

    # Load configuration.
    api_key = config.soulvoice.get("api_key", "")
    api_url = config.soulvoice.get("api_url", "https://tts.scsmtech.cn/tts")
    default_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B")

    if not api_key:
        logger.error("SoulVoice API key 未配置")
        return None

    # Nothing to synthesize: avoid a pointless (and retried) API round-trip.
    clean_text = (text or "").strip()
    if not clean_text:
        logger.error("SoulVoice TTS 文本为空,无法生成音频")
        return None

    # Build the request once; none of it changes between attempts.
    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }
    data = {
        'text': clean_text,
        'model': default_model,
        'voice': parse_soulvoice_voice(voice_name),
        'speed': speed
    }
    proxies = {}
    http_proxy = config.proxy.get("http")
    if http_proxy:
        proxies = {
            'http': http_proxy,
            # Fall back to the HTTP proxy when no HTTPS proxy is configured.
            'https': config.proxy.get("https", http_proxy)
        }

    for attempt in range(max_attempts):
        try:
            logger.info(f"{attempt + 1} 次调用 SoulVoice API")
            response = requests.post(
                api_url,
                headers=headers,
                json=data,
                proxies=proxies,
                timeout=60
            )

            if response.status_code != 200:
                logger.error(f"SoulVoice API 调用失败: {response.status_code} - {response.text}")
            elif not response.content:
                # A 200 with no body would otherwise be saved as a zero-byte file.
                logger.error("SoulVoice API 返回了空的音频数据")
            else:
                with open(voice_file, 'wb') as f:
                    f.write(response.content)
                logger.info(f"SoulVoice TTS 成功生成音频: {voice_file}")

                # No timing data is available from this call, so return a
                # SubMaker holding the whole text with a placeholder offset.
                sub_maker = SubMaker()
                sub_maker.subs = [text]
                sub_maker.offset = [(0, 0)]
                return sub_maker
        except requests.exceptions.Timeout:
            logger.error(f"SoulVoice API 调用超时 (尝试 {attempt + 1}/{max_attempts})")
        except requests.exceptions.RequestException as e:
            logger.error(f"SoulVoice API 网络错误: {str(e)} (尝试 {attempt + 1}/{max_attempts})")
        except Exception as e:
            # Best-effort boundary: any other failure (e.g. writing the file)
            # is logged and retried rather than propagated.
            logger.error(f"SoulVoice TTS 处理错误: {str(e)} (尝试 {attempt + 1}/{max_attempts})")

        # Pause before the next attempt, but not after the last one.
        if attempt < max_attempts - 1:
            time.sleep(retry_delay_seconds)

    logger.error("SoulVoice TTS 生成失败,已达到最大重试次数")
    return None
# NOTE(review): this is a second definition of is_soulvoice_voice in this
# module; being later in the file, it rebinds the name defined earlier.
# The two definitions should be consolidated into one.
def is_soulvoice_voice(voice_name: str) -> bool:
    """
    Tell whether a voice name refers to the SoulVoice engine.

    A SoulVoice voice name starts with ``"soulvoice:"`` or ``"speech:"``.

    Args:
        voice_name: Voice identifier to test. ``None`` or any other non-string
            value is treated as "not SoulVoice" instead of raising.

    Returns:
        bool: True if the name carries one of the SoulVoice prefixes.
    """
    if not isinstance(voice_name, str):
        return False
    return voice_name.startswith(("soulvoice:", "speech:"))
# NOTE(review): this is a second definition of parse_soulvoice_voice in this
# module; being later in the file, it rebinds the name defined earlier.
# The two definitions should be consolidated into one.
def parse_soulvoice_voice(voice_name: str) -> str:
    """
    Strip the optional ``"soulvoice:"`` prefix from a voice name.

    Supported input formats:
      - soulvoice:speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr
      - speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr

    Args:
        voice_name: Voice identifier, with or without the ``"soulvoice:"``
            prefix.

    Returns:
        str: The identifier without the ``"soulvoice:"`` prefix; names that do
        not start with it are returned unchanged.
    """
    prefix = "soulvoice:"
    if voice_name.startswith(prefix):
        # Slice by the prefix length rather than a hard-coded index.
        return voice_name[len(prefix):]
    return voice_name

View File

@ -77,6 +77,21 @@
# webui界面是否显示配置项 # webui界面是否显示配置项
hide_config = true hide_config = true
[azure]
# Azure TTS 配置
speech_key = ""
speech_region = ""
[soulvoice]
# SoulVoice TTS API 密钥
api_key = ""
# 音色 URI(必需)
voice_uri = "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr"
# API 接口地址(可选,默认值如下)
api_url = "https://tts.scsmtech.cn/tts"
# 默认模型(可选)
model = "FunAudioLLM/CosyVoice2-0.5B"
[proxy] [proxy]
# clash 默认地址http://127.0.0.1:7890 # clash 默认地址http://127.0.0.1:7890
http = "" http = ""

View File

@ -8,6 +8,17 @@ from app.utils import utils
from webui.utils.cache import get_songs_cache from webui.utils.cache import get_songs_cache
def get_soulvoice_voices():
    """Return the SoulVoice entries to offer in the voice selector.

    The list is empty when no SoulVoice API key is present in
    ``config.soulvoice``. Otherwise it holds the single entry
    ``"soulvoice:custom"``; the actual voice is chosen through a separate
    input field rather than from this list.
    """
    # NOTE(review): with no API key configured the option is hidden entirely;
    # confirm users have another way to enter the key.
    has_api_key = bool(config.soulvoice.get("api_key", ""))
    return ["soulvoice:custom"] if has_api_key else []
def render_audio_panel(tr): def render_audio_panel(tr):
"""渲染音频设置面板""" """渲染音频设置面板"""
with st.container(border=True): with st.container(border=True):
@ -24,15 +35,24 @@ def render_tts_settings(tr):
"""渲染TTS(文本转语音)设置""" """渲染TTS(文本转语音)设置"""
# 获取支持的语音列表 # 获取支持的语音列表
support_locales = ["zh-CN", "en-US"] support_locales = ["zh-CN", "en-US"]
voices = voice.get_all_azure_voices(filter_locals=support_locales) azure_voices = voice.get_all_azure_voices(filter_locals=support_locales)
# 添加 SoulVoice 语音选项
soulvoice_voices = get_soulvoice_voices()
# 合并所有语音选项
all_voices = azure_voices + soulvoice_voices
# 创建友好的显示名称 # 创建友好的显示名称
friendly_names = { friendly_names = {}
v: v.replace("Female", tr("Female"))
.replace("Male", tr("Male")) # Azure 语音的友好名称
.replace("Neural", "") for v in azure_voices:
for v in voices friendly_names[v] = v.replace("Female", tr("Female")).replace("Male", tr("Male")).replace("Neural", "")
}
# SoulVoice 语音的友好名称
for v in soulvoice_voices:
friendly_names[v] = "SoulVoice (自定义音色)"
# 获取保存的语音设置 # 获取保存的语音设置
saved_voice_name = config.ui.get("voice_name", "") saved_voice_name = config.ui.get("voice_name", "")
@ -42,9 +62,9 @@ def render_tts_settings(tr):
saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name) saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name)
else: else:
# 如果没有保存的设置选择与UI语言匹配的第一个语音 # 如果没有保存的设置选择与UI语言匹配的第一个语音
for i, v in enumerate(voices): for i, v in enumerate(all_voices):
if (v.lower().startswith(st.session_state["ui_language"].lower()) if (v.lower().startswith(st.session_state["ui_language"].lower())
and "V2" not in v): and "V2" not in v and not v.startswith("soulvoice:")):
saved_voice_name_index = i saved_voice_name_index = i
break break
@ -60,20 +80,84 @@ def render_tts_settings(tr):
list(friendly_names.values()).index(selected_friendly_name) list(friendly_names.values()).index(selected_friendly_name)
] ]
# 如果选择的是 SoulVoice 自定义选项,使用配置的音色 URI
if voice_name == "soulvoice:custom":
custom_voice_uri = config.soulvoice.get("voice_uri", "")
if custom_voice_uri:
# 确保音色 URI 有正确的前缀
if not custom_voice_uri.startswith("soulvoice:") and not custom_voice_uri.startswith("speech:"):
voice_name = f"soulvoice:{custom_voice_uri}"
else:
voice_name = custom_voice_uri if custom_voice_uri.startswith("soulvoice:") else f"soulvoice:{custom_voice_uri}"
# 保存设置 # 保存设置
config.ui["voice_name"] = voice_name config.ui["voice_name"] = voice_name
# Azure V2语音特殊处理 # 根据语音类型渲染不同的设置
if voice.is_azure_v2_voice(voice_name): if voice.is_soulvoice_voice(voice_name):
render_soulvoice_settings(tr)
elif voice.is_azure_v2_voice(voice_name):
render_azure_v2_settings(tr) render_azure_v2_settings(tr)
# 语音参数设置 # 语音参数设置
render_voice_parameters(tr) render_voice_parameters(tr, voice_name)
# 试听按钮 # 试听按钮
render_voice_preview(tr, voice_name) render_voice_preview(tr, voice_name)
def render_soulvoice_settings(tr):
    """Render the SoulVoice settings widgets and store their values.

    Shows inputs for the API key and the voice URI, plus an "advanced"
    expander with the API URL and model name. Every widget is pre-filled from
    ``config.soulvoice``, and the current widget values are written back to
    ``config.soulvoice`` on each call. A status message then reports whether
    the API key and voice URI are both set.

    Args:
        tr: Translation callable passed to the render functions; it is not
            used by this function.
    """
    settings = config.soulvoice

    # Previously saved values (or defaults) used to pre-fill the widgets.
    initial_key = settings.get("api_key", "")
    initial_url = settings.get("api_url", "https://tts.scsmtech.cn/tts")
    initial_model = settings.get("model", "FunAudioLLM/CosyVoice2-0.5B")
    initial_uri = settings.get("voice_uri", "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr")

    # Required inputs: API key (masked) and voice URI.
    api_key = st.text_input(
        "SoulVoice API Key",
        value=initial_key,
        type="password",
        help="请输入您的 SoulVoice API 密钥"
    )
    voice_uri = st.text_input(
        "音色 URI",
        value=initial_uri,
        help="请输入 SoulVoice 音色标识符格式如speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr",
        placeholder="speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr"
    )

    # Optional inputs, collapsed by default.
    with st.expander("高级设置", expanded=False):
        api_url = st.text_input(
            "API 地址",
            value=initial_url,
            help="SoulVoice API 接口地址"
        )
        model = st.text_input(
            "模型名称",
            value=initial_model,
            help="使用的 TTS 模型"
        )

    # Persist the current widget values into the in-memory config.
    settings.update({
        "api_key": api_key,
        "voice_uri": voice_uri,
        "api_url": api_url,
        "model": model,
    })

    # Status line: a missing API key is reported before a missing voice URI.
    if not api_key:
        st.warning("⚠️ 请配置 SoulVoice API Key")
    elif not voice_uri:
        st.warning("⚠️ 请配置音色 URI")
    else:
        st.success("✅ SoulVoice 配置已设置")
def render_azure_v2_settings(tr): def render_azure_v2_settings(tr):
"""渲染Azure V2语音设置""" """渲染Azure V2语音设置"""
saved_azure_speech_region = config.azure.get("speech_region", "") saved_azure_speech_region = config.azure.get("speech_region", "")
@ -93,7 +177,7 @@ def render_azure_v2_settings(tr):
config.azure["speech_key"] = azure_speech_key config.azure["speech_key"] = azure_speech_key
def render_voice_parameters(tr): def render_voice_parameters(tr, voice_name):
"""渲染语音参数设置""" """渲染语音参数设置"""
# 音量 - 使用统一的默认值 # 音量 - 使用统一的默认值
voice_volume = st.slider( voice_volume = st.slider(
@ -106,22 +190,41 @@ def render_voice_parameters(tr):
) )
st.session_state['voice_volume'] = voice_volume st.session_state['voice_volume'] = voice_volume
# 检查是否为 SoulVoice 引擎
is_soulvoice = voice.is_soulvoice_voice(voice_name)
# 语速 # 语速
voice_rate = st.selectbox( if is_soulvoice:
tr("Speech Rate"), # SoulVoice 支持更精细的语速控制
options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0], voice_rate = st.slider(
index=2, tr("Speech Rate"),
) min_value=0.5,
max_value=2.0,
value=1.0,
step=0.1,
help="SoulVoice 语音速度控制"
)
else:
# Azure TTS 使用预设选项
voice_rate = st.selectbox(
tr("Speech Rate"),
options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0],
index=2,
)
st.session_state['voice_rate'] = voice_rate st.session_state['voice_rate'] = voice_rate
# 音调 # 音调 - SoulVoice 不支持音调调节
voice_pitch = st.selectbox( if not is_soulvoice:
tr("Speech Pitch"), voice_pitch = st.selectbox(
options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0], tr("Speech Pitch"),
index=2, options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0],
) index=2,
st.session_state['voice_pitch'] = voice_pitch )
st.session_state['voice_pitch'] = voice_pitch
else:
# SoulVoice 不支持音调调节,设置默认值
st.session_state['voice_pitch'] = 1.0
st.info(" SoulVoice 引擎不支持音调调节")
def render_voice_preview(tr, voice_name): def render_voice_preview(tr, voice_name):

View File

@ -9,14 +9,35 @@ def render_subtitle_panel(tr):
with st.container(border=True): with st.container(border=True):
st.write(tr("Subtitle Settings")) st.write(tr("Subtitle Settings"))
# 启用字幕选项 # 检查是否选择了 SoulVoice 引擎
enable_subtitles = st.checkbox(tr("Enable Subtitles"), value=True) from app.services import voice
st.session_state['subtitle_enabled'] = enable_subtitles current_voice = st.session_state.get('voice_name', '')
is_soulvoice = voice.is_soulvoice_voice(current_voice)
if enable_subtitles: if is_soulvoice:
render_font_settings(tr) # SoulVoice 引擎时显示禁用提示
render_position_settings(tr) st.warning("⚠️ SoulVoice TTS 不支持精确字幕生成")
render_style_settings(tr) st.info("💡 建议使用专业剪辑工具如剪映、PR等手动添加字幕")
# 强制禁用字幕
st.session_state['subtitle_enabled'] = False
# 显示禁用状态的复选框
st.checkbox(
tr("Enable Subtitles"),
value=False,
disabled=True,
help="SoulVoice 引擎不支持字幕生成,请使用其他 TTS 引擎"
)
else:
# 其他引擎正常显示字幕选项
enable_subtitles = st.checkbox(tr("Enable Subtitles"), value=True)
st.session_state['subtitle_enabled'] = enable_subtitles
if enable_subtitles:
render_font_settings(tr)
render_position_settings(tr)
render_style_settings(tr)
def render_font_settings(tr): def render_font_settings(tr):

View File

@ -29,7 +29,7 @@
"Clip Duration": "Maximum Clip Duration (Seconds) (**Not the total length of the video**, refers to the length of each **composite segment**)", "Clip Duration": "Maximum Clip Duration (Seconds) (**Not the total length of the video**, refers to the length of each **composite segment**)",
"Number of Videos Generated Simultaneously": "Number of Videos Generated Simultaneously", "Number of Videos Generated Simultaneously": "Number of Videos Generated Simultaneously",
"Audio Settings": "**Audio Settings**", "Audio Settings": "**Audio Settings**",
"Speech Synthesis": "Speech Synthesis Voice (:red[**Keep consistent with the script language**. Note: V2 version performs better, but requires an API KEY])", "Speech Synthesis": "Speech Synthesis Voice (:red[**Keep consistent with the script language**. Note: V2 version performs better, but requires an API KEY; SoulVoice provides high-quality Chinese voices])",
"Speech Region": "Service Region (:red[Required, [Click to Get](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", "Speech Region": "Service Region (:red[Required, [Click to Get](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
"Speech Key": "API Key (:red[Required, either Key 1 or Key 2 is acceptable [Click to Get](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", "Speech Key": "API Key (:red[Required, either Key 1 or Key 2 is acceptable [Click to Get](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
"Speech Volume": "Speech Volume (1.0 represents 100%)", "Speech Volume": "Speech Volume (1.0 represents 100%)",

View File

@ -29,7 +29,7 @@
"Clip Duration": "视频片段最大时长(秒)**不是视频总长度**,是指每个**合成片段**的长度)", "Clip Duration": "视频片段最大时长(秒)**不是视频总长度**,是指每个**合成片段**的长度)",
"Number of Videos Generated Simultaneously": "同时生成视频数量", "Number of Videos Generated Simultaneously": "同时生成视频数量",
"Audio Settings": "**音频设置**", "Audio Settings": "**音频设置**",
"Speech Synthesis": "朗读声音(:red[**与文案语言保持一致**。注意V2版效果更好但是需要API KEY]", "Speech Synthesis": "朗读声音(:red[**与文案语言保持一致**。注意V2版效果更好但是需要API KEYSoulVoice 提供高质量中文语音]",
"Speech Region": "服务区域 (:red[必填,[点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", "Speech Region": "服务区域 (:red[必填,[点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
"Speech Key": "API Key (:red[必填密钥1 或 密钥2 均可 [点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", "Speech Key": "API Key (:red[必填密钥1 或 密钥2 均可 [点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
"Speech Volume": "朗读音量1.0表示100%", "Speech Volume": "朗读音量1.0表示100%",