mirror of
https://github.com/linyqh/NarratoAI.git
synced 2025-12-11 10:32:49 +00:00
feat(tts): 添加 SoulVoice TTS 引擎支持
实现 SoulVoice TTS 引擎集成,包括配置管理、语音选择、API 调用和字幕处理 新增 SoulVoice 配置项和示例配置 修改音频设置面板以支持 SoulVoice 选项 优化音频时长计算和异常处理 更新多语言文案以反映 SoulVoice 支持
This commit is contained in:
parent
06cbee0654
commit
e1f45db95a
@ -48,6 +48,7 @@ def save_config():
|
||||
with open(config_file, "w", encoding="utf-8") as f:
|
||||
_cfg["app"] = app
|
||||
_cfg["azure"] = azure
|
||||
_cfg["soulvoice"] = soulvoice
|
||||
_cfg["ui"] = ui
|
||||
f.write(toml.dumps(_cfg))
|
||||
|
||||
@ -57,6 +58,7 @@ app = _cfg.get("app", {})
|
||||
whisper = _cfg.get("whisper", {})
|
||||
proxy = _cfg.get("proxy", {})
|
||||
azure = _cfg.get("azure", {})
|
||||
soulvoice = _cfg.get("soulvoice", {})
|
||||
ui = _cfg.get("ui", {})
|
||||
frames = _cfg.get("frames", {})
|
||||
|
||||
|
||||
@ -613,6 +613,49 @@ def clip_video(
|
||||
|
||||
# 根据持续时间计算真正的结束时间(加上1秒余量)
|
||||
duration = item["duration"]
|
||||
|
||||
# 时长合理性检查和修正
|
||||
if duration <= 0 or duration > 300: # 超过5分钟认为不合理
|
||||
logger.warning(f"检测到异常时长 {duration}秒,片段: {timestamp}")
|
||||
|
||||
# 尝试从时间戳计算实际时长
|
||||
try:
|
||||
start_time_str, end_time_str = timestamp.split('-')
|
||||
|
||||
# 解析开始时间
|
||||
if ',' in start_time_str:
|
||||
time_part, ms_part = start_time_str.split(',')
|
||||
h1, m1, s1 = map(int, time_part.split(':'))
|
||||
ms1 = int(ms_part)
|
||||
else:
|
||||
h1, m1, s1 = map(int, start_time_str.split(':'))
|
||||
ms1 = 0
|
||||
|
||||
# 解析结束时间
|
||||
if ',' in end_time_str:
|
||||
time_part, ms_part = end_time_str.split(',')
|
||||
h2, m2, s2 = map(int, time_part.split(':'))
|
||||
ms2 = int(ms_part)
|
||||
else:
|
||||
h2, m2, s2 = map(int, end_time_str.split(':'))
|
||||
ms2 = 0
|
||||
|
||||
# 计算实际时长
|
||||
start_total_ms = (h1 * 3600 + m1 * 60 + s1) * 1000 + ms1
|
||||
end_total_ms = (h2 * 3600 + m2 * 60 + s2) * 1000 + ms2
|
||||
actual_duration = (end_total_ms - start_total_ms) / 1000.0
|
||||
|
||||
if actual_duration > 0 and actual_duration <= 300:
|
||||
duration = actual_duration
|
||||
logger.info(f"使用时间戳计算的实际时长: {duration:.3f}秒")
|
||||
else:
|
||||
duration = 5.0 # 默认5秒
|
||||
logger.warning(f"时间戳计算也异常,使用默认时长: {duration}秒")
|
||||
|
||||
except Exception as e:
|
||||
duration = 5.0 # 默认5秒
|
||||
logger.warning(f"时长修正失败,使用默认时长: {duration}秒, 错误: {str(e)}")
|
||||
|
||||
calculated_end_time = calculate_end_time(start_time, duration)
|
||||
|
||||
# 转换为FFmpeg兼容的时间格式(逗号替换为点)
|
||||
|
||||
@ -4,19 +4,42 @@ import json
|
||||
import traceback
|
||||
import edge_tts
|
||||
import asyncio
|
||||
import requests
|
||||
from loguru import logger
|
||||
from typing import List, Union
|
||||
from typing import List, Union, Tuple
|
||||
from datetime import datetime
|
||||
from xml.sax.saxutils import unescape
|
||||
from edge_tts import submaker, SubMaker
|
||||
from edge_tts.submaker import mktimestamp
|
||||
# from edge_tts.submaker import mktimestamp # 函数可能不存在,我们自己实现
|
||||
from moviepy.video.tools import subtitles
|
||||
try:
|
||||
from moviepy import AudioFileClip
|
||||
MOVIEPY_AVAILABLE = True
|
||||
except ImportError:
|
||||
MOVIEPY_AVAILABLE = False
|
||||
logger.warning("moviepy 未安装,将使用估算方法计算音频时长")
|
||||
import time
|
||||
|
||||
from app.config import config
|
||||
from app.utils import utils
|
||||
|
||||
|
||||
def mktimestamp(time_seconds: float) -> str:
    """
    Convert a time offset in seconds to an ``HH:MM:SS.mmm`` timestamp string.

    The value is rounded to whole milliseconds *before* it is split into
    hour/minute/second fields.  Formatting the fractional seconds directly
    would let a value such as 59.9996 round up to ``60.000`` without carrying
    into the minutes field, producing the invalid ``00:00:60.000``.

    Args:
        time_seconds: Time offset in seconds.

    Returns:
        str: Timestamp such as "00:01:23.456".  The millisecond separator is
        a dot (not the comma used inside SRT files).
    """
    total_ms = int(round(time_seconds * 1000))
    hours, remainder_ms = divmod(total_ms, 3_600_000)
    minutes, remainder_ms = divmod(remainder_ms, 60_000)
    seconds, millis = divmod(remainder_ms, 1000)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}"
|
||||
|
||||
|
||||
def get_all_azure_voices(filter_locals=None) -> list[str]:
|
||||
if filter_locals is None:
|
||||
filter_locals = ["zh-CN", "en-US", "zh-HK", "zh-TW", "vi-VN"]
|
||||
@ -1038,8 +1061,15 @@ def is_azure_v2_voice(voice_name: str):
|
||||
def tts(
    text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str
) -> Union[SubMaker, None]:
    """
    Synthesize ``text`` to ``voice_file`` with the engine implied by ``voice_name``.

    Engine selection order:
      1. SoulVoice, when ``is_soulvoice_voice(voice_name)`` is true
         (``voice_rate`` is forwarded as ``speed``; ``voice_pitch`` is not used).
      2. Azure V2, when ``is_azure_v2_voice(voice_name)`` is true
         (neither rate nor pitch is forwarded).
      3. Azure V1 otherwise (rate and pitch are both forwarded).

    Returns:
        Whatever the selected engine function returns (a SubMaker or None).
    """
    if is_soulvoice_voice(voice_name):
        result = soulvoice_tts(text, voice_name, voice_file, speed=voice_rate)
    elif is_azure_v2_voice(voice_name):
        result = azure_tts_v2(text, voice_name, voice_file)
    else:
        # Fallback engine when no other prefix/marker matches.
        result = azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file)
    return result
|
||||
|
||||
|
||||
@ -1368,6 +1398,10 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
|
||||
if start_time < 0:
|
||||
start_time = _start_time
|
||||
|
||||
# 将 100纳秒单位转换为秒
|
||||
start_time_seconds = start_time / 10000000
|
||||
end_time_seconds = end_time / 10000000
|
||||
|
||||
sub = unescape(sub)
|
||||
sub_line += sub
|
||||
sub_text = match_line(sub_line, sub_index)
|
||||
@ -1375,8 +1409,8 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
|
||||
sub_index += 1
|
||||
line = formatter(
|
||||
idx=sub_index,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
start_time=start_time_seconds,
|
||||
end_time=end_time_seconds,
|
||||
sub_text=sub_text,
|
||||
)
|
||||
sub_items.append(line)
|
||||
@ -1402,9 +1436,13 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str)
|
||||
f"\nsub_items:{json.dumps(sub_items, indent=4, ensure_ascii=False)}"
|
||||
f"\nscript_lines:{json.dumps(script_lines, indent=4, ensure_ascii=False)}"
|
||||
)
|
||||
# 返回默认值,避免 None 错误
|
||||
return subtitle_file, 3.0
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"failed, error: {str(e)}")
|
||||
# 返回默认值,避免 None 错误
|
||||
return subtitle_file, 3.0
|
||||
|
||||
|
||||
def get_audio_duration(sub_maker: submaker.SubMaker):
|
||||
@ -1453,8 +1491,21 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
|
||||
f"或者使用其他 tts 引擎")
|
||||
continue
|
||||
else:
|
||||
# 为当前片段生成字幕文件
|
||||
_, duration = create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)
|
||||
# SoulVoice 引擎不生成字幕文件
|
||||
if is_soulvoice_voice(voice_name):
|
||||
# 获取实际音频文件的时长
|
||||
duration = get_audio_duration_from_file(audio_file)
|
||||
if duration <= 0:
|
||||
# 如果无法获取文件时长,尝试从 SubMaker 获取
|
||||
duration = get_audio_duration(sub_maker)
|
||||
if duration <= 0:
|
||||
# 最后的 fallback,基于文本长度估算
|
||||
duration = max(1.0, len(text) / 3.0)
|
||||
logger.warning(f"无法获取音频时长,使用文本估算: {duration:.2f}秒")
|
||||
# 不创建字幕文件
|
||||
subtitle_file = ""
|
||||
else:
|
||||
_, duration = create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)
|
||||
|
||||
tts_results.append({
|
||||
"_id": item['_id'],
|
||||
@ -1467,3 +1518,168 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
|
||||
logger.info(f"已生成音频文件: {audio_file}")
|
||||
|
||||
return tts_results
|
||||
|
||||
|
||||
def get_audio_duration_from_file(audio_file: str) -> float:
    """
    Return the duration of an audio file in seconds.

    Strategy:
      1. If moviepy was importable (``MOVIEPY_AVAILABLE``), open the file with
         ``AudioFileClip`` and read its ``duration``.
      2. Otherwise, or if step 1 fails, estimate the duration from the file
         size.
      3. If even the file size cannot be read, return a fixed 3.0 seconds.

    Args:
        audio_file: Path of the audio file to inspect.

    Returns:
        float: Duration in seconds.  The size-based estimate is never below
        1.0 and the last-resort default is 3.0.
    """
    if MOVIEPY_AVAILABLE:
        try:
            audio_clip = AudioFileClip(audio_file)
            try:
                duration = audio_clip.duration
            finally:
                # Always release the reader, even if reading the duration raised.
                audio_clip.close()
            if duration is not None:
                return duration
            # A missing duration would break callers that compare the result
            # numerically, so fall through to the size-based estimate instead.
            logger.error("使用 moviepy 获取音频时长失败: 时长为空")
        except Exception as e:
            logger.error(f"使用 moviepy 获取音频时长失败: {str(e)}")

    # Fallback: estimate from the file size.
    try:
        import os
        file_size = os.path.getsize(audio_file)

        # Heuristic: an MP3 at ~128 kbps is ~16 KB/s; the divisor is raised to
        # 20000 bytes/s to allow for header overhead (assumes MP3 content --
        # the file format is not checked here).
        estimated_duration = max(1.0, file_size / 20000)

        logger.warning(f"使用文件大小估算音频时长: {estimated_duration:.2f}秒")
        return estimated_duration
    except Exception as e:
        logger.error(f"获取音频时长失败: {str(e)}")
        # Every method failed: return a non-zero default so callers never see 0.
        return 3.0
|
||||
|
||||
|
||||
def is_soulvoice_voice(voice_name: str) -> bool:
    """
    Tell whether ``voice_name`` refers to a SoulVoice voice.

    A name is treated as SoulVoice when it starts with ``"soulvoice:"`` or
    ``"speech:"``.  Non-string input (e.g. ``None`` from an unset setting)
    yields ``False`` instead of raising ``AttributeError``.
    """
    if not isinstance(voice_name, str):
        return False
    return voice_name.startswith(("soulvoice:", "speech:"))
|
||||
|
||||
|
||||
def parse_soulvoice_voice(voice_name: str) -> str:
    """
    Strip the optional ``"soulvoice:"`` prefix from a SoulVoice voice name.

    Supported formats:
    - soulvoice:speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr
    - speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr

    Returns:
        str: The name without a leading ``"soulvoice:"``; names that do not
        carry the prefix are returned unchanged.
    """
    # removeprefix avoids the hand-counted slice offset and is a no-op when
    # the prefix is absent.
    return voice_name.removeprefix("soulvoice:")
|
||||
|
||||
|
||||
def soulvoice_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]:
    """
    Convert text to speech through the SoulVoice HTTP API.

    The request is attempted up to three times with a 2-second pause between
    attempts.  On success the response body is written to ``voice_file``.

    Args:
        text: Text to synthesize (surrounding whitespace is stripped).
        voice_name: Voice identifier; an optional "soulvoice:" prefix is removed.
        voice_file: Path the audio bytes are written to.
        speed: Speech speed sent to the API.

    Returns:
        SubMaker: A placeholder SubMaker holding the whole text with a
        ``(0, 0)`` offset (no real timing information), or ``None`` when the
        API key is missing, the text is empty, or every attempt failed.
    """
    # Read configuration.
    api_key = config.soulvoice.get("api_key", "")
    api_url = config.soulvoice.get("api_url", "https://tts.scsmtech.cn/tts")
    default_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B")

    if not api_key:
        logger.error("SoulVoice API key 未配置")
        return None

    # Nothing to synthesize: do not spend API calls on an empty request.
    payload_text = (text or "").strip()
    if not payload_text:
        logger.error("SoulVoice TTS 文本为空,跳过合成")
        return None

    parsed_voice = parse_soulvoice_voice(voice_name)

    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }

    data = {
        'text': payload_text,
        'model': default_model,
        'voice': parsed_voice,
        'speed': speed
    }

    # Proxy settings do not change between attempts, so build them once.
    # An https-only proxy configuration is honoured as well.
    http_proxy = config.proxy.get("http")
    https_proxy = config.proxy.get("https", http_proxy)
    proxies = {}
    if http_proxy:
        proxies = {'http': http_proxy, 'https': https_proxy}
    elif https_proxy:
        proxies = {'https': https_proxy}

    max_attempts = 3
    for attempt in range(max_attempts):
        try:
            logger.info(f"第 {attempt + 1} 次调用 SoulVoice API")

            response = requests.post(
                api_url,
                headers=headers,
                json=data,
                proxies=proxies,
                timeout=60
            )

            if response.status_code == 200 and response.content:
                # Persist the audio bytes.
                with open(voice_file, 'wb') as f:
                    f.write(response.content)

                logger.info(f"SoulVoice TTS 成功生成音频: {voice_file}")

                # SoulVoice returns no word timings, so hand back a minimal
                # SubMaker: the whole text as one entry with a dummy offset.
                # NOTE(review): relies on SubMaker exposing `subs`/`offset`
                # list attributes -- confirm against the installed edge_tts.
                sub_maker = SubMaker()
                sub_maker.subs = [text]
                sub_maker.offset = [(0, 0)]

                return sub_maker

            if response.status_code == 200:
                # A 200 with an empty body would otherwise be saved as a
                # zero-byte "audio" file and reported as success.
                logger.error("SoulVoice API 返回了空的音频数据")
            else:
                logger.error(f"SoulVoice API 调用失败: {response.status_code} - {response.text}")

        except requests.exceptions.Timeout:
            logger.error(f"SoulVoice API 调用超时 (尝试 {attempt + 1}/{max_attempts})")
        except requests.exceptions.RequestException as e:
            logger.error(f"SoulVoice API 网络错误: {str(e)} (尝试 {attempt + 1}/{max_attempts})")
        except Exception as e:
            logger.error(f"SoulVoice TTS 处理错误: {str(e)} (尝试 {attempt + 1}/{max_attempts})")

        if attempt < max_attempts - 1:  # no pause after the final attempt
            time.sleep(2)

    logger.error("SoulVoice TTS 生成失败,已达到最大重试次数")
    return None
|
||||
|
||||
|
||||
def is_soulvoice_voice(voice_name: str) -> bool:
    """
    Tell whether ``voice_name`` refers to a SoulVoice voice.

    A name is treated as SoulVoice when it starts with ``"soulvoice:"`` or
    ``"speech:"``.  Non-string input (e.g. ``None`` from an unset setting)
    yields ``False`` instead of raising ``AttributeError``.
    """
    # NOTE(review): a function with this same name is already defined earlier
    # in this module; this later definition is the one that wins at import
    # time.  One of the two should be removed.
    if not isinstance(voice_name, str):
        return False
    return voice_name.startswith(("soulvoice:", "speech:"))
|
||||
|
||||
|
||||
def parse_soulvoice_voice(voice_name: str) -> str:
    """
    Strip the optional ``"soulvoice:"`` prefix from a SoulVoice voice name.

    Supported formats:
    - soulvoice:speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr
    - speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr

    Returns:
        str: The name without a leading ``"soulvoice:"``; names that do not
        carry the prefix are returned unchanged.
    """
    # NOTE(review): a function with this same name is already defined earlier
    # in this module; this later definition is the one that wins at import
    # time.  One of the two should be removed.
    return voice_name.removeprefix("soulvoice:")
|
||||
|
||||
|
||||
|
||||
|
||||
@ -77,6 +77,21 @@
|
||||
# webui界面是否显示配置项
|
||||
hide_config = true
|
||||
|
||||
[azure]
|
||||
# Azure TTS 配置
|
||||
speech_key = ""
|
||||
speech_region = ""
|
||||
|
||||
[soulvoice]
|
||||
# SoulVoice TTS API 密钥
|
||||
api_key = ""
|
||||
# 音色 URI(必需)
|
||||
voice_uri = "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr"
|
||||
# API 接口地址(可选,默认值如下)
|
||||
api_url = "https://tts.scsmtech.cn/tts"
|
||||
# 默认模型(可选)
|
||||
model = "FunAudioLLM/CosyVoice2-0.5B"
|
||||
|
||||
[proxy]
|
||||
# clash 默认地址:http://127.0.0.1:7890
|
||||
http = ""
|
||||
|
||||
@ -8,6 +8,17 @@ from app.utils import utils
|
||||
from webui.utils.cache import get_songs_cache
|
||||
|
||||
|
||||
def get_soulvoice_voices():
    """
    Return the SoulVoice entries to offer in the voice selector.

    Returns:
        list: ``["soulvoice:custom"]`` when a SoulVoice API key is configured,
        otherwise an empty list.  Only this single placeholder is offered;
        the concrete voice is taken from a separate user-supplied setting.
    """
    has_api_key = bool(config.soulvoice.get("api_key", ""))
    return ["soulvoice:custom"] if has_api_key else []
|
||||
|
||||
|
||||
def render_audio_panel(tr):
|
||||
"""渲染音频设置面板"""
|
||||
with st.container(border=True):
|
||||
@ -24,15 +35,24 @@ def render_tts_settings(tr):
|
||||
"""渲染TTS(文本转语音)设置"""
|
||||
# 获取支持的语音列表
|
||||
support_locales = ["zh-CN", "en-US"]
|
||||
voices = voice.get_all_azure_voices(filter_locals=support_locales)
|
||||
azure_voices = voice.get_all_azure_voices(filter_locals=support_locales)
|
||||
|
||||
# 添加 SoulVoice 语音选项
|
||||
soulvoice_voices = get_soulvoice_voices()
|
||||
|
||||
# 合并所有语音选项
|
||||
all_voices = azure_voices + soulvoice_voices
|
||||
|
||||
# 创建友好的显示名称
|
||||
friendly_names = {
|
||||
v: v.replace("Female", tr("Female"))
|
||||
.replace("Male", tr("Male"))
|
||||
.replace("Neural", "")
|
||||
for v in voices
|
||||
}
|
||||
friendly_names = {}
|
||||
|
||||
# Azure 语音的友好名称
|
||||
for v in azure_voices:
|
||||
friendly_names[v] = v.replace("Female", tr("Female")).replace("Male", tr("Male")).replace("Neural", "")
|
||||
|
||||
# SoulVoice 语音的友好名称
|
||||
for v in soulvoice_voices:
|
||||
friendly_names[v] = "SoulVoice (自定义音色)"
|
||||
|
||||
# 获取保存的语音设置
|
||||
saved_voice_name = config.ui.get("voice_name", "")
|
||||
@ -42,9 +62,9 @@ def render_tts_settings(tr):
|
||||
saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name)
|
||||
else:
|
||||
# 如果没有保存的设置,选择与UI语言匹配的第一个语音
|
||||
for i, v in enumerate(voices):
|
||||
for i, v in enumerate(all_voices):
|
||||
if (v.lower().startswith(st.session_state["ui_language"].lower())
|
||||
and "V2" not in v):
|
||||
and "V2" not in v and not v.startswith("soulvoice:")):
|
||||
saved_voice_name_index = i
|
||||
break
|
||||
|
||||
@ -60,20 +80,84 @@ def render_tts_settings(tr):
|
||||
list(friendly_names.values()).index(selected_friendly_name)
|
||||
]
|
||||
|
||||
# 如果选择的是 SoulVoice 自定义选项,使用配置的音色 URI
|
||||
if voice_name == "soulvoice:custom":
|
||||
custom_voice_uri = config.soulvoice.get("voice_uri", "")
|
||||
if custom_voice_uri:
|
||||
# 确保音色 URI 有正确的前缀
|
||||
if not custom_voice_uri.startswith("soulvoice:") and not custom_voice_uri.startswith("speech:"):
|
||||
voice_name = f"soulvoice:{custom_voice_uri}"
|
||||
else:
|
||||
voice_name = custom_voice_uri if custom_voice_uri.startswith("soulvoice:") else f"soulvoice:{custom_voice_uri}"
|
||||
|
||||
# 保存设置
|
||||
config.ui["voice_name"] = voice_name
|
||||
|
||||
# Azure V2语音特殊处理
|
||||
if voice.is_azure_v2_voice(voice_name):
|
||||
# 根据语音类型渲染不同的设置
|
||||
if voice.is_soulvoice_voice(voice_name):
|
||||
render_soulvoice_settings(tr)
|
||||
elif voice.is_azure_v2_voice(voice_name):
|
||||
render_azure_v2_settings(tr)
|
||||
|
||||
# 语音参数设置
|
||||
render_voice_parameters(tr)
|
||||
render_voice_parameters(tr, voice_name)
|
||||
|
||||
# 试听按钮
|
||||
render_voice_preview(tr, voice_name)
|
||||
|
||||
|
||||
def render_soulvoice_settings(tr):
    """
    Render the SoulVoice settings widgets and store the entered values.

    Shows inputs for the API key, the voice URI and (inside a collapsed
    "advanced" expander) the API URL and model name, writes the values into
    ``config.soulvoice`` and displays a status message.

    Entered values are stripped of surrounding whitespace before they are
    stored: a pasted key with a leading space or trailing newline would
    otherwise end up verbatim in the ``Authorization`` header.  A blank API
    URL or model falls back to the built-in default rather than being saved
    as an empty string.

    Args:
        tr: Translation callable (not used by this function's own labels).
    """
    default_api_url = "https://tts.scsmtech.cn/tts"
    default_model = "FunAudioLLM/CosyVoice2-0.5B"

    saved_api_key = config.soulvoice.get("api_key", "")
    saved_api_url = config.soulvoice.get("api_url", default_api_url)
    saved_model = config.soulvoice.get("model", default_model)
    saved_voice_uri = config.soulvoice.get("voice_uri", "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr")

    # API key input.
    api_key = st.text_input(
        "SoulVoice API Key",
        value=saved_api_key,
        type="password",
        help="请输入您的 SoulVoice API 密钥"
    )

    # Voice URI input.
    voice_uri = st.text_input(
        "音色 URI",
        value=saved_voice_uri,
        help="请输入 SoulVoice 音色标识符,格式如:speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr",
        placeholder="speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr"
    )

    # Optional settings, collapsed by default.
    with st.expander("高级设置", expanded=False):
        api_url = st.text_input(
            "API 地址",
            value=saved_api_url,
            help="SoulVoice API 接口地址"
        )

        model = st.text_input(
            "模型名称",
            value=saved_model,
            help="使用的 TTS 模型"
        )

    # Normalize user input before persisting it.
    api_key = (api_key or "").strip()
    voice_uri = (voice_uri or "").strip()
    api_url = (api_url or "").strip() or default_api_url
    model = (model or "").strip() or default_model

    # Persist the configuration.
    config.soulvoice["api_key"] = api_key
    config.soulvoice["voice_uri"] = voice_uri
    config.soulvoice["api_url"] = api_url
    config.soulvoice["model"] = model

    # Report the configuration status.
    if api_key and voice_uri:
        st.success("✅ SoulVoice 配置已设置")
    elif not api_key:
        st.warning("⚠️ 请配置 SoulVoice API Key")
    else:
        # Only remaining case: key present, voice URI missing.
        st.warning("⚠️ 请配置音色 URI")
|
||||
|
||||
|
||||
def render_azure_v2_settings(tr):
|
||||
"""渲染Azure V2语音设置"""
|
||||
saved_azure_speech_region = config.azure.get("speech_region", "")
|
||||
@ -93,7 +177,7 @@ def render_azure_v2_settings(tr):
|
||||
config.azure["speech_key"] = azure_speech_key
|
||||
|
||||
|
||||
def render_voice_parameters(tr):
|
||||
def render_voice_parameters(tr, voice_name):
|
||||
"""渲染语音参数设置"""
|
||||
# 音量 - 使用统一的默认值
|
||||
voice_volume = st.slider(
|
||||
@ -106,22 +190,41 @@ def render_voice_parameters(tr):
|
||||
)
|
||||
st.session_state['voice_volume'] = voice_volume
|
||||
|
||||
# 检查是否为 SoulVoice 引擎
|
||||
is_soulvoice = voice.is_soulvoice_voice(voice_name)
|
||||
|
||||
# 语速
|
||||
voice_rate = st.selectbox(
|
||||
tr("Speech Rate"),
|
||||
options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0],
|
||||
index=2,
|
||||
)
|
||||
if is_soulvoice:
|
||||
# SoulVoice 支持更精细的语速控制
|
||||
voice_rate = st.slider(
|
||||
tr("Speech Rate"),
|
||||
min_value=0.5,
|
||||
max_value=2.0,
|
||||
value=1.0,
|
||||
step=0.1,
|
||||
help="SoulVoice 语音速度控制"
|
||||
)
|
||||
else:
|
||||
# Azure TTS 使用预设选项
|
||||
voice_rate = st.selectbox(
|
||||
tr("Speech Rate"),
|
||||
options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0],
|
||||
index=2,
|
||||
)
|
||||
st.session_state['voice_rate'] = voice_rate
|
||||
|
||||
# 音调
|
||||
voice_pitch = st.selectbox(
|
||||
tr("Speech Pitch"),
|
||||
options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0],
|
||||
index=2,
|
||||
)
|
||||
st.session_state['voice_pitch'] = voice_pitch
|
||||
# 音调 - SoulVoice 不支持音调调节
|
||||
if not is_soulvoice:
|
||||
voice_pitch = st.selectbox(
|
||||
tr("Speech Pitch"),
|
||||
options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0],
|
||||
index=2,
|
||||
)
|
||||
st.session_state['voice_pitch'] = voice_pitch
|
||||
else:
|
||||
# SoulVoice 不支持音调调节,设置默认值
|
||||
st.session_state['voice_pitch'] = 1.0
|
||||
st.info("ℹ️ SoulVoice 引擎不支持音调调节")
|
||||
|
||||
|
||||
def render_voice_preview(tr, voice_name):
|
||||
|
||||
@ -9,14 +9,35 @@ def render_subtitle_panel(tr):
|
||||
with st.container(border=True):
|
||||
st.write(tr("Subtitle Settings"))
|
||||
|
||||
# 启用字幕选项
|
||||
enable_subtitles = st.checkbox(tr("Enable Subtitles"), value=True)
|
||||
st.session_state['subtitle_enabled'] = enable_subtitles
|
||||
# 检查是否选择了 SoulVoice 引擎
|
||||
from app.services import voice
|
||||
current_voice = st.session_state.get('voice_name', '')
|
||||
is_soulvoice = voice.is_soulvoice_voice(current_voice)
|
||||
|
||||
if enable_subtitles:
|
||||
render_font_settings(tr)
|
||||
render_position_settings(tr)
|
||||
render_style_settings(tr)
|
||||
if is_soulvoice:
|
||||
# SoulVoice 引擎时显示禁用提示
|
||||
st.warning("⚠️ SoulVoice TTS 不支持精确字幕生成")
|
||||
st.info("💡 建议使用专业剪辑工具(如剪映、PR等)手动添加字幕")
|
||||
|
||||
# 强制禁用字幕
|
||||
st.session_state['subtitle_enabled'] = False
|
||||
|
||||
# 显示禁用状态的复选框
|
||||
st.checkbox(
|
||||
tr("Enable Subtitles"),
|
||||
value=False,
|
||||
disabled=True,
|
||||
help="SoulVoice 引擎不支持字幕生成,请使用其他 TTS 引擎"
|
||||
)
|
||||
else:
|
||||
# 其他引擎正常显示字幕选项
|
||||
enable_subtitles = st.checkbox(tr("Enable Subtitles"), value=True)
|
||||
st.session_state['subtitle_enabled'] = enable_subtitles
|
||||
|
||||
if enable_subtitles:
|
||||
render_font_settings(tr)
|
||||
render_position_settings(tr)
|
||||
render_style_settings(tr)
|
||||
|
||||
|
||||
def render_font_settings(tr):
|
||||
|
||||
@ -29,7 +29,7 @@
|
||||
"Clip Duration": "Maximum Clip Duration (Seconds) (**Not the total length of the video**, refers to the length of each **composite segment**)",
|
||||
"Number of Videos Generated Simultaneously": "Number of Videos Generated Simultaneously",
|
||||
"Audio Settings": "**Audio Settings**",
|
||||
"Speech Synthesis": "Speech Synthesis Voice (:red[**Keep consistent with the script language**. Note: V2 version performs better, but requires an API KEY])",
|
||||
"Speech Synthesis": "Speech Synthesis Voice (:red[**Keep consistent with the script language**. Note: V2 version performs better, but requires an API KEY; SoulVoice provides high-quality Chinese voices])",
|
||||
"Speech Region": "Service Region (:red[Required, [Click to Get](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
|
||||
"Speech Key": "API Key (:red[Required, either Key 1 or Key 2 is acceptable [Click to Get](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
|
||||
"Speech Volume": "Speech Volume (1.0 represents 100%)",
|
||||
|
||||
@ -29,7 +29,7 @@
|
||||
"Clip Duration": "视频片段最大时长(秒)(**不是视频总长度**,是指每个**合成片段**的长度)",
|
||||
"Number of Videos Generated Simultaneously": "同时生成视频数量",
|
||||
"Audio Settings": "**音频设置**",
|
||||
"Speech Synthesis": "朗读声音(:red[**与文案语言保持一致**。注意:V2版效果更好,但是需要API KEY])",
|
||||
"Speech Synthesis": "朗读声音(:red[**与文案语言保持一致**。注意:V2版效果更好,但是需要API KEY;SoulVoice 提供高质量中文语音])",
|
||||
"Speech Region": "服务区域 (:red[必填,[点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
|
||||
"Speech Key": "API Key (:red[必填,密钥1 或 密钥2 均可 [点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])",
|
||||
"Speech Volume": "朗读音量(1.0表示100%)",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user