feat: 优化视频生成进度展示与UI细节

- 为视频生成任务的每个处理步骤添加详细的中文状态提示
- 重构WebUI的视频生成弹窗,使用Streamlit原生状态组件优化进度展示
- 清理多语言翻译文本中的冗余表情符号,统一UI文本风格
- 将 TTS 设置面板中引擎详情折叠面板的默认状态改为收起,并移除标题中的表情前缀
This commit is contained in:
viccy 2026-06-07 18:36:47 +08:00
parent 34d5532119
commit 4ab29fd776
5 changed files with 231 additions and 108 deletions

View File

@ -434,12 +434,23 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
global merged_audio_path, merged_subtitle_path
logger.info(f"\n\n## 开始统一视频处理任务: {task_id}")
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=0)
sm.state.update_task(
task_id,
state=const.TASK_STATE_PROCESSING,
progress=0,
message="正在初始化视频生成任务",
)
"""
1. 加载剪辑脚本
"""
logger.info("\n\n## 1. 加载视频脚本")
sm.state.update_task(
task_id,
state=const.TASK_STATE_PROCESSING,
progress=5,
message="正在加载剪辑脚本",
)
video_script_path = path.join(params.video_clip_json_path)
if path.exists(video_script_path):
@ -465,6 +476,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
2. 使用 TTS 生成音频素材
"""
logger.info("\n\n## 2. 根据OST设置生成音频列表")
sm.state.update_task(
task_id,
state=const.TASK_STATE_PROCESSING,
progress=10,
message="正在生成 TTS 配音",
)
# 只为 OST=0 或 OST=2 的片段生成音频:OST=0 仅保留解说,OST=2 保留解说和原声
tts_segments = [
segment for segment in list_script
@ -481,12 +498,23 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
voice_pitch=params.voice_pitch,
)
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)
sm.state.update_task(
task_id,
state=const.TASK_STATE_PROCESSING,
progress=20,
message="TTS 配音生成完成",
)
"""
3. 统一视频裁剪 - 基于OST类型的差异化裁剪策略
"""
logger.info("\n\n## 3. 统一视频裁剪基于OST类型")
sm.state.update_task(
task_id,
state=const.TASK_STATE_PROCESSING,
progress=30,
message="正在按脚本裁剪视频片段",
)
# 使用新的统一裁剪策略
video_clip_result = clip_video.clip_video_unified(
@ -505,12 +533,23 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
logger.info(f"统一裁剪完成,处理了 {len(video_clip_result)} 个视频片段")
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60)
sm.state.update_task(
task_id,
state=const.TASK_STATE_PROCESSING,
progress=60,
message="视频片段裁剪完成",
)
"""
4. 合并音频和字幕
"""
logger.info("\n\n## 4. 合并音频和字幕")
sm.state.update_task(
task_id,
state=const.TASK_STATE_PROCESSING,
progress=65,
message="正在合并配音和字幕",
)
total_duration = sum([script["duration"] for script in new_script_list])
if tts_segments:
try:
@ -540,6 +579,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
logger.warning("没有需要合并的音频/字幕")
merged_audio_path = ""
merged_subtitle_path = ""
sm.state.update_task(
task_id,
state=const.TASK_STATE_PROCESSING,
progress=70,
message="配音和字幕合并完成",
)
"""
5. 合并视频
@ -549,6 +594,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
combined_video_path = path.join(utils.task_dir(task_id), f"merger.mp4")
logger.info(f"\n\n## 5. 合并视频: => {combined_video_path}")
sm.state.update_task(
task_id,
state=const.TASK_STATE_PROCESSING,
progress=75,
message="正在合并视频片段",
)
# 使用统一裁剪后的视频片段
video_clips = []
@ -568,7 +619,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
video_aspect=params.video_aspect,
threads=params.n_threads
)
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=80)
sm.state.update_task(
task_id,
state=const.TASK_STATE_PROCESSING,
progress=80,
message="视频片段合并完成",
)
"""
6. 合并字幕/BGM/配音/视频
@ -581,6 +637,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
else output_video_path
)
logger.info(f"\n\n## 6. 最后一步: 合并字幕/BGM/配音/视频 -> {merge_output_video_path}")
sm.state.update_task(
task_id,
state=const.TASK_STATE_PROCESSING,
progress=85,
message="正在合成最终视频",
)
bgm_path = utils.get_bgm_file(
bgm_type=getattr(params, "bgm_type", "random"),
@ -634,10 +696,20 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
auto_subtitle_path = ""
if auto_transcription_enabled:
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=90)
sm.state.update_task(
task_id,
state=const.TASK_STATE_PROCESSING,
progress=90,
message="正在自动转录最终视频",
)
logger.info("\n\n## 7. 自动转录最终视频字幕")
auto_subtitle_path = _transcribe_final_video(task_id, merge_output_video_path, params)
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=95)
sm.state.update_task(
task_id,
state=const.TASK_STATE_PROCESSING,
progress=95,
message="正在压入自动转录字幕",
)
logger.info(f"\n\n## 8. 压入自动转录字幕 -> {output_video_path}")
_merge_auto_transcribed_subtitles(
source_video_path=merge_output_video_path,
@ -657,7 +729,13 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
}
if auto_subtitle_path:
kwargs["subtitles"] = [auto_subtitle_path]
sm.state.update_task(task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs)
sm.state.update_task(
task_id,
state=const.TASK_STATE_COMPLETE,
progress=100,
message="视频生成完成",
**kwargs
)
return kwargs

137
webui.py
View File

@ -170,57 +170,102 @@ def render_generate_button():
# 生成一个新的task_id用于本次处理
task_id = str(uuid.uuid4())
# 创建进度条
progress_bar = st.progress(0)
status_text = st.empty()
@st.dialog(tr("Generating Video"), width="large")
def generate_video_dialog():
st.markdown(
"""
<style>
div[data-testid="stDialog"] div[data-testid="stStatusWidget"] {
margin-top: 0.25rem;
}
div[data-testid="stDialog"] div[data-testid="stProgress"] {
margin-bottom: 0.75rem;
}
</style>
""",
unsafe_allow_html=True,
)
def run_task():
try:
tm.start_subclip_unified(
task_id=task_id,
params=params
)
except Exception as e:
logger.error(f"任务执行失败: {e}")
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED, message=str(e))
progress_bar = st.progress(0)
status_panel = st.status(tr("Generating Video"), expanded=True)
status_panel.write(tr("Generating Video"))
# 在新线程中启动任务
thread = threading.Thread(target=run_task)
thread.start()
def run_task():
try:
tm.start_subclip_unified(
task_id=task_id,
params=params
)
except Exception as e:
logger.error(f"任务执行失败: {e}")
current_task = sm.state.get_task(task_id) or {}
sm.state.update_task(
task_id,
state=const.TASK_STATE_FAILED,
progress=current_task.get("progress", 0),
message=str(e),
)
# 轮询任务状态
while True:
task = sm.state.get_task(task_id)
if task:
progress = task.get("progress", 0)
state = task.get("state")
# 更新进度条
progress_bar.progress(progress / 100)
status_text.text(f"Processing... {progress}%")
# 在新线程中启动任务
thread = threading.Thread(target=run_task)
thread.start()
last_status_key = None
# 轮询任务状态
while True:
task = sm.state.get_task(task_id)
if task:
progress = task.get("progress", 0)
state = task.get("state")
if state == const.TASK_STATE_COMPLETE:
status_text.text(tr("Video Generation Completed"))
progress_bar.progress(1.0)
# 显示结果
video_files = task.get("videos", [])
try:
if video_files:
player_cols = st.columns(len(video_files) * 2 + 1)
for i, url in enumerate(video_files):
player_cols[i * 2 + 1].video(url)
except Exception as e:
logger.error(f"播放视频失败: {e}")
st.success(tr("Video Generation Completed"))
break
elif state == const.TASK_STATE_FAILED:
st.error(f"{tr('Task failed')}: {task.get('message', 'Unknown error')}")
break
time.sleep(0.5)
progress = int(progress)
except (TypeError, ValueError):
progress = 0
progress = max(0, min(progress, 100))
# 更新进度条和阶段状态
progress_bar.progress(progress / 100)
current_message = task.get("message") or f"Processing... {progress}%"
status_label = f"{current_message} ({progress}%)"
status_key = (state, progress, current_message)
if status_key != last_status_key:
status_panel.write(status_label)
last_status_key = status_key
if state == const.TASK_STATE_COMPLETE:
status_panel.update(
label=tr("Video Generation Completed"),
state="complete",
expanded=False,
)
progress_bar.progress(1.0)
# 显示结果
video_files = task.get("videos", [])
try:
if video_files:
for url in video_files:
st.video(url)
except Exception as e:
logger.error(f"播放视频失败: {e}")
st.success(tr("Video Generation Completed"))
break
if state == const.TASK_STATE_FAILED:
status_panel.update(
label=f"{tr('Task failed')}: {task.get('message', 'Unknown error')}",
state="error",
expanded=True,
)
st.error(f"{tr('Task failed')}: {task.get('message', 'Unknown error')}")
break
time.sleep(0.5)
generate_video_dialog()
def get_voice_name_for_tts_engine(tts_engine: str) -> str:

View File

@ -522,7 +522,7 @@ def render_tts_settings(tr):
"""渲染TTS(文本转语音)设置"""
# 1. TTS引擎选择器
# st.subheader("🎤 TTS引擎选择")
# st.subheader("TTS引擎选择")
engine_options = get_tts_engine_options(tr)
engine_descriptions = get_tts_engine_descriptions(tr)
@ -553,7 +553,7 @@ def render_tts_settings(tr):
if selected_engine in engine_descriptions:
desc = engine_descriptions[selected_engine]
with st.expander(tr("TTS Engine Details").format(engine=desc['title']), expanded=True):
with st.expander(tr("TTS Engine Details").format(engine=desc['title']), expanded=False):
st.markdown(f"**{tr('Features')}:** {desc['features']}")
st.markdown(f"**{tr('Use Case')}:** {desc['use_case']}")
@ -561,7 +561,7 @@ def render_tts_settings(tr):
st.markdown(f"**{tr('Registration URL')}:** [{desc['registration']}]({desc['registration']})")
# 3. 根据选择的引擎渲染对应的配置界面
# st.subheader("⚙️ 引擎配置")
# st.subheader("引擎配置")
if selected_engine == "edge_tts":
render_edge_tts_settings(tr)

View File

@ -61,7 +61,7 @@
"Preview Background Music Help": "Play the selected background music.",
"Upload Background Music File": "Upload Background Music File",
"Upload Background Music Help": "Upload an audio file to use as background music.",
"Background Music uploaded": "Background music uploaded: {path}",
"Background Music uploaded": "Background music uploaded: {path}",
"Background Music Volume": "Background Music Volume (0.2 represents 20%, background sound should not be too loud)",
"Subtitle Settings": "**Subtitle Settings**",
"Enable Subtitles": "Enable Subtitles (If unchecked, the following settings will not take effect)",
@ -326,13 +326,13 @@
"Cloud Service": "Cloud Service",
"Select TTS Engine": "Select TTS Engine",
"Select TTS Engine Help": "Choose the text-to-speech engine you want to use.",
"TTS Engine Details": "📋 {engine} Details",
"TTS Engine Details": "{engine} Details",
"Features": "Features",
"Use Case": "Use Case",
"Registration URL": "Registration URL",
"Voice Selection": "Voice Selection",
"Select Edge TTS Voice": "Select an Edge TTS voice",
"Edge TTS Voice Description": "💡 Edge TTS Voice Notes",
"Edge TTS Voice Description": "Edge TTS Voice Notes",
"Loaded voice count": "Loaded {count} voices",
"Female Voice": "Female voice",
"Male Voice": "Male voice",
@ -348,21 +348,21 @@
"Azure Speech Key Help": "Azure Speech Services API key",
"Voice Name": "Voice Name",
"Azure Voice Name Help": "Enter an Azure Speech Services voice name. You can use the official voice name directly, such as zh-CN-YunzeNeural.",
"Common Voice Reference": "💡 Common Voice Reference",
"Common Voice Reference": "Common Voice Reference",
"Chinese Voices": "Chinese Voices",
"English Voices": "English Voices",
"Multilingual": "multilingual",
"Azure Voices Docs Notice": "💡 For more voices, see the [Azure Speech Services documentation](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support).",
"Azure Voices Docs Notice": "For more voices, see the [Azure Speech Services documentation](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support).",
"Quick Select": "Quick Select",
"Chinese Female Voice": "Chinese Female Voice",
"Chinese Male Voice": "Chinese Male Voice",
"English Female Voice": "English Female Voice",
"Voice name valid": "Voice name is valid: {voice}",
"Voice name format may be invalid": "⚠️ Voice name format may be incorrect: {voice}",
"Azure voice name format notice": "💡 Azure voice names usually follow this format: [language]-[region]-[name]Neural",
"Azure Speech Services configured": "Azure Speech Services is configured",
"Please configure service region": "⚠️ Please configure the service region",
"Please configure API Key": "⚠️ Please configure the API Key",
"Voice name valid": "Voice name is valid: {voice}",
"Voice name format may be invalid": "Voice name format may be incorrect: {voice}",
"Azure voice name format notice": "Azure voice names usually follow this format: [language]-[region]-[name]Neural",
"Azure Speech Services configured": "Azure Speech Services is configured",
"Please configure service region": "Please configure the service region",
"Please configure API Key": "Please configure the API Key",
"Task failed": "Task failed",
"Script file cannot be empty": "Script file cannot be empty",
"Video file cannot be empty": "Video file cannot be empty",
@ -486,10 +486,10 @@
"Tencent Service Region Help": "Select the Tencent Cloud TTS service region",
"Custom Voice": "Custom Voice",
"Select Tencent TTS Voice": "Select a Tencent Cloud TTS voice",
"Tencent Cloud TTS Voice Description": "💡 Tencent Cloud TTS Voice Notes",
"Tencent Cloud TTS Voice Description": "Tencent Cloud TTS Voice Notes",
"Female Voices": "Female Voices",
"Male Voices": "Male Voices",
"Tencent More Voices Notice": "💡 See the official Tencent Cloud documentation for more voices.",
"Tencent More Voices Notice": "See the official Tencent Cloud documentation for more voices.",
"Qwen DashScope API Key Help": "Tongyi Qwen DashScope API Key",
"TTS Model Name": "TTS Model Name",
"Qwen TTS Model Help": "Qwen TTS model name, for example qwen3-tts-flash",
@ -532,12 +532,12 @@
"Preview Reference Audio Help": "Play the selected reference audio.",
"Upload Reference Audio File": "Upload Reference Audio File",
"Upload Reference Audio Help": "Upload a clear audio clip for voice cloning",
"Audio uploaded": "Audio uploaded: {path}",
"Audio uploaded": "Audio uploaded: {path}",
"Inference Mode": "Inference Mode",
"Standard Inference": "Standard Inference",
"Fast Inference": "Fast Inference",
"Inference Mode Help": "Standard inference has higher quality but is slower. Fast inference is faster with slightly lower quality.",
"Advanced Parameters": "🔧 Advanced Parameters",
"Advanced Parameters": "Advanced Parameters",
"Sampling Temperature": "Sampling Temperature",
"Sampling Temperature Help": "Controls randomness. Higher values are more random; lower values are more deterministic.",
"Top P Help": "Probability threshold for nucleus sampling. Smaller values make results more deterministic.",
@ -548,9 +548,9 @@
"Repetition Penalty Help": "Higher values reduce repetition, but overly high values may sound unnatural.",
"Enable Sampling": "Enable Sampling",
"Enable Sampling Help": "Enable sampling for more natural speech.",
"IndexTTS Usage Instructions Title": "💡 IndexTTS-1.5 Usage Instructions",
"IndexTTS Usage Instructions Title": "IndexTTS-1.5 Usage Instructions",
"IndexTTS Usage Instructions": "**Zero-shot voice cloning**\n\n1. **Prepare reference audio**: upload or specify a clear audio file (3-10 seconds recommended)\n2. **Set API URL**: make sure the IndexTTS-1.5 service is running\n3. **Start synthesis**: the system will use the reference voice to synthesize new speech\n\n**Notes**:\n- Reference audio quality directly affects synthesis quality\n- Use clean audio without background noise when possible\n- Keep text length within a reasonable range\n- The first synthesis may take longer",
"IndexTTS2 Emotion Parameters": "🎭 Emotion Parameters",
"IndexTTS2 Emotion Parameters": "Emotion Parameters",
"Emotion Mode": "Emotion Mode",
"Emotion Mode Help": "Choose the emotion control source for IndexTTS-2.",
"Emotion Mode Speaker": "Same as speaker reference",
@ -578,7 +578,7 @@
"Max Text Tokens Per Segment Help": "Maximum text tokens per segment for IndexTTS-2 inference.",
"Max Mel Tokens": "Max Mel Tokens",
"Max Mel Tokens Help": "Controls the maximum mel tokens generated in one request. Higher values can produce longer audio.",
"IndexTTS2 Usage Instructions Title": "💡 IndexTTS-2 Usage Instructions",
"IndexTTS2 Usage Instructions Title": "IndexTTS-2 Usage Instructions",
"IndexTTS2 Usage Instructions": "**IndexTTS-2 voice cloning**\n\n1. **Choose a voice**: reuse IndexTTS-1.5 resource audio or upload a reference audio file\n2. **Set API URL**: for example http://192.168.3.6:7863/tts, or enter the service root\n3. **Tune emotion**: speaker is the default; switch to audio, vector, or text when needed\n4. **Tune generation**: temperature, top_p, top_k, num_beams, repetition_penalty, and max_mel_tokens are sent directly to the IndexTTS-2 API\n\n**Notes**:\n- Reference audio quality directly affects cloning quality\n- The first request may load the model and take longer\n- CPU deployments are much slower than GPU deployments",
"OmniVoice Usage Instructions Title": "OmniVoice Usage Instructions",
"OmniVoice Usage Instructions": "**OmniVoice-Pack speech synthesis**\n\n1. **Automatic voice**: set the API URL and language, then synthesize directly.\n2. **Voice design**: fill instruct with the desired gender, pitch, accent, or style.\n3. **Reference-audio clone**: upload or choose reference audio and fill its matching transcript.\n\n**Notes**:\n- The default service URL is http://127.0.0.1:7866/tts\n- Reference-audio cloning requires reference text when the service has no ASR model loaded\n- OmniVoice returns WAV audio, and NarratoAI estimates subtitle segment timing from the audio duration",
@ -594,7 +594,7 @@
"Voice Pitch Help 0.5-1.5": "Adjust voice pitch (0.5-1.5)",
"Sentence Silence Duration": "Sentence-end Silence Duration (seconds)",
"Sentence Silence Duration Help": "Adjust sentence-end silence duration (0.0-2.0 seconds)",
"Doubao TTS API Key Application Process": "💡 Doubao TTS API Key Application Process",
"Doubao TTS API Key Application Process": "Doubao TTS API Key Application Process",
"Application Steps": "Application Steps",
"Doubao TTS Step 1": "1. Open [https://console.volcengine.com/iam/keymanage](https://console.volcengine.com/iam/keymanage)",
"Doubao TTS Step 2": "2. Create a new Access Key and Secret Key",
@ -602,15 +602,15 @@
"Doubao TTS Step 4": "4. Click Start Now",
"Doubao TTS Step 5": "5. In the left API Service Center, find Speech Synthesis under Audio Generation (note: Speech Synthesis, not the speech synthesis large model)",
"Doubao TTS Step 6": "6. Scroll to the bottom to get the APPID and Access Token",
"Doubao TTS Fill Credentials Notice": "💡 Fill the Access Key, Secret Key, AppID, and Token above.",
"Doubao TTS configured": "Doubao TTS is configured",
"Please configure missing fields": "⚠️ Please configure: {fields}",
"Preview Voice Synthesis": "🎵 Preview Voice Synthesis",
"Doubao TTS Fill Credentials Notice": "Fill the Access Key, Secret Key, AppID, and Token above.",
"Doubao TTS configured": "Doubao TTS is configured",
"Please configure missing fields": "Please configure: {fields}",
"Preview Voice Synthesis": "Preview Voice Synthesis",
"Voice Preview Sample": "Thanks for using NarratoAI. If you have any questions or suggestions, please join the community for help and discussion.",
"Please configure voice settings first": "Please configure voice settings first",
"Voice synthesis successful": "Voice synthesis successful!",
"Voice synthesis failed": "Voice synthesis failed. Please check your configuration.",
"SoulVoice pitch not supported": " SoulVoice does not support pitch adjustment",
"Voice synthesis successful": "Voice synthesis successful!",
"Voice synthesis failed": "Voice synthesis failed. Please check your configuration.",
"SoulVoice pitch not supported": "SoulVoice does not support pitch adjustment",
"Progress": "Progress",
"Generating script...": "Generating script...",
"Please select video file first": "Please select a video file first",

View File

@ -49,7 +49,7 @@
"Preview Background Music Help": "播放当前背景音乐",
"Upload Background Music File": "上传背景音乐文件",
"Upload Background Music Help": "上传一个音频文件作为背景音乐",
"Background Music uploaded": "背景音乐已上传: {path}",
"Background Music uploaded": "背景音乐已上传: {path}",
"Background Music Volume": "背景音乐音量0.2表示20%,背景声音不宜过高)",
"Subtitle Settings": "**字幕设置**",
"Enable Subtitles": "启用字幕(若取消勾选,下面的设置都将不生效)",
@ -276,13 +276,13 @@
"Cloud Service": "云端服务",
"Select TTS Engine": "选择 TTS 引擎",
"Select TTS Engine Help": "选择您要使用的文本转语音引擎",
"TTS Engine Details": "📋 {engine} 详细说明",
"TTS Engine Details": "{engine} 详细说明",
"Features": "特点",
"Use Case": "适用场景",
"Registration URL": "注册地址",
"Voice Selection": "音色选择",
"Select Edge TTS Voice": "选择 Edge TTS 音色",
"Edge TTS Voice Description": "💡 Edge TTS 音色说明",
"Edge TTS Voice Description": "Edge TTS 音色说明",
"Loaded voice count": "已加载 {count} 个音色",
"Female Voice": "女声",
"Male Voice": "男声",
@ -298,21 +298,21 @@
"Azure Speech Key Help": "Azure Speech Services API 密钥",
"Voice Name": "音色名称",
"Azure Voice Name Help": "输入 Azure Speech Services 音色名称直接使用官方音色名称即可。例如zh-CN-YunzeNeural",
"Common Voice Reference": "💡 常用音色参考",
"Common Voice Reference": "常用音色参考",
"Chinese Voices": "中文音色",
"English Voices": "英文音色",
"Multilingual": "多语言",
"Azure Voices Docs Notice": "💡 更多音色请参考 [Azure Speech Services 官方文档](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support)",
"Azure Voices Docs Notice": "更多音色请参考 [Azure Speech Services 官方文档](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support)",
"Quick Select": "快速选择",
"Chinese Female Voice": "中文女声",
"Chinese Male Voice": "中文男声",
"English Female Voice": "英文女声",
"Voice name valid": "音色名称有效: {voice}",
"Voice name format may be invalid": "⚠️ 音色名称格式可能不正确: {voice}",
"Azure voice name format notice": "💡 Azure 音色名称通常格式为: [语言]-[地区]-[名称]Neural",
"Azure Speech Services configured": "Azure Speech Services 配置已设置",
"Please configure service region": "⚠️ 请配置服务区域",
"Please configure API Key": "⚠️ 请配置 API Key",
"Voice name valid": "音色名称有效: {voice}",
"Voice name format may be invalid": "音色名称格式可能不正确: {voice}",
"Azure voice name format notice": "Azure 音色名称通常格式为: [语言]-[地区]-[名称]Neural",
"Azure Speech Services configured": "Azure Speech Services 配置已设置",
"Please configure service region": "请配置服务区域",
"Please configure API Key": "请配置 API Key",
"Language": "界面语言",
"Task failed": "任务失败",
"Script file cannot be empty": "脚本文件不能为空",
@ -437,10 +437,10 @@
"Tencent Service Region Help": "选择腾讯云 TTS 服务地域",
"Custom Voice": "自定义音色",
"Select Tencent TTS Voice": "选择腾讯云 TTS 音色",
"Tencent Cloud TTS Voice Description": "💡 腾讯云 TTS 音色说明",
"Tencent Cloud TTS Voice Description": "腾讯云 TTS 音色说明",
"Female Voices": "女声音色",
"Male Voices": "男声音色",
"Tencent More Voices Notice": "💡 更多音色请参考腾讯云官方文档",
"Tencent More Voices Notice": "更多音色请参考腾讯云官方文档",
"Qwen DashScope API Key Help": "通义千问 DashScope API Key",
"TTS Model Name": "模型名称",
"Qwen TTS Model Help": "Qwen TTS 模型名,例如 qwen3-tts-flash",
@ -483,12 +483,12 @@
"Preview Reference Audio Help": "播放当前参考音频",
"Upload Reference Audio File": "上传参考音频文件",
"Upload Reference Audio Help": "上传一段清晰的音频用于语音克隆",
"Audio uploaded": "音频已上传: {path}",
"Audio uploaded": "音频已上传: {path}",
"Inference Mode": "推理模式",
"Standard Inference": "普通推理",
"Fast Inference": "快速推理",
"Inference Mode Help": "普通推理质量更高但速度较慢,快速推理速度更快但质量略低",
"Advanced Parameters": "🔧 高级参数",
"Advanced Parameters": "高级参数",
"Sampling Temperature": "采样温度 (Temperature)",
"Sampling Temperature Help": "控制随机性,值越高输出越随机,值越低越确定",
"Top P Help": "nucleus 采样的概率阈值,值越小结果越确定",
@ -499,9 +499,9 @@
"Repetition Penalty Help": "值越大越能避免重复,但过大可能导致不自然",
"Enable Sampling": "启用采样",
"Enable Sampling Help": "启用采样可以获得更自然的语音",
"IndexTTS Usage Instructions Title": "💡 IndexTTS-1.5 使用说明",
"IndexTTS Usage Instructions Title": "IndexTTS-1.5 使用说明",
"IndexTTS Usage Instructions": "**零样本语音克隆**\n\n1. **准备参考音频**:上传或指定一段清晰的音频文件(建议 3-10 秒)\n2. **设置 API 地址**:确保 IndexTTS-1.5 服务正常运行\n3. **开始合成**:系统会自动使用参考音频的音色合成新语音\n\n**注意事项**\n- 参考音频质量直接影响合成效果\n- 建议使用无背景噪音的清晰音频\n- 文本长度建议控制在合理范围内\n- 首次合成可能需要较长时间",
"IndexTTS2 Emotion Parameters": "🎭 情感参数",
"IndexTTS2 Emotion Parameters": "情感参数",
"Emotion Mode": "情感控制方式",
"Emotion Mode Help": "选择 IndexTTS-2 的情感控制来源",
"Emotion Mode Speaker": "与音色参考相同",
@ -529,7 +529,7 @@
"Max Text Tokens Per Segment Help": "IndexTTS-2 分段推理的最大文本 token 数",
"Max Mel Tokens": "最大 Mel Tokens",
"Max Mel Tokens Help": "控制单次生成的最大 mel token 数,值越大可生成更长音频",
"IndexTTS2 Usage Instructions Title": "💡 IndexTTS-2 使用说明",
"IndexTTS2 Usage Instructions Title": "IndexTTS-2 使用说明",
"IndexTTS2 Usage Instructions": "**IndexTTS-2 语音克隆**\n\n1. **选择音色**:复用 IndexTTS-1.5 的资源音频或上传参考音频\n2. **设置 API 地址**:例如 http://192.168.3.6:7863/tts也可以填写服务根地址\n3. **调整情感参数**:默认使用 speaker可按需切换到 audio、vector 或 text\n4. **调整生成参数**temperature、top_p、top_k、num_beams、repetition_penalty 和 max_mel_tokens 会直接传给 IndexTTS-2 接口\n\n**注意事项**\n- 参考音频质量会直接影响克隆效果\n- 首次请求可能需要加载模型,耗时更长\n- CPU 部署生成速度会明显慢于 GPU",
"OmniVoice Usage Instructions Title": "OmniVoice 使用说明",
"OmniVoice Usage Instructions": "**OmniVoice-Pack 语音合成**\n\n1. **自动音色**:只需要设置 API 地址和语言,可直接合成。\n2. **指令音色**:填写 instruct 描述想要的性别、音高、口音或风格。\n3. **参考音频克隆**:上传或选择参考音频,并填写该音频对应文本。\n\n**注意事项**\n- 当前默认服务地址为 http://127.0.0.1:7866/tts\n- 参考音频克隆在服务未加载 ASR 模型时必须填写参考文本\n- OmniVoice 返回 WAV 音频,系统会按音频时长估算字幕段落",
@ -545,7 +545,7 @@
"Voice Pitch Help 0.5-1.5": "调节语音音高 (0.5-1.5)",
"Sentence Silence Duration": "句尾静音时长 (秒)",
"Sentence Silence Duration Help": "调节句尾静音时长 (0.0-2.0 秒)",
"Doubao TTS API Key Application Process": "💡 豆包语音 TTS API Key申请流程",
"Doubao TTS API Key Application Process": "豆包语音 TTS API Key申请流程",
"Application Steps": "申请步骤",
"Doubao TTS Step 1": "1. 打开 [https://console.volcengine.com/iam/keymanage](https://console.volcengine.com/iam/keymanage)",
"Doubao TTS Step 2": "2. 新建 Access Key 和 Secret Key",
@ -553,15 +553,15 @@
"Doubao TTS Step 4": "4. 点击立即使用",
"Doubao TTS Step 5": "5. 在最左边的 API 服务中心找到音频生成下面的语音合成(注意:是语音合成,不是语音合成大模型)",
"Doubao TTS Step 6": "6. 翻到最下面获取 APPID 和 Access Token",
"Doubao TTS Fill Credentials Notice": "💡 请将获取到的 Access Key、Secret Key、AppID 和 Token 填写到上方的配置中",
"Doubao TTS configured": "豆包语音 TTS 配置已设置",
"Please configure missing fields": "⚠️ 请配置: {fields}",
"Preview Voice Synthesis": "🎵 试听语音合成",
"Doubao TTS Fill Credentials Notice": "请将获取到的 Access Key、Secret Key、AppID 和 Token 填写到上方的配置中",
"Doubao TTS configured": "豆包语音 TTS 配置已设置",
"Please configure missing fields": "请配置: {fields}",
"Preview Voice Synthesis": "试听语音合成",
"Voice Preview Sample": "感谢关注 NarratoAI有任何问题或建议可以加入社区频道求助或讨论",
"Please configure voice settings first": "请先配置语音设置",
"Voice synthesis successful": "语音合成成功!",
"Voice synthesis failed": "语音合成失败,请检查配置",
"SoulVoice pitch not supported": " SoulVoice 引擎不支持音调调节",
"Voice synthesis successful": "语音合成成功!",
"Voice synthesis failed": "语音合成失败,请检查配置",
"SoulVoice pitch not supported": "SoulVoice 引擎不支持音调调节",
"上传字幕文件": "上传字幕",
"清除已上传字幕": "清除已上传字幕",
"无法读取字幕文件,请检查文件编码(支持 UTF-8、UTF-16、GBK、GB2312": "无法读取字幕文件,请检查文件编码(支持 UTF-8、UTF-16、GBK、GB2312",