From 4ab29fd7763c18808509270389eef7e885bd1493 Mon Sep 17 00:00:00 2001 From: viccy Date: Sun, 7 Jun 2026 18:36:47 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E4=BC=98=E5=8C=96=E8=A7=86=E9=A2=91?= =?UTF-8?q?=E7=94=9F=E6=88=90=E8=BF=9B=E5=BA=A6=E5=B1=95=E7=A4=BA=E4=B8=8E?= =?UTF-8?q?UI=E7=BB=86=E8=8A=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 为视频生成任务的每个处理步骤添加详细的中文状态提示 - 重构WebUI的视频生成弹窗,使用Streamlit原生状态组件优化进度展示 - 清理多语言翻译文本中的冗余表情符号,统一UI文本风格 - 调整TTS设置面板的折叠面板默认展开状态为关闭,并移除标题中的表情前缀 --- app/services/task.py | 92 +++++++++++++++++-- webui.py | 137 +++++++++++++++++++---------- webui/components/audio_settings.py | 6 +- webui/i18n/en.json | 52 +++++------ webui/i18n/zh.json | 52 +++++------ 5 files changed, 231 insertions(+), 108 deletions(-) diff --git a/app/services/task.py b/app/services/task.py index 74e7804..356b7a6 100644 --- a/app/services/task.py +++ b/app/services/task.py @@ -434,12 +434,23 @@ def start_subclip_unified(task_id: str, params: VideoClipParams): global merged_audio_path, merged_subtitle_path logger.info(f"\n\n## 开始统一视频处理任务: {task_id}") - sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=0) + sm.state.update_task( + task_id, + state=const.TASK_STATE_PROCESSING, + progress=0, + message="正在初始化视频生成任务", + ) """ 1. 加载剪辑脚本 """ logger.info("\n\n## 1. 加载视频脚本") + sm.state.update_task( + task_id, + state=const.TASK_STATE_PROCESSING, + progress=5, + message="正在加载剪辑脚本", + ) video_script_path = path.join(params.video_clip_json_path) if path.exists(video_script_path): @@ -465,6 +476,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams): 2. 使用 TTS 生成音频素材 """ logger.info("\n\n## 2. 根据OST设置生成音频列表") + sm.state.update_task( + task_id, + state=const.TASK_STATE_PROCESSING, + progress=10, + message="正在生成 TTS 配音", + ) # 只为OST=0 or 2的判断生成音频, OST=0 仅保留解说 OST=2 保留解说和原声 tts_segments = [ segment for segment in list_script @@ -481,12 +498,23 @@ def start_subclip_unified(task_id: str, params: VideoClipParams): voice_pitch=params.voice_pitch, ) - sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20) + sm.state.update_task( + task_id, + state=const.TASK_STATE_PROCESSING, + progress=20, + message="TTS 配音生成完成", + ) """ 3. 统一视频裁剪 - 基于OST类型的差异化裁剪策略 """ logger.info("\n\n## 3. 统一视频裁剪(基于OST类型)") + sm.state.update_task( + task_id, + state=const.TASK_STATE_PROCESSING, + progress=30, + message="正在按脚本裁剪视频片段", + ) # 使用新的统一裁剪策略 video_clip_result = clip_video.clip_video_unified( @@ -505,12 +533,23 @@ def start_subclip_unified(task_id: str, params: VideoClipParams): logger.info(f"统一裁剪完成,处理了 {len(video_clip_result)} 个视频片段") - sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60) + sm.state.update_task( + task_id, + state=const.TASK_STATE_PROCESSING, + progress=60, + message="视频片段裁剪完成", + ) """ 4. 合并音频和字幕 """ logger.info("\n\n## 4. 合并音频和字幕") + sm.state.update_task( + task_id, + state=const.TASK_STATE_PROCESSING, + progress=65, + message="正在合并配音和字幕", + ) total_duration = sum([script["duration"] for script in new_script_list]) if tts_segments: try: @@ -540,6 +579,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams): logger.warning("没有需要合并的音频/字幕") merged_audio_path = "" merged_subtitle_path = "" + sm.state.update_task( + task_id, + state=const.TASK_STATE_PROCESSING, + progress=70, + message="配音和字幕合并完成", + ) """ 5. 合并视频 @@ -549,6 +594,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams): combined_video_path = path.join(utils.task_dir(task_id), f"merger.mp4") logger.info(f"\n\n## 5. 合并视频: => {combined_video_path}") + sm.state.update_task( + task_id, + state=const.TASK_STATE_PROCESSING, + progress=75, + message="正在合并视频片段", + ) # 使用统一裁剪后的视频片段 video_clips = [] @@ -568,7 +619,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams): video_aspect=params.video_aspect, threads=params.n_threads ) - sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=80) + sm.state.update_task( + task_id, + state=const.TASK_STATE_PROCESSING, + progress=80, + message="视频片段合并完成", + ) """ 6. 合并字幕/BGM/配音/视频 @@ -581,6 +637,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams): else output_video_path ) logger.info(f"\n\n## 6. 最后一步: 合并字幕/BGM/配音/视频 -> {merge_output_video_path}") + sm.state.update_task( + task_id, + state=const.TASK_STATE_PROCESSING, + progress=85, + message="正在合成最终视频", + ) bgm_path = utils.get_bgm_file( bgm_type=getattr(params, "bgm_type", "random"), @@ -634,10 +696,20 @@ def start_subclip_unified(task_id: str, params: VideoClipParams): auto_subtitle_path = "" if auto_transcription_enabled: - sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=90) + sm.state.update_task( + task_id, + state=const.TASK_STATE_PROCESSING, + progress=90, + message="正在自动转录最终视频", + ) logger.info("\n\n## 7. 自动转录最终视频字幕") auto_subtitle_path = _transcribe_final_video(task_id, merge_output_video_path, params) - sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=95) + sm.state.update_task( + task_id, + state=const.TASK_STATE_PROCESSING, + progress=95, + message="正在压入自动转录字幕", + ) logger.info(f"\n\n## 8. 压入自动转录字幕 -> {output_video_path}") _merge_auto_transcribed_subtitles( source_video_path=merge_output_video_path, @@ -657,7 +729,13 @@ def start_subclip_unified(task_id: str, params: VideoClipParams): } if auto_subtitle_path: kwargs["subtitles"] = [auto_subtitle_path] - sm.state.update_task(task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs) + sm.state.update_task( + task_id, + state=const.TASK_STATE_COMPLETE, + progress=100, + message="视频生成完成", + **kwargs + ) return kwargs diff --git a/webui.py b/webui.py index 5ba26a3..3eac2eb 100644 --- a/webui.py +++ b/webui.py @@ -170,57 +170,102 @@ def render_generate_button(): # 生成一个新的task_id用于本次处理 task_id = str(uuid.uuid4()) - # 创建进度条 - progress_bar = st.progress(0) - status_text = st.empty() + @st.dialog(tr("Generating Video"), width="large") + def generate_video_dialog(): + st.markdown( + """ + + """, + unsafe_allow_html=True, + ) - def run_task(): - try: - tm.start_subclip_unified( - task_id=task_id, - params=params - ) - except Exception as e: - logger.error(f"任务执行失败: {e}") - sm.state.update_task(task_id, state=const.TASK_STATE_FAILED, message=str(e)) + progress_bar = st.progress(0) + status_panel = st.status(tr("Generating Video"), expanded=True) + status_panel.write(tr("Generating Video")) - # 在新线程中启动任务 - thread = threading.Thread(target=run_task) - thread.start() + def run_task(): + try: + tm.start_subclip_unified( + task_id=task_id, + params=params + ) + except Exception as e: + logger.error(f"任务执行失败: {e}") + current_task = sm.state.get_task(task_id) or {} + sm.state.update_task( + task_id, + state=const.TASK_STATE_FAILED, + progress=current_task.get("progress", 0), + message=str(e), + ) - # 轮询任务状态 - while True: - task = sm.state.get_task(task_id) - if task: - progress = task.get("progress", 0) - state = task.get("state") - - # 更新进度条 - progress_bar.progress(progress / 100) - status_text.text(f"Processing... {progress}%") + # 在新线程中启动任务 + thread = threading.Thread(target=run_task) + thread.start() + + last_status_key = None + + # 轮询任务状态 + while True: + task = sm.state.get_task(task_id) + if task: + progress = task.get("progress", 0) + state = task.get("state") - if state == const.TASK_STATE_COMPLETE: - status_text.text(tr("Video Generation Completed")) - progress_bar.progress(1.0) - - # 显示结果 - video_files = task.get("videos", []) try: - if video_files: - player_cols = st.columns(len(video_files) * 2 + 1) - for i, url in enumerate(video_files): - player_cols[i * 2 + 1].video(url) - except Exception as e: - logger.error(f"播放视频失败: {e}") - - st.success(tr("Video Generation Completed")) - break - - elif state == const.TASK_STATE_FAILED: - st.error(f"{tr('Task failed')}: {task.get('message', 'Unknown error')}") - break - - time.sleep(0.5) + progress = int(progress) + except (TypeError, ValueError): + progress = 0 + progress = max(0, min(progress, 100)) + + # 更新进度条和阶段状态 + progress_bar.progress(progress / 100) + current_message = task.get("message") or f"Processing... {progress}%" + status_label = f"{current_message} ({progress}%)" + status_key = (state, progress, current_message) + if status_key != last_status_key: + status_panel.write(status_label) + last_status_key = status_key + + if state == const.TASK_STATE_COMPLETE: + status_panel.update( + label=tr("Video Generation Completed"), + state="complete", + expanded=False, + ) + progress_bar.progress(1.0) + + # 显示结果 + video_files = task.get("videos", []) + try: + if video_files: + for url in video_files: + st.video(url) + except Exception as e: + logger.error(f"播放视频失败: {e}") + + st.success(tr("Video Generation Completed")) + break + + if state == const.TASK_STATE_FAILED: + status_panel.update( + label=f"{tr('Task failed')}: {task.get('message', 'Unknown error')}", + state="error", + expanded=True, + ) + st.error(f"{tr('Task failed')}: {task.get('message', 'Unknown error')}") + break + + time.sleep(0.5) + + generate_video_dialog() def get_voice_name_for_tts_engine(tts_engine: str) -> str: diff --git a/webui/components/audio_settings.py b/webui/components/audio_settings.py index cab5413..8a6f4cc 100644 --- a/webui/components/audio_settings.py +++ b/webui/components/audio_settings.py @@ -522,7 +522,7 @@ def render_tts_settings(tr): """渲染TTS(文本转语音)设置""" # 1. TTS引擎选择器 - # st.subheader("🎤 TTS引擎选择") + # st.subheader("TTS引擎选择") engine_options = get_tts_engine_options(tr) engine_descriptions = get_tts_engine_descriptions(tr) @@ -553,7 +553,7 @@ def render_tts_settings(tr): if selected_engine in engine_descriptions: desc = engine_descriptions[selected_engine] - with st.expander(tr("TTS Engine Details").format(engine=desc['title']), expanded=True): + with st.expander(tr("TTS Engine Details").format(engine=desc['title']), expanded=False): st.markdown(f"**{tr('Features')}:** {desc['features']}") st.markdown(f"**{tr('Use Case')}:** {desc['use_case']}") @@ -561,7 +561,7 @@ def render_tts_settings(tr): st.markdown(f"**{tr('Registration URL')}:** [{desc['registration']}]({desc['registration']})") # 3. 根据选择的引擎渲染对应的配置界面 - # st.subheader("⚙️ 引擎配置") + # st.subheader("引擎配置") if selected_engine == "edge_tts": render_edge_tts_settings(tr) diff --git a/webui/i18n/en.json b/webui/i18n/en.json index fae8c95..0a8fb4b 100644 --- a/webui/i18n/en.json +++ b/webui/i18n/en.json @@ -61,7 +61,7 @@ "Preview Background Music Help": "Play the selected background music.", "Upload Background Music File": "Upload Background Music File", "Upload Background Music Help": "Upload an audio file to use as background music.", - "Background Music uploaded": "✅ Background music uploaded: {path}", + "Background Music uploaded": "Background music uploaded: {path}", "Background Music Volume": "Background Music Volume (0.2 represents 20%, background sound should not be too loud)", "Subtitle Settings": "**Subtitle Settings**", "Enable Subtitles": "Enable Subtitles (If unchecked, the following settings will not take effect)", @@ -326,13 +326,13 @@ "Cloud Service": "Cloud Service", "Select TTS Engine": "Select TTS Engine", "Select TTS Engine Help": "Choose the text-to-speech engine you want to use.", - "TTS Engine Details": "📋 {engine} Details", + "TTS Engine Details": "{engine} Details", "Features": "Features", "Use Case": "Use Case", "Registration URL": "Registration URL", "Voice Selection": "Voice Selection", "Select Edge TTS Voice": "Select an Edge TTS voice", - "Edge TTS Voice Description": "💡 Edge TTS Voice Notes", + "Edge TTS Voice Description": "Edge TTS Voice Notes", "Loaded voice count": "Loaded {count} voices", "Female Voice": "Female voice", "Male Voice": "Male voice", @@ -348,21 +348,21 @@ "Azure Speech Key Help": "Azure Speech Services API key", "Voice Name": "Voice Name", "Azure Voice Name Help": "Enter an Azure Speech Services voice name. You can use the official voice name directly, such as zh-CN-YunzeNeural.", - "Common Voice Reference": "💡 Common Voice Reference", + "Common Voice Reference": "Common Voice Reference", "Chinese Voices": "Chinese Voices", "English Voices": "English Voices", "Multilingual": "multilingual", - "Azure Voices Docs Notice": "💡 For more voices, see the [Azure Speech Services documentation](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support).", + "Azure Voices Docs Notice": "For more voices, see the [Azure Speech Services documentation](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support).", "Quick Select": "Quick Select", "Chinese Female Voice": "Chinese Female Voice", "Chinese Male Voice": "Chinese Male Voice", "English Female Voice": "English Female Voice", - "Voice name valid": "✅ Voice name is valid: {voice}", - "Voice name format may be invalid": "⚠️ Voice name format may be incorrect: {voice}", - "Azure voice name format notice": "💡 Azure voice names usually follow this format: [language]-[region]-[name]Neural", - "Azure Speech Services configured": "✅ Azure Speech Services is configured", - "Please configure service region": "⚠️ Please configure the service region", - "Please configure API Key": "⚠️ Please configure the API Key", + "Voice name valid": "Voice name is valid: {voice}", + "Voice name format may be invalid": "Voice name format may be incorrect: {voice}", + "Azure voice name format notice": "Azure voice names usually follow this format: [language]-[region]-[name]Neural", + "Azure Speech Services configured": "Azure Speech Services is configured", + "Please configure service region": "Please configure the service region", + "Please configure API Key": "Please configure the API Key", "Task failed": "Task failed", "Script file cannot be empty": "Script file cannot be empty", "Video file cannot be empty": "Video file cannot be empty", @@ -486,10 +486,10 @@ "Tencent Service Region Help": "Select the Tencent Cloud TTS service region", "Custom Voice": "Custom Voice", "Select Tencent TTS Voice": "Select a Tencent Cloud TTS voice", - "Tencent Cloud TTS Voice Description": "💡 Tencent Cloud TTS Voice Notes", + "Tencent Cloud TTS Voice Description": "Tencent Cloud TTS Voice Notes", "Female Voices": "Female Voices", "Male Voices": "Male Voices", - "Tencent More Voices Notice": "💡 See the official Tencent Cloud documentation for more voices.", + "Tencent More Voices Notice": "See the official Tencent Cloud documentation for more voices.", "Qwen DashScope API Key Help": "Tongyi Qwen DashScope API Key", "TTS Model Name": "TTS Model Name", "Qwen TTS Model Help": "Qwen TTS model name, for example qwen3-tts-flash", @@ -532,12 +532,12 @@ "Preview Reference Audio Help": "Play the selected reference audio.", "Upload Reference Audio File": "Upload Reference Audio File", "Upload Reference Audio Help": "Upload a clear audio clip for voice cloning", - "Audio uploaded": "✅ Audio uploaded: {path}", + "Audio uploaded": "Audio uploaded: {path}", "Inference Mode": "Inference Mode", "Standard Inference": "Standard Inference", "Fast Inference": "Fast Inference", "Inference Mode Help": "Standard inference has higher quality but is slower. Fast inference is faster with slightly lower quality.", - "Advanced Parameters": "🔧 Advanced Parameters", + "Advanced Parameters": "Advanced Parameters", "Sampling Temperature": "Sampling Temperature", "Sampling Temperature Help": "Controls randomness. Higher values are more random; lower values are more deterministic.", "Top P Help": "Probability threshold for nucleus sampling. Smaller values make results more deterministic.", @@ -548,9 +548,9 @@ "Repetition Penalty Help": "Higher values reduce repetition, but overly high values may sound unnatural.", "Enable Sampling": "Enable Sampling", "Enable Sampling Help": "Enable sampling for more natural speech.", - "IndexTTS Usage Instructions Title": "💡 IndexTTS-1.5 Usage Instructions", + "IndexTTS Usage Instructions Title": "IndexTTS-1.5 Usage Instructions", "IndexTTS Usage Instructions": "**Zero-shot voice cloning**\n\n1. **Prepare reference audio**: upload or specify a clear audio file (3-10 seconds recommended)\n2. **Set API URL**: make sure the IndexTTS-1.5 service is running\n3. **Start synthesis**: the system will use the reference voice to synthesize new speech\n\n**Notes**:\n- Reference audio quality directly affects synthesis quality\n- Use clean audio without background noise when possible\n- Keep text length within a reasonable range\n- The first synthesis may take longer", - "IndexTTS2 Emotion Parameters": "🎭 Emotion Parameters", + "IndexTTS2 Emotion Parameters": "Emotion Parameters", "Emotion Mode": "Emotion Mode", "Emotion Mode Help": "Choose the emotion control source for IndexTTS-2.", "Emotion Mode Speaker": "Same as speaker reference", @@ -578,7 +578,7 @@ "Max Text Tokens Per Segment Help": "Maximum text tokens per segment for IndexTTS-2 inference.", "Max Mel Tokens": "Max Mel Tokens", "Max Mel Tokens Help": "Controls the maximum mel tokens generated in one request. Higher values can produce longer audio.", - "IndexTTS2 Usage Instructions Title": "💡 IndexTTS-2 Usage Instructions", + "IndexTTS2 Usage Instructions Title": "IndexTTS-2 Usage Instructions", "IndexTTS2 Usage Instructions": "**IndexTTS-2 voice cloning**\n\n1. **Choose a voice**: reuse IndexTTS-1.5 resource audio or upload a reference audio file\n2. **Set API URL**: for example http://192.168.3.6:7863/tts, or enter the service root\n3. **Tune emotion**: speaker is the default; switch to audio, vector, or text when needed\n4. **Tune generation**: temperature, top_p, top_k, num_beams, repetition_penalty, and max_mel_tokens are sent directly to the IndexTTS-2 API\n\n**Notes**:\n- Reference audio quality directly affects cloning quality\n- The first request may load the model and take longer\n- CPU deployments are much slower than GPU deployments", "OmniVoice Usage Instructions Title": "OmniVoice Usage Instructions", "OmniVoice Usage Instructions": "**OmniVoice-Pack speech synthesis**\n\n1. **Automatic voice**: set the API URL and language, then synthesize directly.\n2. **Voice design**: fill instruct with the desired gender, pitch, accent, or style.\n3. **Reference-audio clone**: upload or choose reference audio and fill its matching transcript.\n\n**Notes**:\n- The default service URL is http://127.0.0.1:7866/tts\n- Reference-audio cloning requires reference text when the service has no ASR model loaded\n- OmniVoice returns WAV audio, and NarratoAI estimates subtitle segment timing from the audio duration", @@ -594,7 +594,7 @@ "Voice Pitch Help 0.5-1.5": "Adjust voice pitch (0.5-1.5)", "Sentence Silence Duration": "Sentence-end Silence Duration (seconds)", "Sentence Silence Duration Help": "Adjust sentence-end silence duration (0.0-2.0 seconds)", - "Doubao TTS API Key Application Process": "💡 Doubao TTS API Key Application Process", + "Doubao TTS API Key Application Process": "Doubao TTS API Key Application Process", "Application Steps": "Application Steps", "Doubao TTS Step 1": "1. Open [https://console.volcengine.com/iam/keymanage](https://console.volcengine.com/iam/keymanage)", "Doubao TTS Step 2": "2. Create a new Access Key and Secret Key", @@ -602,15 +602,15 @@ "Doubao TTS Step 4": "4. Click Start Now", "Doubao TTS Step 5": "5. In the left API Service Center, find Speech Synthesis under Audio Generation (note: Speech Synthesis, not the speech synthesis large model)", "Doubao TTS Step 6": "6. Scroll to the bottom to get the APPID and Access Token", - "Doubao TTS Fill Credentials Notice": "💡 Fill the Access Key, Secret Key, AppID, and Token above.", - "Doubao TTS configured": "✅ Doubao TTS is configured", - "Please configure missing fields": "⚠️ Please configure: {fields}", - "Preview Voice Synthesis": "🎵 Preview Voice Synthesis", + "Doubao TTS Fill Credentials Notice": "Fill the Access Key, Secret Key, AppID, and Token above.", + "Doubao TTS configured": "Doubao TTS is configured", + "Please configure missing fields": "Please configure: {fields}", + "Preview Voice Synthesis": "Preview Voice Synthesis", "Voice Preview Sample": "Thanks for using NarratoAI. If you have any questions or suggestions, please join the community for help and discussion.", "Please configure voice settings first": "Please configure voice settings first", - "Voice synthesis successful": "✅ Voice synthesis successful!", - "Voice synthesis failed": "❌ Voice synthesis failed. Please check your configuration.", - "SoulVoice pitch not supported": "ℹ️ SoulVoice does not support pitch adjustment", + "Voice synthesis successful": "Voice synthesis successful!", + "Voice synthesis failed": "Voice synthesis failed. Please check your configuration.", + "SoulVoice pitch not supported": "SoulVoice does not support pitch adjustment", "Progress": "Progress", "Generating script...": "Generating script...", "Please select video file first": "Please select a video file first", diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json index 63eac0e..1099604 100644 --- a/webui/i18n/zh.json +++ b/webui/i18n/zh.json @@ -49,7 +49,7 @@ "Preview Background Music Help": "播放当前背景音乐", "Upload Background Music File": "上传背景音乐文件", "Upload Background Music Help": "上传一个音频文件作为背景音乐", - "Background Music uploaded": "✅ 背景音乐已上传: {path}", + "Background Music uploaded": "背景音乐已上传: {path}", "Background Music Volume": "背景音乐音量(0.2表示20%,背景声音不宜过高)", "Subtitle Settings": "**字幕设置**", "Enable Subtitles": "启用字幕(若取消勾选,下面的设置都将不生效)", @@ -276,13 +276,13 @@ "Cloud Service": "云端服务", "Select TTS Engine": "选择 TTS 引擎", "Select TTS Engine Help": "选择您要使用的文本转语音引擎", - "TTS Engine Details": "📋 {engine} 详细说明", + "TTS Engine Details": "{engine} 详细说明", "Features": "特点", "Use Case": "适用场景", "Registration URL": "注册地址", "Voice Selection": "音色选择", "Select Edge TTS Voice": "选择 Edge TTS 音色", - "Edge TTS Voice Description": "💡 Edge TTS 音色说明", + "Edge TTS Voice Description": "Edge TTS 音色说明", "Loaded voice count": "已加载 {count} 个音色", "Female Voice": "女声", "Male Voice": "男声", @@ -298,21 +298,21 @@ "Azure Speech Key Help": "Azure Speech Services API 密钥", "Voice Name": "音色名称", "Azure Voice Name Help": "输入 Azure Speech Services 音色名称,直接使用官方音色名称即可。例如:zh-CN-YunzeNeural", - "Common Voice Reference": "💡 常用音色参考", + "Common Voice Reference": "常用音色参考", "Chinese Voices": "中文音色", "English Voices": "英文音色", "Multilingual": "多语言", - "Azure Voices Docs Notice": "💡 更多音色请参考 [Azure Speech Services 官方文档](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support)", + "Azure Voices Docs Notice": "更多音色请参考 [Azure Speech Services 官方文档](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support)", "Quick Select": "快速选择", "Chinese Female Voice": "中文女声", "Chinese Male Voice": "中文男声", "English Female Voice": "英文女声", - "Voice name valid": "✅ 音色名称有效: {voice}", - "Voice name format may be invalid": "⚠️ 音色名称格式可能不正确: {voice}", - "Azure voice name format notice": "💡 Azure 音色名称通常格式为: [语言]-[地区]-[名称]Neural", - "Azure Speech Services configured": "✅ Azure Speech Services 配置已设置", - "Please configure service region": "⚠️ 请配置服务区域", - "Please configure API Key": "⚠️ 请配置 API Key", + "Voice name valid": "音色名称有效: {voice}", + "Voice name format may be invalid": "音色名称格式可能不正确: {voice}", + "Azure voice name format notice": "Azure 音色名称通常格式为: [语言]-[地区]-[名称]Neural", + "Azure Speech Services configured": "Azure Speech Services 配置已设置", + "Please configure service region": "请配置服务区域", + "Please configure API Key": "请配置 API Key", "Language": "界面语言", "Task failed": "任务失败", "Script file cannot be empty": "脚本文件不能为空", @@ -437,10 +437,10 @@ "Tencent Service Region Help": "选择腾讯云 TTS 服务地域", "Custom Voice": "自定义音色", "Select Tencent TTS Voice": "选择腾讯云 TTS 音色", - "Tencent Cloud TTS Voice Description": "💡 腾讯云 TTS 音色说明", + "Tencent Cloud TTS Voice Description": "腾讯云 TTS 音色说明", "Female Voices": "女声音色", "Male Voices": "男声音色", - "Tencent More Voices Notice": "💡 更多音色请参考腾讯云官方文档", + "Tencent More Voices Notice": "更多音色请参考腾讯云官方文档", "Qwen DashScope API Key Help": "通义千问 DashScope API Key", "TTS Model Name": "模型名称", "Qwen TTS Model Help": "Qwen TTS 模型名,例如 qwen3-tts-flash", @@ -483,12 +483,12 @@ "Preview Reference Audio Help": "播放当前参考音频", "Upload Reference Audio File": "上传参考音频文件", "Upload Reference Audio Help": "上传一段清晰的音频用于语音克隆", - "Audio uploaded": "✅ 音频已上传: {path}", + "Audio uploaded": "音频已上传: {path}", "Inference Mode": "推理模式", "Standard Inference": "普通推理", "Fast Inference": "快速推理", "Inference Mode Help": "普通推理质量更高但速度较慢,快速推理速度更快但质量略低", - "Advanced Parameters": "🔧 高级参数", + "Advanced Parameters": "高级参数", "Sampling Temperature": "采样温度 (Temperature)", "Sampling Temperature Help": "控制随机性,值越高输出越随机,值越低越确定", "Top P Help": "nucleus 采样的概率阈值,值越小结果越确定", @@ -499,9 +499,9 @@ "Repetition Penalty Help": "值越大越能避免重复,但过大可能导致不自然", "Enable Sampling": "启用采样", "Enable Sampling Help": "启用采样可以获得更自然的语音", - "IndexTTS Usage Instructions Title": "💡 IndexTTS-1.5 使用说明", + "IndexTTS Usage Instructions Title": "IndexTTS-1.5 使用说明", "IndexTTS Usage Instructions": "**零样本语音克隆**\n\n1. **准备参考音频**:上传或指定一段清晰的音频文件(建议 3-10 秒)\n2. **设置 API 地址**:确保 IndexTTS-1.5 服务正常运行\n3. **开始合成**:系统会自动使用参考音频的音色合成新语音\n\n**注意事项**:\n- 参考音频质量直接影响合成效果\n- 建议使用无背景噪音的清晰音频\n- 文本长度建议控制在合理范围内\n- 首次合成可能需要较长时间", - "IndexTTS2 Emotion Parameters": "🎭 情感参数", + "IndexTTS2 Emotion Parameters": "情感参数", "Emotion Mode": "情感控制方式", "Emotion Mode Help": "选择 IndexTTS-2 的情感控制来源", "Emotion Mode Speaker": "与音色参考相同", @@ -529,7 +529,7 @@ "Max Text Tokens Per Segment Help": "IndexTTS-2 分段推理的最大文本 token 数", "Max Mel Tokens": "最大 Mel Tokens", "Max Mel Tokens Help": "控制单次生成的最大 mel token 数,值越大可生成更长音频", - "IndexTTS2 Usage Instructions Title": "💡 IndexTTS-2 使用说明", + "IndexTTS2 Usage Instructions Title": "IndexTTS-2 使用说明", "IndexTTS2 Usage Instructions": "**IndexTTS-2 语音克隆**\n\n1. **选择音色**:复用 IndexTTS-1.5 的资源音频或上传参考音频\n2. **设置 API 地址**:例如 http://192.168.3.6:7863/tts,也可以填写服务根地址\n3. **调整情感参数**:默认使用 speaker,可按需切换到 audio、vector 或 text\n4. **调整生成参数**:temperature、top_p、top_k、num_beams、repetition_penalty 和 max_mel_tokens 会直接传给 IndexTTS-2 接口\n\n**注意事项**:\n- 参考音频质量会直接影响克隆效果\n- 首次请求可能需要加载模型,耗时更长\n- CPU 部署生成速度会明显慢于 GPU", "OmniVoice Usage Instructions Title": "OmniVoice 使用说明", "OmniVoice Usage Instructions": "**OmniVoice-Pack 语音合成**\n\n1. **自动音色**:只需要设置 API 地址和语言,可直接合成。\n2. **指令音色**:填写 instruct 描述想要的性别、音高、口音或风格。\n3. **参考音频克隆**:上传或选择参考音频,并填写该音频对应文本。\n\n**注意事项**:\n- 当前默认服务地址为 http://127.0.0.1:7866/tts\n- 参考音频克隆在服务未加载 ASR 模型时必须填写参考文本\n- OmniVoice 返回 WAV 音频,系统会按音频时长估算字幕段落", @@ -545,7 +545,7 @@ "Voice Pitch Help 0.5-1.5": "调节语音音高 (0.5-1.5)", "Sentence Silence Duration": "句尾静音时长 (秒)", "Sentence Silence Duration Help": "调节句尾静音时长 (0.0-2.0 秒)", - "Doubao TTS API Key Application Process": "💡 豆包语音 TTS API Key申请流程", + "Doubao TTS API Key Application Process": "豆包语音 TTS API Key申请流程", "Application Steps": "申请步骤", "Doubao TTS Step 1": "1. 打开 [https://console.volcengine.com/iam/keymanage](https://console.volcengine.com/iam/keymanage)", "Doubao TTS Step 2": "2. 新建 Access Key 和 Secret Key", @@ -553,15 +553,15 @@ "Doubao TTS Step 4": "4. 点击立即使用", "Doubao TTS Step 5": "5. 在最左边的 API 服务中心找到音频生成下面的语音合成(注意:是语音合成,不是语音合成大模型)", "Doubao TTS Step 6": "6. 翻到最下面获取 APPID 和 Access Token", - "Doubao TTS Fill Credentials Notice": "💡 请将获取到的 Access Key、Secret Key、AppID 和 Token 填写到上方的配置中", - "Doubao TTS configured": "✅ 豆包语音 TTS 配置已设置", - "Please configure missing fields": "⚠️ 请配置: {fields}", - "Preview Voice Synthesis": "🎵 试听语音合成", + "Doubao TTS Fill Credentials Notice": "请将获取到的 Access Key、Secret Key、AppID 和 Token 填写到上方的配置中", + "Doubao TTS configured": "豆包语音 TTS 配置已设置", + "Please configure missing fields": "请配置: {fields}", + "Preview Voice Synthesis": "试听语音合成", "Voice Preview Sample": "感谢关注 NarratoAI,有任何问题或建议,可以加入社区频道求助或讨论", "Please configure voice settings first": "请先配置语音设置", - "Voice synthesis successful": "✅ 语音合成成功!", - "Voice synthesis failed": "❌ 语音合成失败,请检查配置", - "SoulVoice pitch not supported": "ℹ️ SoulVoice 引擎不支持音调调节", + "Voice synthesis successful": "语音合成成功!", + "Voice synthesis failed": "语音合成失败,请检查配置", + "SoulVoice pitch not supported": "SoulVoice 引擎不支持音调调节", "上传字幕文件": "上传字幕", "清除已上传字幕": "清除已上传字幕", "无法读取字幕文件,请检查文件编码(支持 UTF-8、UTF-16、GBK、GB2312)": "无法读取字幕文件,请检查文件编码(支持 UTF-8、UTF-16、GBK、GB2312)",