From 4ab29fd7763c18808509270389eef7e885bd1493 Mon Sep 17 00:00:00 2001
From: viccy <linyqemail@gmail.com>
Date: Sun, 7 Jun 2026 18:36:47 +0800
Subject: [PATCH] =?UTF-8?q?feat:=20=E4=BC=98=E5=8C=96=E8=A7=86=E9=A2=91?=
 =?UTF-8?q?=E7=94=9F=E6=88=90=E8=BF=9B=E5=BA=A6=E5=B1=95=E7=A4=BA=E4=B8=8E?=
 =?UTF-8?q?UI=E7=BB=86=E8=8A=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 为视频生成任务的每个处理步骤添加详细的中文状态提示
- 重构WebUI的视频生成弹窗，使用Streamlit原生状态组件优化进度展示
- 清理多语言翻译文本中的冗余表情符号，统一UI文本风格
- 将TTS设置面板中引擎详情折叠面板的默认状态由展开调整为收起，并移除标题中的表情前缀
---
 app/services/task.py               |  92 +++++++++++++++++--
 webui.py                           | 137 +++++++++++++++++++----------
 webui/components/audio_settings.py |   6 +-
 webui/i18n/en.json                 |  52 +++++------
 webui/i18n/zh.json                 |  52 +++++------
 5 files changed, 231 insertions(+), 108 deletions(-)

diff --git a/app/services/task.py b/app/services/task.py
index 74e7804..356b7a6 100644
--- a/app/services/task.py
+++ b/app/services/task.py
@@ -434,12 +434,23 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
     global merged_audio_path, merged_subtitle_path
 
     logger.info(f"\n\n## 开始统一视频处理任务: {task_id}")
-    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=0)
+    sm.state.update_task(
+        task_id,
+        state=const.TASK_STATE_PROCESSING,
+        progress=0,
+        message="正在初始化视频生成任务",
+    )
 
     """
     1. 加载剪辑脚本
     """
     logger.info("\n\n## 1. 加载视频脚本")
+    sm.state.update_task(
+        task_id,
+        state=const.TASK_STATE_PROCESSING,
+        progress=5,
+        message="正在加载剪辑脚本",
+    )
     video_script_path = path.join(params.video_clip_json_path)
 
     if path.exists(video_script_path):
@@ -465,6 +476,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
     2. 使用 TTS 生成音频素材
     """
     logger.info("\n\n## 2. 根据OST设置生成音频列表")
+    sm.state.update_task(
+        task_id,
+        state=const.TASK_STATE_PROCESSING,
+        progress=10,
+        message="正在生成 TTS 配音",
+    )
     # 只为OST=0 or 2的判断生成音频， OST=0 仅保留解说 OST=2 保留解说和原声
     tts_segments = [
         segment for segment in list_script
@@ -481,12 +498,23 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
         voice_pitch=params.voice_pitch,
     )
 
-    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)
+    sm.state.update_task(
+        task_id,
+        state=const.TASK_STATE_PROCESSING,
+        progress=20,
+        message="TTS 配音生成完成",
+    )
 
     """
     3. 统一视频裁剪 - 基于OST类型的差异化裁剪策略
     """
     logger.info("\n\n## 3. 统一视频裁剪（基于OST类型）")
+    sm.state.update_task(
+        task_id,
+        state=const.TASK_STATE_PROCESSING,
+        progress=30,
+        message="正在按脚本裁剪视频片段",
+    )
 
     # 使用新的统一裁剪策略
     video_clip_result = clip_video.clip_video_unified(
@@ -505,12 +533,23 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
 
     logger.info(f"统一裁剪完成，处理了 {len(video_clip_result)} 个视频片段")
 
-    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60)
+    sm.state.update_task(
+        task_id,
+        state=const.TASK_STATE_PROCESSING,
+        progress=60,
+        message="视频片段裁剪完成",
+    )
 
     """
     4. 合并音频和字幕
     """
     logger.info("\n\n## 4. 合并音频和字幕")
+    sm.state.update_task(
+        task_id,
+        state=const.TASK_STATE_PROCESSING,
+        progress=65,
+        message="正在合并配音和字幕",
+    )
     total_duration = sum([script["duration"] for script in new_script_list])
     if tts_segments:
         try:
@@ -540,6 +579,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
         logger.warning("没有需要合并的音频/字幕")
         merged_audio_path = ""
         merged_subtitle_path = ""
+    sm.state.update_task(
+        task_id,
+        state=const.TASK_STATE_PROCESSING,
+        progress=70,
+        message="配音和字幕合并完成",
+    )
 
     """
     5. 合并视频
@@ -549,6 +594,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
 
     combined_video_path = path.join(utils.task_dir(task_id), f"merger.mp4")
     logger.info(f"\n\n## 5. 合并视频: => {combined_video_path}")
+    sm.state.update_task(
+        task_id,
+        state=const.TASK_STATE_PROCESSING,
+        progress=75,
+        message="正在合并视频片段",
+    )
 
     # 使用统一裁剪后的视频片段
     video_clips = []
@@ -568,7 +619,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
         video_aspect=params.video_aspect,
         threads=params.n_threads
     )
-    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=80)
+    sm.state.update_task(
+        task_id,
+        state=const.TASK_STATE_PROCESSING,
+        progress=80,
+        message="视频片段合并完成",
+    )
 
     """
     6. 合并字幕/BGM/配音/视频
@@ -581,6 +637,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
         else output_video_path
     )
     logger.info(f"\n\n## 6. 最后一步: 合并字幕/BGM/配音/视频 -> {merge_output_video_path}")
+    sm.state.update_task(
+        task_id,
+        state=const.TASK_STATE_PROCESSING,
+        progress=85,
+        message="正在合成最终视频",
+    )
 
     bgm_path = utils.get_bgm_file(
         bgm_type=getattr(params, "bgm_type", "random"),
@@ -634,10 +696,20 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
 
     auto_subtitle_path = ""
     if auto_transcription_enabled:
-        sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=90)
+        sm.state.update_task(
+            task_id,
+            state=const.TASK_STATE_PROCESSING,
+            progress=90,
+            message="正在自动转录最终视频",
+        )
         logger.info("\n\n## 7. 自动转录最终视频字幕")
         auto_subtitle_path = _transcribe_final_video(task_id, merge_output_video_path, params)
-        sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=95)
+        sm.state.update_task(
+            task_id,
+            state=const.TASK_STATE_PROCESSING,
+            progress=95,
+            message="正在压入自动转录字幕",
+        )
         logger.info(f"\n\n## 8. 压入自动转录字幕 -> {output_video_path}")
         _merge_auto_transcribed_subtitles(
             source_video_path=merge_output_video_path,
@@ -657,7 +729,13 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
     }
     if auto_subtitle_path:
         kwargs["subtitles"] = [auto_subtitle_path]
-    sm.state.update_task(task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs)
+    sm.state.update_task(
+        task_id,
+        state=const.TASK_STATE_COMPLETE,
+        progress=100,
+        message="视频生成完成",
+        **kwargs
+    )
     return kwargs
 
 
diff --git a/webui.py b/webui.py
index 5ba26a3..3eac2eb 100644
--- a/webui.py
+++ b/webui.py
@@ -170,57 +170,102 @@ def render_generate_button():
         # 生成一个新的task_id用于本次处理
         task_id = str(uuid.uuid4())
 
-        # 创建进度条
-        progress_bar = st.progress(0)
-        status_text = st.empty()
+        @st.dialog(tr("Generating Video"), width="large")
+        def generate_video_dialog():
+            st.markdown(
+                """
+                <style>
+                    div[data-testid="stDialog"] div[data-testid="stStatusWidget"] {
+                        margin-top: 0.25rem;
+                    }
+                    div[data-testid="stDialog"] div[data-testid="stProgress"] {
+                        margin-bottom: 0.75rem;
+                    }
+                </style>
+                """,
+                unsafe_allow_html=True,
+            )
 
-        def run_task():
-            try:
-                tm.start_subclip_unified(
-                    task_id=task_id,
-                    params=params
-                )
-            except Exception as e:
-                logger.error(f"任务执行失败: {e}")
-                sm.state.update_task(task_id, state=const.TASK_STATE_FAILED, message=str(e))
+            progress_bar = st.progress(0)
+            status_panel = st.status(tr("Generating Video"), expanded=True)
+            status_panel.write(tr("Generating Video"))
 
-        # 在新线程中启动任务
-        thread = threading.Thread(target=run_task)
-        thread.start()
+            def run_task():
+                try:
+                    tm.start_subclip_unified(
+                        task_id=task_id,
+                        params=params
+                    )
+                except Exception as e:
+                    logger.error(f"任务执行失败: {e}")
+                    current_task = sm.state.get_task(task_id) or {}
+                    sm.state.update_task(
+                        task_id,
+                        state=const.TASK_STATE_FAILED,
+                        progress=current_task.get("progress", 0),
+                        message=str(e),
+                    )
 
-        # 轮询任务状态
-        while True:
-            task = sm.state.get_task(task_id)
-            if task:
-                progress = task.get("progress", 0)
-                state = task.get("state")
-                
-                # 更新进度条
-                progress_bar.progress(progress / 100)
-                status_text.text(f"Processing... {progress}%")
+            # 在新线程中启动任务
+            thread = threading.Thread(target=run_task)
+            thread.start()
+
+            last_status_key = None
+
+            # 轮询任务状态
+            while True:
+                task = sm.state.get_task(task_id)
+                if task:
+                    progress = task.get("progress", 0)
+                    state = task.get("state")
 
-                if state == const.TASK_STATE_COMPLETE:
-                    status_text.text(tr("Video Generation Completed"))
-                    progress_bar.progress(1.0)
-                    
-                    # 显示结果
-                    video_files = task.get("videos", [])
                     try:
-                        if video_files:
-                            player_cols = st.columns(len(video_files) * 2 + 1)
-                            for i, url in enumerate(video_files):
-                                player_cols[i * 2 + 1].video(url)
-                    except Exception as e:
-                        logger.error(f"播放视频失败: {e}")
-                    
-                    st.success(tr("Video Generation Completed"))
-                    break
-                
-                elif state == const.TASK_STATE_FAILED:
-                    st.error(f"{tr('Task failed')}: {task.get('message', 'Unknown error')}")
-                    break
-            
-            time.sleep(0.5)
+                        progress = int(progress)
+                    except (TypeError, ValueError):
+                        progress = 0
+                    progress = max(0, min(progress, 100))
+
+                    # 更新进度条和阶段状态
+                    progress_bar.progress(progress / 100)
+                    current_message = task.get("message") or f"Processing... {progress}%"
+                    status_label = f"{current_message} ({progress}%)"
+                    status_key = (state, progress, current_message)
+                    if status_key != last_status_key:
+                        status_panel.write(status_label)
+                        last_status_key = status_key
+
+                    if state == const.TASK_STATE_COMPLETE:
+                        status_panel.update(
+                            label=tr("Video Generation Completed"),
+                            state="complete",
+                            expanded=False,
+                        )
+                        progress_bar.progress(1.0)
+
+                        # 显示结果
+                        video_files = task.get("videos", [])
+                        try:
+                            if video_files:
+                                for url in video_files:
+                                    st.video(url)
+                        except Exception as e:
+                            logger.error(f"播放视频失败: {e}")
+
+                        st.success(tr("Video Generation Completed"))
+                        break
+
+                    if state == const.TASK_STATE_FAILED:
+                        status_panel.update(
+                            label=f"{tr('Task failed')}: {task.get('message', 'Unknown error')}",
+                            state="error",
+                            expanded=True,
+                        )
+                        st.error(f"{tr('Task failed')}: {task.get('message', 'Unknown error')}")
+                        break
+
+                time.sleep(0.5)
+
+        generate_video_dialog()
 
 
 def get_voice_name_for_tts_engine(tts_engine: str) -> str:
diff --git a/webui/components/audio_settings.py b/webui/components/audio_settings.py
index cab5413..8a6f4cc 100644
--- a/webui/components/audio_settings.py
+++ b/webui/components/audio_settings.py
@@ -522,7 +522,7 @@ def render_tts_settings(tr):
     """渲染TTS(文本转语音)设置"""
 
     # 1. TTS引擎选择器
-    # st.subheader("🎤 TTS引擎选择")
+    # st.subheader("TTS引擎选择")
 
     engine_options = get_tts_engine_options(tr)
     engine_descriptions = get_tts_engine_descriptions(tr)
@@ -553,7 +553,7 @@ def render_tts_settings(tr):
     if selected_engine in engine_descriptions:
         desc = engine_descriptions[selected_engine]
 
-        with st.expander(tr("TTS Engine Details").format(engine=desc['title']), expanded=True):
+        with st.expander(tr("TTS Engine Details").format(engine=desc['title']), expanded=False):
             st.markdown(f"**{tr('Features')}:** {desc['features']}")
             st.markdown(f"**{tr('Use Case')}:** {desc['use_case']}")
 
@@ -561,7 +561,7 @@ def render_tts_settings(tr):
                 st.markdown(f"**{tr('Registration URL')}:** [{desc['registration']}]({desc['registration']})")
 
     # 3. 根据选择的引擎渲染对应的配置界面
-    # st.subheader("⚙️ 引擎配置")
+    # st.subheader("引擎配置")
 
     if selected_engine == "edge_tts":
         render_edge_tts_settings(tr)
diff --git a/webui/i18n/en.json b/webui/i18n/en.json
index fae8c95..0a8fb4b 100644
--- a/webui/i18n/en.json
+++ b/webui/i18n/en.json
@@ -61,7 +61,7 @@
     "Preview Background Music Help": "Play the selected background music.",
     "Upload Background Music File": "Upload Background Music File",
     "Upload Background Music Help": "Upload an audio file to use as background music.",
-    "Background Music uploaded": "✅ Background music uploaded: {path}",
+    "Background Music uploaded": "Background music uploaded: {path}",
     "Background Music Volume": "Background Music Volume (0.2 represents 20%, background sound should not be too loud)",
     "Subtitle Settings": "**Subtitle Settings**",
     "Enable Subtitles": "Enable Subtitles (If unchecked, the following settings will not take effect)",
@@ -326,13 +326,13 @@
     "Cloud Service": "Cloud Service",
     "Select TTS Engine": "Select TTS Engine",
     "Select TTS Engine Help": "Choose the text-to-speech engine you want to use.",
-    "TTS Engine Details": "📋 {engine} Details",
+    "TTS Engine Details": "{engine} Details",
     "Features": "Features",
     "Use Case": "Use Case",
     "Registration URL": "Registration URL",
     "Voice Selection": "Voice Selection",
     "Select Edge TTS Voice": "Select an Edge TTS voice",
-    "Edge TTS Voice Description": "💡 Edge TTS Voice Notes",
+    "Edge TTS Voice Description": "Edge TTS Voice Notes",
     "Loaded voice count": "Loaded {count} voices",
     "Female Voice": "Female voice",
     "Male Voice": "Male voice",
@@ -348,21 +348,21 @@
     "Azure Speech Key Help": "Azure Speech Services API key",
     "Voice Name": "Voice Name",
     "Azure Voice Name Help": "Enter an Azure Speech Services voice name. You can use the official voice name directly, such as zh-CN-YunzeNeural.",
-    "Common Voice Reference": "💡 Common Voice Reference",
+    "Common Voice Reference": "Common Voice Reference",
     "Chinese Voices": "Chinese Voices",
     "English Voices": "English Voices",
     "Multilingual": "multilingual",
-    "Azure Voices Docs Notice": "💡 For more voices, see the [Azure Speech Services documentation](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support).",
+    "Azure Voices Docs Notice": "For more voices, see the [Azure Speech Services documentation](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support).",
     "Quick Select": "Quick Select",
     "Chinese Female Voice": "Chinese Female Voice",
     "Chinese Male Voice": "Chinese Male Voice",
     "English Female Voice": "English Female Voice",
-    "Voice name valid": "✅ Voice name is valid: {voice}",
-    "Voice name format may be invalid": "⚠️ Voice name format may be incorrect: {voice}",
-    "Azure voice name format notice": "💡 Azure voice names usually follow this format: [language]-[region]-[name]Neural",
-    "Azure Speech Services configured": "✅ Azure Speech Services is configured",
-    "Please configure service region": "⚠️ Please configure the service region",
-    "Please configure API Key": "⚠️ Please configure the API Key",
+    "Voice name valid": "Voice name is valid: {voice}",
+    "Voice name format may be invalid": "Voice name format may be incorrect: {voice}",
+    "Azure voice name format notice": "Azure voice names usually follow this format: [language]-[region]-[name]Neural",
+    "Azure Speech Services configured": "Azure Speech Services is configured",
+    "Please configure service region": "Please configure the service region",
+    "Please configure API Key": "Please configure the API Key",
     "Task failed": "Task failed",
     "Script file cannot be empty": "Script file cannot be empty",
     "Video file cannot be empty": "Video file cannot be empty",
@@ -486,10 +486,10 @@
     "Tencent Service Region Help": "Select the Tencent Cloud TTS service region",
     "Custom Voice": "Custom Voice",
     "Select Tencent TTS Voice": "Select a Tencent Cloud TTS voice",
-    "Tencent Cloud TTS Voice Description": "💡 Tencent Cloud TTS Voice Notes",
+    "Tencent Cloud TTS Voice Description": "Tencent Cloud TTS Voice Notes",
     "Female Voices": "Female Voices",
     "Male Voices": "Male Voices",
-    "Tencent More Voices Notice": "💡 See the official Tencent Cloud documentation for more voices.",
+    "Tencent More Voices Notice": "See the official Tencent Cloud documentation for more voices.",
     "Qwen DashScope API Key Help": "Tongyi Qwen DashScope API Key",
     "TTS Model Name": "TTS Model Name",
     "Qwen TTS Model Help": "Qwen TTS model name, for example qwen3-tts-flash",
@@ -532,12 +532,12 @@
     "Preview Reference Audio Help": "Play the selected reference audio.",
     "Upload Reference Audio File": "Upload Reference Audio File",
     "Upload Reference Audio Help": "Upload a clear audio clip for voice cloning",
-    "Audio uploaded": "✅ Audio uploaded: {path}",
+    "Audio uploaded": "Audio uploaded: {path}",
     "Inference Mode": "Inference Mode",
     "Standard Inference": "Standard Inference",
     "Fast Inference": "Fast Inference",
     "Inference Mode Help": "Standard inference has higher quality but is slower. Fast inference is faster with slightly lower quality.",
-    "Advanced Parameters": "🔧 Advanced Parameters",
+    "Advanced Parameters": "Advanced Parameters",
     "Sampling Temperature": "Sampling Temperature",
     "Sampling Temperature Help": "Controls randomness. Higher values are more random; lower values are more deterministic.",
     "Top P Help": "Probability threshold for nucleus sampling. Smaller values make results more deterministic.",
@@ -548,9 +548,9 @@
     "Repetition Penalty Help": "Higher values reduce repetition, but overly high values may sound unnatural.",
     "Enable Sampling": "Enable Sampling",
     "Enable Sampling Help": "Enable sampling for more natural speech.",
-    "IndexTTS Usage Instructions Title": "💡 IndexTTS-1.5 Usage Instructions",
+    "IndexTTS Usage Instructions Title": "IndexTTS-1.5 Usage Instructions",
     "IndexTTS Usage Instructions": "**Zero-shot voice cloning**\n\n1. **Prepare reference audio**: upload or specify a clear audio file (3-10 seconds recommended)\n2. **Set API URL**: make sure the IndexTTS-1.5 service is running\n3. **Start synthesis**: the system will use the reference voice to synthesize new speech\n\n**Notes**:\n- Reference audio quality directly affects synthesis quality\n- Use clean audio without background noise when possible\n- Keep text length within a reasonable range\n- The first synthesis may take longer",
-    "IndexTTS2 Emotion Parameters": "🎭 Emotion Parameters",
+    "IndexTTS2 Emotion Parameters": "Emotion Parameters",
     "Emotion Mode": "Emotion Mode",
     "Emotion Mode Help": "Choose the emotion control source for IndexTTS-2.",
     "Emotion Mode Speaker": "Same as speaker reference",
@@ -578,7 +578,7 @@
     "Max Text Tokens Per Segment Help": "Maximum text tokens per segment for IndexTTS-2 inference.",
     "Max Mel Tokens": "Max Mel Tokens",
     "Max Mel Tokens Help": "Controls the maximum mel tokens generated in one request. Higher values can produce longer audio.",
-    "IndexTTS2 Usage Instructions Title": "💡 IndexTTS-2 Usage Instructions",
+    "IndexTTS2 Usage Instructions Title": "IndexTTS-2 Usage Instructions",
     "IndexTTS2 Usage Instructions": "**IndexTTS-2 voice cloning**\n\n1. **Choose a voice**: reuse IndexTTS-1.5 resource audio or upload a reference audio file\n2. **Set API URL**: for example http://192.168.3.6:7863/tts, or enter the service root\n3. **Tune emotion**: speaker is the default; switch to audio, vector, or text when needed\n4. **Tune generation**: temperature, top_p, top_k, num_beams, repetition_penalty, and max_mel_tokens are sent directly to the IndexTTS-2 API\n\n**Notes**:\n- Reference audio quality directly affects cloning quality\n- The first request may load the model and take longer\n- CPU deployments are much slower than GPU deployments",
     "OmniVoice Usage Instructions Title": "OmniVoice Usage Instructions",
     "OmniVoice Usage Instructions": "**OmniVoice-Pack speech synthesis**\n\n1. **Automatic voice**: set the API URL and language, then synthesize directly.\n2. **Voice design**: fill instruct with the desired gender, pitch, accent, or style.\n3. **Reference-audio clone**: upload or choose reference audio and fill its matching transcript.\n\n**Notes**:\n- The default service URL is http://127.0.0.1:7866/tts\n- Reference-audio cloning requires reference text when the service has no ASR model loaded\n- OmniVoice returns WAV audio, and NarratoAI estimates subtitle segment timing from the audio duration",
@@ -594,7 +594,7 @@
     "Voice Pitch Help 0.5-1.5": "Adjust voice pitch (0.5-1.5)",
     "Sentence Silence Duration": "Sentence-end Silence Duration (seconds)",
     "Sentence Silence Duration Help": "Adjust sentence-end silence duration (0.0-2.0 seconds)",
-    "Doubao TTS API Key Application Process": "💡 Doubao TTS API Key Application Process",
+    "Doubao TTS API Key Application Process": "Doubao TTS API Key Application Process",
     "Application Steps": "Application Steps",
     "Doubao TTS Step 1": "1. Open [https://console.volcengine.com/iam/keymanage](https://console.volcengine.com/iam/keymanage)",
     "Doubao TTS Step 2": "2. Create a new Access Key and Secret Key",
@@ -602,15 +602,15 @@
     "Doubao TTS Step 4": "4. Click Start Now",
     "Doubao TTS Step 5": "5. In the left API Service Center, find Speech Synthesis under Audio Generation (note: Speech Synthesis, not the speech synthesis large model)",
     "Doubao TTS Step 6": "6. Scroll to the bottom to get the APPID and Access Token",
-    "Doubao TTS Fill Credentials Notice": "💡 Fill the Access Key, Secret Key, AppID, and Token above.",
-    "Doubao TTS configured": "✅ Doubao TTS is configured",
-    "Please configure missing fields": "⚠️ Please configure: {fields}",
-    "Preview Voice Synthesis": "🎵 Preview Voice Synthesis",
+    "Doubao TTS Fill Credentials Notice": "Fill the Access Key, Secret Key, AppID, and Token above.",
+    "Doubao TTS configured": "Doubao TTS is configured",
+    "Please configure missing fields": "Please configure: {fields}",
+    "Preview Voice Synthesis": "Preview Voice Synthesis",
     "Voice Preview Sample": "Thanks for using NarratoAI. If you have any questions or suggestions, please join the community for help and discussion.",
     "Please configure voice settings first": "Please configure voice settings first",
-    "Voice synthesis successful": "✅ Voice synthesis successful!",
-    "Voice synthesis failed": "❌ Voice synthesis failed. Please check your configuration.",
-    "SoulVoice pitch not supported": "ℹ️ SoulVoice does not support pitch adjustment",
+    "Voice synthesis successful": "Voice synthesis successful!",
+    "Voice synthesis failed": "Voice synthesis failed. Please check your configuration.",
+    "SoulVoice pitch not supported": "SoulVoice does not support pitch adjustment",
     "Progress": "Progress",
     "Generating script...": "Generating script...",
     "Please select video file first": "Please select a video file first",
diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json
index 63eac0e..1099604 100644
--- a/webui/i18n/zh.json
+++ b/webui/i18n/zh.json
@@ -49,7 +49,7 @@
     "Preview Background Music Help": "播放当前背景音乐",
     "Upload Background Music File": "上传背景音乐文件",
     "Upload Background Music Help": "上传一个音频文件作为背景音乐",
-    "Background Music uploaded": "✅ 背景音乐已上传: {path}",
+    "Background Music uploaded": "背景音乐已上传: {path}",
     "Background Music Volume": "背景音乐音量（0.2表示20%，背景声音不宜过高）",
     "Subtitle Settings": "**字幕设置**",
     "Enable Subtitles": "启用字幕（若取消勾选，下面的设置都将不生效）",
@@ -276,13 +276,13 @@
     "Cloud Service": "云端服务",
     "Select TTS Engine": "选择 TTS 引擎",
     "Select TTS Engine Help": "选择您要使用的文本转语音引擎",
-    "TTS Engine Details": "📋 {engine} 详细说明",
+    "TTS Engine Details": "{engine} 详细说明",
     "Features": "特点",
     "Use Case": "适用场景",
     "Registration URL": "注册地址",
     "Voice Selection": "音色选择",
     "Select Edge TTS Voice": "选择 Edge TTS 音色",
-    "Edge TTS Voice Description": "💡 Edge TTS 音色说明",
+    "Edge TTS Voice Description": "Edge TTS 音色说明",
     "Loaded voice count": "已加载 {count} 个音色",
     "Female Voice": "女声",
     "Male Voice": "男声",
@@ -298,21 +298,21 @@
     "Azure Speech Key Help": "Azure Speech Services API 密钥",
     "Voice Name": "音色名称",
     "Azure Voice Name Help": "输入 Azure Speech Services 音色名称，直接使用官方音色名称即可。例如：zh-CN-YunzeNeural",
-    "Common Voice Reference": "💡 常用音色参考",
+    "Common Voice Reference": "常用音色参考",
     "Chinese Voices": "中文音色",
     "English Voices": "英文音色",
     "Multilingual": "多语言",
-    "Azure Voices Docs Notice": "💡 更多音色请参考 [Azure Speech Services 官方文档](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support)",
+    "Azure Voices Docs Notice": "更多音色请参考 [Azure Speech Services 官方文档](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support)",
     "Quick Select": "快速选择",
     "Chinese Female Voice": "中文女声",
     "Chinese Male Voice": "中文男声",
     "English Female Voice": "英文女声",
-    "Voice name valid": "✅ 音色名称有效: {voice}",
-    "Voice name format may be invalid": "⚠️ 音色名称格式可能不正确: {voice}",
-    "Azure voice name format notice": "💡 Azure 音色名称通常格式为: [语言]-[地区]-[名称]Neural",
-    "Azure Speech Services configured": "✅ Azure Speech Services 配置已设置",
-    "Please configure service region": "⚠️ 请配置服务区域",
-    "Please configure API Key": "⚠️ 请配置 API Key",
+    "Voice name valid": "音色名称有效: {voice}",
+    "Voice name format may be invalid": "音色名称格式可能不正确: {voice}",
+    "Azure voice name format notice": "Azure 音色名称通常格式为: [语言]-[地区]-[名称]Neural",
+    "Azure Speech Services configured": "Azure Speech Services 配置已设置",
+    "Please configure service region": "请配置服务区域",
+    "Please configure API Key": "请配置 API Key",
     "Language": "界面语言",
     "Task failed": "任务失败",
     "Script file cannot be empty": "脚本文件不能为空",
@@ -437,10 +437,10 @@
     "Tencent Service Region Help": "选择腾讯云 TTS 服务地域",
     "Custom Voice": "自定义音色",
     "Select Tencent TTS Voice": "选择腾讯云 TTS 音色",
-    "Tencent Cloud TTS Voice Description": "💡 腾讯云 TTS 音色说明",
+    "Tencent Cloud TTS Voice Description": "腾讯云 TTS 音色说明",
     "Female Voices": "女声音色",
     "Male Voices": "男声音色",
-    "Tencent More Voices Notice": "💡 更多音色请参考腾讯云官方文档",
+    "Tencent More Voices Notice": "更多音色请参考腾讯云官方文档",
     "Qwen DashScope API Key Help": "通义千问 DashScope API Key",
     "TTS Model Name": "模型名称",
     "Qwen TTS Model Help": "Qwen TTS 模型名，例如 qwen3-tts-flash",
@@ -483,12 +483,12 @@
     "Preview Reference Audio Help": "播放当前参考音频",
     "Upload Reference Audio File": "上传参考音频文件",
     "Upload Reference Audio Help": "上传一段清晰的音频用于语音克隆",
-    "Audio uploaded": "✅ 音频已上传: {path}",
+    "Audio uploaded": "音频已上传: {path}",
     "Inference Mode": "推理模式",
     "Standard Inference": "普通推理",
     "Fast Inference": "快速推理",
     "Inference Mode Help": "普通推理质量更高但速度较慢，快速推理速度更快但质量略低",
-    "Advanced Parameters": "🔧 高级参数",
+    "Advanced Parameters": "高级参数",
     "Sampling Temperature": "采样温度 (Temperature)",
     "Sampling Temperature Help": "控制随机性，值越高输出越随机，值越低越确定",
     "Top P Help": "nucleus 采样的概率阈值，值越小结果越确定",
@@ -499,9 +499,9 @@
     "Repetition Penalty Help": "值越大越能避免重复，但过大可能导致不自然",
     "Enable Sampling": "启用采样",
     "Enable Sampling Help": "启用采样可以获得更自然的语音",
-    "IndexTTS Usage Instructions Title": "💡 IndexTTS-1.5 使用说明",
+    "IndexTTS Usage Instructions Title": "IndexTTS-1.5 使用说明",
     "IndexTTS Usage Instructions": "**零样本语音克隆**\n\n1. **准备参考音频**：上传或指定一段清晰的音频文件（建议 3-10 秒）\n2. **设置 API 地址**：确保 IndexTTS-1.5 服务正常运行\n3. **开始合成**：系统会自动使用参考音频的音色合成新语音\n\n**注意事项**：\n- 参考音频质量直接影响合成效果\n- 建议使用无背景噪音的清晰音频\n- 文本长度建议控制在合理范围内\n- 首次合成可能需要较长时间",
-    "IndexTTS2 Emotion Parameters": "🎭 情感参数",
+    "IndexTTS2 Emotion Parameters": "情感参数",
     "Emotion Mode": "情感控制方式",
     "Emotion Mode Help": "选择 IndexTTS-2 的情感控制来源",
     "Emotion Mode Speaker": "与音色参考相同",
@@ -529,7 +529,7 @@
     "Max Text Tokens Per Segment Help": "IndexTTS-2 分段推理的最大文本 token 数",
     "Max Mel Tokens": "最大 Mel Tokens",
     "Max Mel Tokens Help": "控制单次生成的最大 mel token 数，值越大可生成更长音频",
-    "IndexTTS2 Usage Instructions Title": "💡 IndexTTS-2 使用说明",
+    "IndexTTS2 Usage Instructions Title": "IndexTTS-2 使用说明",
     "IndexTTS2 Usage Instructions": "**IndexTTS-2 语音克隆**\n\n1. **选择音色**：复用 IndexTTS-1.5 的资源音频或上传参考音频\n2. **设置 API 地址**：例如 http://192.168.3.6:7863/tts，也可以填写服务根地址\n3. **调整情感参数**：默认使用 speaker，可按需切换到 audio、vector 或 text\n4. **调整生成参数**：temperature、top_p、top_k、num_beams、repetition_penalty 和 max_mel_tokens 会直接传给 IndexTTS-2 接口\n\n**注意事项**：\n- 参考音频质量会直接影响克隆效果\n- 首次请求可能需要加载模型，耗时更长\n- CPU 部署生成速度会明显慢于 GPU",
     "OmniVoice Usage Instructions Title": "OmniVoice 使用说明",
     "OmniVoice Usage Instructions": "**OmniVoice-Pack 语音合成**\n\n1. **自动音色**：只需要设置 API 地址和语言，可直接合成。\n2. **指令音色**：填写 instruct 描述想要的性别、音高、口音或风格。\n3. **参考音频克隆**：上传或选择参考音频，并填写该音频对应文本。\n\n**注意事项**：\n- 当前默认服务地址为 http://127.0.0.1:7866/tts\n- 参考音频克隆在服务未加载 ASR 模型时必须填写参考文本\n- OmniVoice 返回 WAV 音频，系统会按音频时长估算字幕段落",
@@ -545,7 +545,7 @@
     "Voice Pitch Help 0.5-1.5": "调节语音音高 (0.5-1.5)",
     "Sentence Silence Duration": "句尾静音时长 (秒)",
     "Sentence Silence Duration Help": "调节句尾静音时长 (0.0-2.0 秒)",
-    "Doubao TTS API Key Application Process": "💡 豆包语音 TTS API Key申请流程",
+    "Doubao TTS API Key Application Process": "豆包语音 TTS API Key申请流程",
     "Application Steps": "申请步骤",
     "Doubao TTS Step 1": "1. 打开 [https://console.volcengine.com/iam/keymanage](https://console.volcengine.com/iam/keymanage)",
     "Doubao TTS Step 2": "2. 新建 Access Key 和 Secret Key",
@@ -553,15 +553,15 @@
     "Doubao TTS Step 4": "4. 点击立即使用",
     "Doubao TTS Step 5": "5. 在最左边的 API 服务中心找到音频生成下面的语音合成（注意：是语音合成，不是语音合成大模型）",
     "Doubao TTS Step 6": "6. 翻到最下面获取 APPID 和 Access Token",
-    "Doubao TTS Fill Credentials Notice": "💡 请将获取到的 Access Key、Secret Key、AppID 和 Token 填写到上方的配置中",
-    "Doubao TTS configured": "✅ 豆包语音 TTS 配置已设置",
-    "Please configure missing fields": "⚠️ 请配置: {fields}",
-    "Preview Voice Synthesis": "🎵 试听语音合成",
+    "Doubao TTS Fill Credentials Notice": "请将获取到的 Access Key、Secret Key、AppID 和 Token 填写到上方的配置中",
+    "Doubao TTS configured": "豆包语音 TTS 配置已设置",
+    "Please configure missing fields": "请配置: {fields}",
+    "Preview Voice Synthesis": "试听语音合成",
     "Voice Preview Sample": "感谢关注 NarratoAI，有任何问题或建议，可以加入社区频道求助或讨论",
     "Please configure voice settings first": "请先配置语音设置",
-    "Voice synthesis successful": "✅ 语音合成成功！",
-    "Voice synthesis failed": "❌ 语音合成失败，请检查配置",
-    "SoulVoice pitch not supported": "ℹ️ SoulVoice 引擎不支持音调调节",
+    "Voice synthesis successful": "语音合成成功！",
+    "Voice synthesis failed": "语音合成失败，请检查配置",
+    "SoulVoice pitch not supported": "SoulVoice 引擎不支持音调调节",
     "上传字幕文件": "上传字幕",
     "清除已上传字幕": "清除已上传字幕",
     "无法读取字幕文件，请检查文件编码（支持 UTF-8、UTF-16、GBK、GB2312）": "无法读取字幕文件，请检查文件编码（支持 UTF-8、UTF-16、GBK、GB2312）",