Merge pull request #250 from linyqh/develop

Release: NarratoAI v0.8.x
2026-08-01 02:45:54 +00:00 · 2026-06-10 00:08:22 +08:00 · 2026-06-10 00:08:22 +08:00 · 2fd397e05a
commit 2fd397e05a
parent c0b72ec603 25ae35484f
68 changed files with 16708 additions and 1438 deletions
--- a/.gitignore
+++ b/.gitignore
@ -51,3 +51,6 @@ tests/*
 !tests/test_script_service_documentary_unittest.py
 !tests/test_generate_narration_script_documentary_unittest.py
 !tests/test_generate_script_docu_unittest.py
+
+docs/reddit-community
+docs/wechat-0.8
--- a/README.md
+++ b/README.md
@ -41,10 +41,11 @@ NarratoAI 是一款自动化影视解说工具，基于 LLM 实现文案撰写
 本项目仅供学习和研究使用，不得商用。如需商业授权，请联系作者。

 ## 最新资讯
+- 2026.06.10 发布新版本 0.8.1，**大版本更新**，优化多个核心流程
 - 2026.04.27 发布新版本 0.7.9，新增 **Fun-ASR一键转录字幕**
 - 2026.04.03 发布新版本 0.7.8，重构纪录片逐帧分析链路，统一共享服务并优化抽帧、缓存、视觉并发与文案生成流程
 - 2026.03.27 发布新版本 0.7.7，出于安全考虑，已移除 LiteLLM 依赖，统一使用 OpenAI 兼容请求链路
- 2025.11.20 发布新版本 0.7.5，新增 [IndexTTS2](https://github.com/index-tts/index-tts) 语音克隆支持
+- 2025.11.20 发布新版本 0.7.5，新增 [IndexTTS-1.5](https://github.com/index-tts/index-tts) 语音克隆支持
 - 2025.10.15 发布新版本 0.7.3，升级大模型供应商管理能力
 - 2025.09.10 发布新版本 0.7.2，新增腾讯云tts
 - 2025.08.18 发布新版本 0.7.1，支持 **语音克隆** 和 最新大模型
@ -100,7 +101,7 @@ _**1. NarratoAI 是一款完全免费的软件，近期在社交媒体(抖音,B
 - [X] 支持短剧解说
 - [ ] 主角人脸匹配
 - [ ] 支持根据口播，文案，视频素材自动匹配
- [ ] 支持更多 TTS 引擎
+- [X] 支持更多 TTS 引擎
 - [ ] ...

 ## 快速启动 🚀
--- a/app/config/config.py
+++ b/app/config/config.py
@ -9,6 +9,56 @@ from app.config.defaults import build_default_app_config, merge_missing_app_defa
 root_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.realpath(__file__))))
 config_file = f"{root_dir}/config.toml"
 version_file = f"{root_dir}/project_version"
+INDEXTTS_ENGINE = "indextts"
+INDEXTTS_DISPLAY_NAME = "IndexTTS-1.5"
+INDEXTTS2_ENGINE = "indextts2"
+INDEXTTS2_DISPLAY_NAME = "IndexTTS-2"
+OMNIVOICE_ENGINE = "omnivoice"
+OMNIVOICE_DISPLAY_NAME = "OmniVoice"
+INDEXTTS_VOICE_PREFIX = f"{INDEXTTS_ENGINE}:"
+INDEXTTS2_VOICE_PREFIX = f"{INDEXTTS2_ENGINE}:"
+OMNIVOICE_VOICE_PREFIX = f"{OMNIVOICE_ENGINE}:"
+
+
+def normalize_tts_engine_name(tts_engine: str) -> str:
+    """Return the TTS engine identifier unchanged.
+
+    Currently an identity pass-through: the value is handed back exactly as
+    received, with no trimming, case folding, or alias mapping.
+    """
+    return tts_engine
+
+
+def normalize_indextts_voice_prefix(voice_name: str) -> str:
+    """Return the voice name unchanged.
+
+    Currently an identity pass-through: no engine prefix is added, removed,
+    or rewritten here.
+    """
+    return voice_name
+
+
+def _is_legacy_indextts2_config(indextts2_config) -> bool:
+    """Tell whether an "indextts2" section looks like a legacy IndexTTS-1.5 setup.
+
+    A section counts as legacy when it is a dict, its ``api_url`` (coerced to
+    text) contains the substring ``"8081"``, and none of the IndexTTS-2-only
+    option keys are present. Anything that is not a dict is never legacy.
+    """
+    if not isinstance(indextts2_config, dict):
+        return False
+    if "8081" not in str(indextts2_config.get("api_url", "")):
+        return False
+
+    # Keys that only a real IndexTTS-2 section carries.
+    indextts2_only_keys = {
+        "emotion_mode",
+        "emotion_alpha",
+        "max_text_tokens_per_segment",
+        "max_mel_tokens",
+        "vec_calm",
+    }
+    return indextts2_only_keys.isdisjoint(indextts2_config)
+
+
+def migrate_indextts_config(config_data):
+    """Move a legacy IndexTTS-1.5 setup stored under "indextts2" to "indextts".
+
+    The dict is modified in place and also returned.
+
+    Steps:
+      1. If the ``indextts2`` section is recognised as legacy by
+         ``_is_legacy_indextts2_config``, it is removed and, unless an
+         ``indextts`` section already exists, re-registered under ``indextts``.
+      2. If that migration happened and the UI selected ``indextts2`` as the
+         engine, the selection is switched to ``indextts``.
+      3. If the UI engine is ``indextts`` but the stored voice name still has
+         the ``indextts2:`` prefix, the prefix is rewritten to ``indextts:``.
+
+    Args:
+        config_data: Parsed configuration mapping.
+
+    Returns:
+        The same ``config_data`` object.
+    """
+    migrated_legacy_indextts2 = _is_legacy_indextts2_config(config_data.get(INDEXTTS2_ENGINE))
+    if migrated_legacy_indextts2:
+        legacy_section = config_data.pop(INDEXTTS2_ENGINE)
+        # An explicit "indextts" section wins over the legacy one.
+        config_data.setdefault(INDEXTTS_ENGINE, legacy_section)
+
+    ui_config = config_data.get("ui")
+    if not isinstance(ui_config, dict):
+        return config_data
+
+    if migrated_legacy_indextts2 and ui_config.get("tts_engine") == INDEXTTS2_ENGINE:
+        ui_config["tts_engine"] = INDEXTTS_ENGINE
+
+    # voice_name comes from a user-editable file and may be missing or not a
+    # string; only a real string can carry the engine prefix.
+    voice_name = ui_config.get("voice_name")
+    if (
+        isinstance(voice_name, str)
+        and voice_name.startswith(INDEXTTS2_VOICE_PREFIX)
+        and ui_config.get("tts_engine") == INDEXTTS_ENGINE
+    ):
+        ui_config["voice_name"] = f"{INDEXTTS_VOICE_PREFIX}{voice_name[len(INDEXTTS2_VOICE_PREFIX):]}"
+    return config_data


 def get_version_from_file():
@ -32,13 +82,13 @@ def load_config():
        _config_ = build_default_config()
        write_config_file(_config_)
        logger.info("create config.toml with shared defaults")
-        return _config_
+        return migrate_indextts_config(_config_)

    logger.info(f"load config from file: {config_file}")

    _config_ = load_toml_file(config_file)
    _config_["app"] = merge_missing_app_defaults(_config_.get("app", {}))
-    return _config_
+    return migrate_indextts_config(_config_)


 def load_toml_file(file_path):
@ -60,7 +110,7 @@ def build_default_config():
        config_data = load_toml_file(example_file)

    config_data["app"] = build_default_app_config(config_data.get("app", {}))
-    return config_data
+    return migrate_indextts_config(config_data)


 def write_config_file(config_data):
@ -82,7 +132,9 @@ def save_config():
        _cfg["ui"] = ui
        _cfg["tts_qwen"] = tts_qwen
        _cfg["fun_asr"] = fun_asr
+        _cfg["indextts"] = indextts
        _cfg["indextts2"] = indextts2
+        _cfg["omnivoice"] = omnivoice
        _cfg["doubaotts"] = doubaotts
        f.write(toml.dumps(_cfg))

@ -98,7 +150,9 @@ ui = _cfg.get("ui", {})
 frames = _cfg.get("frames", {})
 tts_qwen = _cfg.get("tts_qwen", {})
 fun_asr = _cfg.get("fun_asr", {})
+indextts = _cfg.get("indextts", {})
 indextts2 = _cfg.get("indextts2", {})
+omnivoice = _cfg.get("omnivoice", {})
 doubaotts = _cfg.get("doubaotts", {})

 hostname = socket.gethostname()
@ -119,8 +173,43 @@ imagemagick_path = app.get("imagemagick_path", "")
 if imagemagick_path and os.path.isfile(imagemagick_path):
    os.environ["IMAGEMAGICK_BINARY"] = imagemagick_path

+_applied_ffmpeg_dir = None
+
+
+def apply_ffmpeg_path(ffmpeg_binary: str = "") -> None:
+    """Apply the configured FFmpeg binary to this Python process.
+
+    Sets ``IMAGEIO_FFMPEG_EXE`` to the absolute binary path and puts the
+    binary's directory at the front of ``PATH``. Any earlier occurrence of
+    that directory, and of the directory applied by a previous call, is
+    dropped from ``PATH`` so repeated calls do not pile up entries.
+
+    Args:
+        ffmpeg_binary: Path to the FFmpeg executable; may start with ``~``.
+            An empty value, or a path that is not an existing file, leaves
+            the environment untouched.
+    """
+    global _applied_ffmpeg_dir
+
+    if not ffmpeg_binary:
+        return
+
+    # Expand "~" and absolutise BEFORE the existence check; checking the raw
+    # string first would reject every "~/..." path.
+    ffmpeg_binary = os.path.abspath(os.path.expanduser(ffmpeg_binary))
+    if not os.path.isfile(ffmpeg_binary):
+        return
+
+    ffmpeg_dir = os.path.dirname(ffmpeg_binary)
+    os.environ["IMAGEIO_FFMPEG_EXE"] = ffmpeg_binary
+
+    def _comparable(path_value: str) -> str:
+        # normcase makes the comparison case-insensitive where the platform is.
+        return os.path.normcase(os.path.abspath(path_value))
+
+    superseded_dirs = {_comparable(ffmpeg_dir)}
+    if _applied_ffmpeg_dir:
+        superseded_dirs.add(_comparable(_applied_ffmpeg_dir))
+
+    # Keep the remaining entries verbatim and in order; skip empty items.
+    filtered_paths = [
+        path_item
+        for path_item in os.environ.get("PATH", "").split(os.pathsep)
+        if path_item and _comparable(path_item) not in superseded_dirs
+    ]
+
+    os.environ["PATH"] = os.pathsep.join([ffmpeg_dir, *filtered_paths])
+    _applied_ffmpeg_dir = ffmpeg_dir
+
+
 ffmpeg_path = app.get("ffmpeg_path", "")
-if ffmpeg_path and os.path.isfile(ffmpeg_path):
-    os.environ["IMAGEIO_FFMPEG_EXE"] = ffmpeg_path
+apply_ffmpeg_path(ffmpeg_path)

 logger.info(f"{project_name} v{project_version}")
--- a/app/config/defaults.py
+++ b/app/config/defaults.py
@ -11,6 +11,21 @@ DEFAULT_VISION_OPENAI_MODEL_NAME = "Qwen/Qwen3.5-122B-A10B"
 DEFAULT_TEXT_LLM_PROVIDER = DEFAULT_OPENAI_COMPATIBLE_PROVIDER
 DEFAULT_TEXT_OPENAI_MODEL_NAME = "Pro/zai-org/GLM-5"

+DEFAULT_LLM_GENERATION_CONFIG = {
+    "temperature": 1.0,
+    "top_p": 0.95,
+    "max_tokens": 65536,
+    "thinking_level": "auto",
+}
+
+DEFAULT_LLM_THINKING_LEVELS = ["auto", "off", "low", "medium", "high"]
+
+DEFAULT_LLM_GENERATION_APP_CONFIG = {
+    f"{model_type}_openai_{param_name}": value
+    for model_type in ("vision", "text")
+    for param_name, value in DEFAULT_LLM_GENERATION_CONFIG.items()
+}
+
 DEFAULT_LLM_APP_CONFIG = {
    "vision_llm_provider": DEFAULT_VISION_LLM_PROVIDER,
    "vision_openai_model_name": DEFAULT_VISION_OPENAI_MODEL_NAME,
@ -20,7 +35,11 @@ DEFAULT_LLM_APP_CONFIG = {
    "text_openai_model_name": DEFAULT_TEXT_OPENAI_MODEL_NAME,
    "text_openai_api_key": "",
    "text_openai_base_url": DEFAULT_OPENAI_COMPATIBLE_BASE_URL,
+    "tavily_api_key": "",
+    "tavily_search_depth": "basic",
+    "tavily_max_results": 5,
 }
+DEFAULT_LLM_APP_CONFIG.update(DEFAULT_LLM_GENERATION_APP_CONFIG)


 def build_default_app_config(app_config: dict | None = None) -> dict:
--- a/app/config/test_config_bootstrap_unittest.py
+++ b/app/config/test_config_bootstrap_unittest.py
@ -53,13 +53,53 @@ hide_config = true
        self.assertEqual("openai", config_data["app"]["vision_llm_provider"])
        self.assertEqual("Qwen/Qwen3.5-122B-A10B", config_data["app"]["vision_openai_model_name"])
        self.assertEqual("https://api.siliconflow.cn/v1", config_data["app"]["vision_openai_base_url"])
+        self.assertEqual(1.0, config_data["app"]["vision_openai_temperature"])
+        self.assertEqual(0.95, config_data["app"]["vision_openai_top_p"])
        self.assertEqual("openai", config_data["app"]["text_llm_provider"])
        self.assertEqual("Pro/zai-org/GLM-5", config_data["app"]["text_openai_model_name"])
        self.assertEqual("https://api.siliconflow.cn/v1", config_data["app"]["text_openai_base_url"])
+        self.assertEqual(1.0, config_data["app"]["text_openai_temperature"])
+        self.assertEqual(0.95, config_data["app"]["text_openai_top_p"])
        self.assertEqual("Qwen/Qwen3.5-122B-A10B", saved_config["app"]["vision_openai_model_name"])
        self.assertEqual("Pro/zai-org/GLM-5", saved_config["app"]["text_openai_model_name"])
        self.assertTrue(saved_config["app"]["hide_config"])

+    def test_legacy_indextts2_config_is_migrated_to_indextts_15(self):
+        """A port-8081 "indextts2" section without IndexTTS-2-only keys is migrated.
+
+        The section must reappear under "indextts", "indextts2" must be gone,
+        and the UI engine and voice-name prefix must both switch to "indextts".
+        """
+        migrated = cfg.migrate_indextts_config(
+            {
+                "indextts2": {"api_url": "http://127.0.0.1:8081/tts"},
+                "ui": {
+                    "tts_engine": "indextts2",
+                    "voice_name": "indextts2:/tmp/reference.wav",
+                },
+            }
+        )
+
+        self.assertEqual("http://127.0.0.1:8081/tts", migrated["indextts"]["api_url"])
+        self.assertNotIn("indextts2", migrated)
+        self.assertEqual("indextts", migrated["ui"]["tts_engine"])
+        self.assertEqual("indextts:/tmp/reference.wav", migrated["ui"]["voice_name"])
+
+    def test_indextts2_config_is_kept_as_separate_engine(self):
+        """An "indextts2" section carrying an IndexTTS-2-only key is left alone.
+
+        Both engine sections must survive with their own URLs, and the UI
+        engine and voice-name prefix must stay on "indextts2".
+        """
+        migrated = cfg.migrate_indextts_config(
+            {
+                "indextts": {"api_url": "http://127.0.0.1:8081/tts"},
+                "indextts2": {
+                    "api_url": "http://192.168.3.6:7863/tts",
+                    "emotion_mode": "speaker",
+                },
+                "ui": {
+                    "tts_engine": "indextts2",
+                    "voice_name": "indextts2:/tmp/reference.wav",
+                },
+            }
+        )
+
+        self.assertEqual("http://127.0.0.1:8081/tts", migrated["indextts"]["api_url"])
+        self.assertEqual("http://192.168.3.6:7863/tts", migrated["indextts2"]["api_url"])
+        self.assertEqual("indextts2", migrated["ui"]["tts_engine"])
+        self.assertEqual("indextts2:/tmp/reference.wav", migrated["ui"]["voice_name"])
+

 class OpenAICompatibleModelDefaultsTests(unittest.TestCase):
    def test_ui_keeps_full_model_name_and_openai_provider(self):
--- a/app/models/schema.py
+++ b/app/models/schema.py
@ -164,6 +164,9 @@ class VideoClipParams(BaseModel):
    video_clip_json: Optional[list] = Field(default=[], description="LLM 生成的视频剪辑脚本内容")
    video_clip_json_path: Optional[str] = Field(default="", description="LLM 生成的视频剪辑脚本路径")
    video_origin_path: Optional[str] = Field(default="", description="原视频路径")
+    video_origin_paths: Optional[List[str]] = Field(default=[], description="原视频路径列表")
+    original_subtitle_path: Optional[str] = Field(default="", description="原视频字幕路径")
+    original_subtitle_paths: Optional[List[str]] = Field(default=[], description="原视频字幕路径列表")
    video_aspect: Optional[VideoAspect] = Field(default=VideoAspect.portrait.value, description="视频比例")
    video_language: Optional[str] = Field(default="zh-CN", description="视频语言")

@ -182,6 +185,28 @@ class VideoClipParams(BaseModel):
    bgm_file: Optional[str] = Field(default="", description="背景音乐文件")

    subtitle_enabled: bool = True
+    subtitle_mask_enabled: bool = False
+    subtitle_mask_landscape_x_percent: float = 10.0
+    subtitle_mask_landscape_y_percent: float = 78.0
+    subtitle_mask_landscape_width_percent: float = 80.0
+    subtitle_mask_landscape_height_percent: float = 14.0
+    subtitle_mask_landscape_blur_radius: int = 18
+    subtitle_mask_landscape_opacity_percent: int = 82
+    subtitle_mask_portrait_x_percent: float = 8.0
+    subtitle_mask_portrait_y_percent: float = 79.0
+    subtitle_mask_portrait_width_percent: float = 84.0
+    subtitle_mask_portrait_height_percent: float = 16.0
+    subtitle_mask_portrait_blur_radius: int = 26
+    subtitle_mask_portrait_opacity_percent: int = 84
+    subtitle_position_landscape_y_percent: float = 85.0
+    subtitle_position_portrait_y_percent: float = 82.0
+    subtitle_auto_transcribe_enabled: bool = False
+    subtitle_auto_transcribe_backend: str = "local"
+    subtitle_auto_transcribe_api_url: str = ""
+    subtitle_auto_transcribe_firered_api_url: str = ""
+    subtitle_auto_transcribe_api_key: str = ""
+    subtitle_auto_transcribe_hotword: str = ""
+    subtitle_auto_transcribe_enable_spk: bool = False
    font_name: str = "SimHei"  # 默认使用黑体
    font_size: int = 36
    text_fore_color: str = "white"              # 文本前景色
@ -206,4 +231,3 @@ class SubtitlePosition(str, Enum):
    TOP = "top"
    CENTER = "center"
    BOTTOM = "bottom"
-
--- a/app/services/SDE/short_drama_explanation.py
+++ b/app/services/SDE/short_drama_explanation.py
@ -11,7 +11,7 @@
 import os
 import json
 import requests
-from typing import Dict, Any, Optional
+from typing import Dict, Any, Optional, Tuple
 from loguru import logger
 from app.config import config
 from app.utils.utils import get_uuid, storage_dir
@ -31,6 +31,7 @@ class SubtitleAnalyzer:
        custom_prompt: Optional[str] = None,
        temperature: Optional[float] = 1.0,
        provider: Optional[str] = None,
+        prompt_category: str = "short_drama_narration",
    ):
        """
        初始化字幕分析器
@ -49,6 +50,7 @@ class SubtitleAnalyzer:
        self.base_url = base_url
        self.temperature = temperature
        self.provider = provider or self._detect_provider()
+        self.prompt_category = prompt_category or "short_drama_narration"

        # 设置自定义提示词（如果提供）
        self.custom_prompt = custom_prompt
@ -94,7 +96,7 @@ class SubtitleAnalyzer:
            else:
                # 使用新的提示词管理系统，正确传入参数
                prompt = PromptManager.get_prompt(
-                    category="short_drama_narration",
+                    category=self.prompt_category,
                    name="plot_analysis",
                    parameters={"subtitle_content": subtitle_content}
                )
@ -363,7 +365,179 @@ class SubtitleAnalyzer:
            logger.error(f"保存分析结果时发生错误: {str(e)}")
            return ""

-    def generate_narration_script(self, short_name: str, plot_analysis: str, subtitle_content: str = "", temperature: float = 0.7) -> Dict[str, Any]:
+    def _render_prompt(self, name: str, parameters: Dict[str, Any]) -> Tuple[str, Optional[str]]:
+        """Render a prompt from this analyzer's prompt category.
+
+        Args:
+            name: Prompt name inside ``self.prompt_category``.
+            parameters: Values passed to ``PromptManager.get_prompt``.
+
+        Returns:
+            ``(rendered_prompt, system_prompt)``; the system prompt is whatever
+            the prompt object's ``get_system_prompt()`` returns.
+        """
+        category = self.prompt_category
+        rendered_prompt = PromptManager.get_prompt(
+            category=category,
+            name=name,
+            parameters=parameters,
+        )
+        prompt_definition = PromptManager.get_prompt_object(category=category, name=name)
+        system_prompt = prompt_definition.get_system_prompt()
+        return rendered_prompt, system_prompt
+
+    def _generate_json_text(
+        self,
+        prompt: str,
+        system_prompt: Optional[str],
+        temperature: float,
+    ) -> Dict[str, Any]:
+        """Send the prompt to the configured backend in JSON-output mode.
+
+        Uses the native Gemini request path when ``self.is_native_gemini`` is
+        truthy, otherwise the OpenAI-compatible path, and returns that
+        backend's result dict unchanged.
+        """
+        backend_call = (
+            self._generate_narration_with_native_gemini
+            if self.is_native_gemini
+            else self._generate_narration_with_openai_compatible
+        )
+        return backend_call(prompt, temperature, system_prompt, json_output=True)
+
+    def _generate_plain_text(
+        self,
+        prompt: str,
+        system_prompt: Optional[str],
+        temperature: float,
+    ) -> Dict[str, Any]:
+        """Send the prompt to the configured backend in plain-text mode.
+
+        Returns the backend's result dict. When its ``status`` is ``"success"``,
+        a ``narration_copy`` key is added holding the stripped string form of
+        ``narration_script``.
+        """
+        backend_call = (
+            self._generate_narration_with_native_gemini
+            if self.is_native_gemini
+            else self._generate_narration_with_openai_compatible
+        )
+        outcome = backend_call(prompt, temperature, system_prompt, json_output=False)
+        if outcome.get("status") != "success":
+            return outcome
+
+        outcome["narration_copy"] = str(outcome.get("narration_script", "")).strip()
+        return outcome
+
+    def generate_narration_copy(
+        self,
+        short_name: str,
+        plot_analysis: str,
+        subtitle_content: str = "",
+        temperature: float = 0.7,
+        narration_language: str = "简体中文（中国）",
+        drama_genre: str = "逆袭/复仇",
+    ) -> Dict[str, Any]:
+        """Generate narration copy intended for user review and editing.
+
+        Renders the ``narration_copy`` prompt and requests plain-text output.
+
+        Returns:
+            The plain-text generation result dict. If anything raises, the
+            error is logged and a dict with ``status="error"``, the exception
+            text in ``message`` and the requested ``temperature`` is returned.
+        """
+        try:
+            template_values = {
+                "drama_name": short_name,
+                "drama_genre": drama_genre,
+                "plot_analysis": plot_analysis,
+                "subtitle_content": subtitle_content,
+                "narration_language": narration_language,
+            }
+            prompt, system_prompt = self._render_prompt("narration_copy", template_values)
+            return self._generate_plain_text(prompt, system_prompt, temperature)
+        except Exception as exc:
+            logger.error(f"解说文案正文生成过程中发生错误: {str(exc)}")
+            failure = {"status": "error", "message": str(exc), "temperature": temperature}
+            return failure
+
+    def match_narration_copy_to_script(
+        self,
+        short_name: str,
+        plot_analysis: str,
+        subtitle_content: str,
+        narration_copy: str,
+        temperature: float = 0.3,
+        narration_language: str = "简体中文（中国）",
+        drama_genre: str = "逆袭/复仇",
+        original_sound_ratio: int = 30,
+    ) -> Dict[str, Any]:
+        """Match user-reviewed narration copy to subtitle timestamps.
+
+        Renders the ``script_matching`` prompt and requests JSON output. The
+        temperature sent to the model is capped at 0.3.
+
+        Returns:
+            The JSON generation result dict. If anything raises (including the
+            ``int``/``float`` coercions), the error is logged and a dict with
+            ``status="error"``, the exception text in ``message`` and the
+            caller's uncapped ``temperature`` is returned.
+        """
+        try:
+            template_values = {
+                "drama_name": short_name,
+                "drama_genre": drama_genre,
+                "plot_analysis": plot_analysis,
+                "subtitle_content": subtitle_content,
+                "narration_copy": narration_copy,
+                "narration_language": narration_language,
+                "original_sound_ratio": int(original_sound_ratio),
+            }
+            prompt, system_prompt = self._render_prompt("script_matching", template_values)
+            capped_temperature = min(float(temperature), 0.3)
+            return self._generate_json_text(prompt, system_prompt, capped_temperature)
+        except Exception as exc:
+            logger.error(f"解说文案画面匹配过程中发生错误: {str(exc)}")
+            failure = {"status": "error", "message": str(exc), "temperature": temperature}
+            return failure
+
+    def plan_narration_segments(
+        self,
+        short_name: str,
+        plot_analysis: str,
+        subtitle_content: str = "",
+        temperature: float = 0.3,
+        narration_language: str = "简体中文（中国）",
+        drama_genre: str = "逆袭/复仇",
+    ) -> Dict[str, Any]:
+        """Plan the narration segments, producing only segment sources and intent.
+
+        Renders the ``segment_planning`` prompt and requests JSON output. The
+        temperature sent to the model is capped at 0.3.
+
+        Returns:
+            The JSON generation result dict. If anything raises, the error is
+            logged and a dict with ``status="error"``, the exception text in
+            ``message`` and the caller's uncapped ``temperature`` is returned.
+        """
+        try:
+            template_values = {
+                "drama_name": short_name,
+                "drama_genre": drama_genre,
+                "plot_analysis": plot_analysis,
+                "subtitle_content": subtitle_content,
+                "narration_language": narration_language,
+            }
+            prompt, system_prompt = self._render_prompt("segment_planning", template_values)
+            capped_temperature = min(float(temperature), 0.3)
+            return self._generate_json_text(prompt, system_prompt, capped_temperature)
+        except Exception as exc:
+            logger.error(f"片段规划过程中发生错误: {str(exc)}")
+            failure = {"status": "error", "message": str(exc), "temperature": temperature}
+            return failure
+
+    def repair_narration_script(
+        self,
+        short_name: str,
+        plot_analysis: str,
+        subtitle_content: str,
+        invalid_script: str,
+        validation_errors: str,
+        temperature: float = 0.3,
+        narration_language: str = "简体中文（中国）",
+        drama_genre: str = "逆袭/复仇",
+    ) -> Dict[str, Any]:
+        """Repair a narration script using deterministic validation errors.
+
+        Renders the ``script_repair`` prompt with the rejected script and its
+        validation errors, then requests JSON output. The temperature sent to
+        the model is capped at 0.3.
+
+        Returns:
+            The JSON generation result dict. If anything raises, the error is
+            logged and a dict with ``status="error"``, the exception text in
+            ``message`` and the caller's uncapped ``temperature`` is returned.
+        """
+        try:
+            template_values = {
+                "drama_name": short_name,
+                "drama_genre": drama_genre,
+                "plot_analysis": plot_analysis,
+                "subtitle_content": subtitle_content,
+                "invalid_script": invalid_script,
+                "validation_errors": validation_errors,
+                "narration_language": narration_language,
+            }
+            prompt, system_prompt = self._render_prompt("script_repair", template_values)
+            capped_temperature = min(float(temperature), 0.3)
+            return self._generate_json_text(prompt, system_prompt, capped_temperature)
+        except Exception as exc:
+            logger.error(f"解说文案修复过程中发生错误: {str(exc)}")
+            failure = {"status": "error", "message": str(exc), "temperature": temperature}
+            return failure
+
+    def generate_narration_script(
+        self,
+        short_name: str,
+        plot_analysis: str,
+        subtitle_content: str = "",
+        temperature: float = 0.7,
+        narration_language: str = "简体中文（中国）",
+        drama_genre: str = "逆袭/复仇",
+    ) -> Dict[str, Any]:
        """
        根据剧情分析生成解说文案

@ -372,28 +546,36 @@ class SubtitleAnalyzer:
            plot_analysis: 剧情分析内容
            subtitle_content: 原始字幕内容，用于提供准确的时间戳信息
            temperature: 生成温度，控制创造性，默认0.7
+            narration_language: 解说台词目标语言

        Returns:
            Dict[str, Any]: 包含生成结果的字典
        """
        try:
-            # 使用新的提示词管理系统构建提示词
-            prompt = PromptManager.get_prompt(
-                category="short_drama_narration",
-                name="script_generation",
-                parameters={
+            segment_plan_result = self.plan_narration_segments(
+                short_name=short_name,
+                plot_analysis=plot_analysis,
+                subtitle_content=subtitle_content,
+                temperature=temperature,
+                narration_language=narration_language,
+                drama_genre=drama_genre,
+            )
+            if segment_plan_result["status"] != "success":
+                return segment_plan_result
+
+            prompt, system_prompt = self._render_prompt(
+                "script_generation",
+                {
                    "drama_name": short_name,
+                    "drama_genre": drama_genre,
                    "plot_analysis": plot_analysis,
-                    "subtitle_content": subtitle_content
-                }
+                    "subtitle_content": subtitle_content,
+                    "segment_plan": segment_plan_result["narration_script"],
+                    "narration_language": narration_language,
+                },
            )

-            if self.is_native_gemini:
-                # 使用原生Gemini API格式
-                return self._generate_narration_with_native_gemini(prompt, temperature)
-            else:
-                # 使用OpenAI兼容格式
-                return self._generate_narration_with_openai_compatible(prompt, temperature)
+            return self._generate_json_text(prompt, system_prompt, temperature)

        except Exception as e:
            logger.error(f"解说文案生成过程中发生错误: {str(e)}")
@ -403,16 +585,35 @@ class SubtitleAnalyzer:
                "temperature": self.temperature
            }

-    def _generate_narration_with_native_gemini(self, prompt: str, temperature: float) -> Dict[str, Any]:
+    def _generate_narration_with_native_gemini(
+        self,
+        prompt: str,
+        temperature: float,
+        system_prompt: Optional[str] = None,
+        json_output: bool = True,
+    ) -> Dict[str, Any]:
        """使用原生Gemini API生成解说文案"""
        try:
            # 构建原生Gemini API请求数据
            # 为了确保JSON输出，在提示词中添加更强的约束
-            enhanced_prompt = f"{prompt}\n\n请确保输出严格的JSON格式，不要包含任何其他文字或标记。"
+            enhanced_prompt = (
+                f"{prompt}\n\n请确保输出严格的JSON格式，不要包含任何其他文字或标记。"
+                if json_output
+                else prompt
+            )

            payload = {
                "systemInstruction": {
-                    "parts": [{"text": "你是一位专业的短视频解说脚本撰写专家。你必须严格按照JSON格式输出，不能包含任何其他文字、说明或代码块标记。"}]
+                    "parts": [
+                        {
+                            "text": system_prompt
+                            or (
+                                "你必须严格按照JSON格式输出，不能包含任何其他文字、说明或代码块标记。"
+                                if json_output
+                                else "你是一位专业的短剧解说文案助手。"
+                            )
+                        }
+                    ]
                },
                "contents": [{
                    "parts": [{"text": enhanced_prompt}]
@ -423,7 +624,6 @@ class SubtitleAnalyzer:
                    "topP": 0.95,
                    "maxOutputTokens": 64000,
                    "candidateCount": 1,
-                    "stopSequences": ["```", "注意", "说明"]
                },
                "safetySettings": [
                    {
@ -444,6 +644,8 @@ class SubtitleAnalyzer:
                    }
                ]
            }
+            if json_output:
+                payload["generationConfig"]["stopSequences"] = ["```", "注意", "说明"]

            # 构建请求URL
            url = f"{self.base_url}/models/{self.model}:generateContent"
@ -523,21 +725,27 @@ class SubtitleAnalyzer:
                "temperature": temperature
            }

-    def _generate_narration_with_openai_compatible(self, prompt: str, temperature: float) -> Dict[str, Any]:
+    def _generate_narration_with_openai_compatible(
+        self,
+        prompt: str,
+        temperature: float,
+        system_prompt: Optional[str] = None,
+        json_output: bool = True,
+    ) -> Dict[str, Any]:
        """使用OpenAI兼容API生成解说文案"""
        try:
            # 构建OpenAI格式的请求数据
            payload = {
                "model": self.model,
                "messages": [
-                    {"role": "system", "content": "你是一位专业的短视频解说脚本撰写专家。"},
+                    {"role": "system", "content": system_prompt or ("你必须严格按照JSON格式输出。" if json_output else "你是一位专业的短剧解说文案助手。")},
                    {"role": "user", "content": prompt}
                ],
                "temperature": temperature
            }

            # 对特定模型添加响应格式设置
-            if self.model not in ["deepseek-reasoner"]:
+            if json_output and self.model not in ["deepseek-reasoner"]:
                payload["response_format"] = {"type": "json_object"}

            # 构建请求地址
@ -632,7 +840,8 @@ def analyze_subtitle(
        temperature: float = 1.0,
        save_result: bool = False,
        output_path: Optional[str] = None,
-        provider: Optional[str] = None
+        provider: Optional[str] = None,
+        prompt_category: str = "short_drama_narration",
 ) -> Dict[str, Any]:
    """
    分析字幕内容的便捷函数
@ -659,7 +868,8 @@ def analyze_subtitle(
        model=model,
        base_url=base_url,
        custom_prompt=custom_prompt,
-        provider=provider
+        provider=provider,
+        prompt_category=prompt_category,
    )
    logger.debug(f"使用模型: {analyzer.model} 开始分析, 温度: {analyzer.temperature}")
    # 分析字幕
@ -691,7 +901,10 @@ def generate_narration_script(
    temperature: float = 1.0,
    save_result: bool = False,
    output_path: Optional[str] = None,
-    provider: Optional[str] = None
+    provider: Optional[str] = None,
+    narration_language: str = "简体中文（中国）",
+    drama_genre: str = "逆袭/复仇",
+    prompt_category: str = "short_drama_narration",
 ) -> Dict[str, Any]:
    """
    根据剧情分析生成解说文案的便捷函数
@ -707,6 +920,7 @@ def generate_narration_script(
        save_result: 是否保存结果到文件
        output_path: 输出文件路径
        provider: 提供商类型
+        narration_language: 解说台词目标语言

    Returns:
        Dict[str, Any]: 包含生成结果的字典
@ -717,11 +931,19 @@ def generate_narration_script(
        api_key=api_key,
        model=model,
        base_url=base_url,
-        provider=provider
+        provider=provider,
+        prompt_category=prompt_category,
    )
    
    # 生成解说文案
-    result = analyzer.generate_narration_script(short_name, plot_analysis, subtitle_content or "", temperature)
+    result = analyzer.generate_narration_script(
+        short_name,
+        plot_analysis,
+        subtitle_content or "",
+        temperature,
+        narration_language,
+        drama_genre,
+    )
    
    # 保存结果
    if save_result and result["status"] == "success":
@ -730,6 +952,113 @@ def generate_narration_script(
    return result


+def generate_narration_copy(
+    short_name: Optional[str] = None,
+    plot_analysis: Optional[str] = None,
+    subtitle_content: Optional[str] = None,
+    api_key: Optional[str] = None,
+    model: Optional[str] = None,
+    base_url: Optional[str] = None,
+    temperature: float = 0.7,
+    provider: Optional[str] = None,
+    narration_language: str = "简体中文（中国）",
+    drama_genre: str = "逆袭/复仇",
+    prompt_category: str = "short_drama_narration",
+) -> Dict[str, Any]:
+    """Generate narration copy that the user can review and edit.
+
+    Convenience wrapper: builds a ``SubtitleAnalyzer`` from the connection
+    arguments and delegates to ``SubtitleAnalyzer.generate_narration_copy``.
+    ``plot_analysis`` and ``subtitle_content`` are replaced by ``""`` when
+    falsy; ``short_name`` is forwarded as given.
+
+    Returns:
+        The result dict produced by the analyzer method.
+    """
+    analyzer = SubtitleAnalyzer(
+        temperature=temperature,
+        api_key=api_key,
+        model=model,
+        base_url=base_url,
+        provider=provider,
+        prompt_category=prompt_category,
+    )
+
+    return analyzer.generate_narration_copy(
+        short_name=short_name,
+        plot_analysis=plot_analysis or "",
+        subtitle_content=subtitle_content or "",
+        temperature=temperature,
+        narration_language=narration_language,
+        drama_genre=drama_genre,
+    )
+
+
+def match_narration_copy_to_script(
+    short_name: Optional[str] = None,
+    plot_analysis: Optional[str] = None,
+    subtitle_content: Optional[str] = None,
+    narration_copy: Optional[str] = None,
+    api_key: Optional[str] = None,
+    model: Optional[str] = None,
+    base_url: Optional[str] = None,
+    temperature: float = 0.3,
+    provider: Optional[str] = None,
+    narration_language: str = "简体中文（中国）",
+    drama_genre: str = "逆袭/复仇",
+    original_sound_ratio: int = 30,
+    prompt_category: str = "short_drama_narration",
+) -> Dict[str, Any]:
+    """Match user-reviewed narration copy to subtitle timestamps.
+
+    Convenience wrapper: builds a ``SubtitleAnalyzer`` from the connection
+    arguments and delegates to
+    ``SubtitleAnalyzer.match_narration_copy_to_script``. ``plot_analysis``,
+    ``subtitle_content`` and ``narration_copy`` are replaced by ``""`` when
+    falsy; ``short_name`` is forwarded as given.
+
+    Returns:
+        The result dict produced by the analyzer method.
+    """
+    analyzer = SubtitleAnalyzer(
+        temperature=temperature,
+        api_key=api_key,
+        model=model,
+        base_url=base_url,
+        provider=provider,
+        prompt_category=prompt_category,
+    )
+
+    return analyzer.match_narration_copy_to_script(
+        short_name=short_name,
+        plot_analysis=plot_analysis or "",
+        subtitle_content=subtitle_content or "",
+        narration_copy=narration_copy or "",
+        temperature=temperature,
+        narration_language=narration_language,
+        drama_genre=drama_genre,
+        original_sound_ratio=original_sound_ratio,
+    )
+
+
+def repair_narration_script(
+    short_name: Optional[str] = None,
+    plot_analysis: Optional[str] = None,
+    subtitle_content: Optional[str] = None,
+    invalid_script: Optional[str] = None,
+    validation_errors: Optional[str] = None,
+    api_key: Optional[str] = None,
+    model: Optional[str] = None,
+    base_url: Optional[str] = None,
+    temperature: float = 0.3,
+    provider: Optional[str] = None,
+    narration_language: str = "简体中文（中国）",
+    drama_genre: str = "逆袭/复仇",
+    prompt_category: str = "short_drama_narration",
+) -> Dict[str, Any]:
+    """Repair a narration script based on validation errors.
+
+    Convenience wrapper: builds a ``SubtitleAnalyzer`` from the connection
+    arguments and delegates to ``SubtitleAnalyzer.repair_narration_script``.
+    ``plot_analysis``, ``subtitle_content``, ``invalid_script`` and
+    ``validation_errors`` are replaced by ``""`` when falsy; ``short_name``
+    is forwarded as given.
+
+    Returns:
+        The result dict produced by the analyzer method.
+    """
+    analyzer = SubtitleAnalyzer(
+        temperature=temperature,
+        api_key=api_key,
+        model=model,
+        base_url=base_url,
+        provider=provider,
+        prompt_category=prompt_category,
+    )
+
+    return analyzer.repair_narration_script(
+        short_name=short_name,
+        plot_analysis=plot_analysis or "",
+        subtitle_content=subtitle_content or "",
+        invalid_script=invalid_script or "",
+        validation_errors=validation_errors or "",
+        temperature=temperature,
+        narration_language=narration_language,
+        drama_genre=drama_genre,
+    )
+
+
 if __name__ == '__main__':
    text_api_key = "skxxxx"
    text_model = "gemini-2.0-flash"
--- a/app/services/clip_video.py
+++ b/app/services/clip_video.py
@ -32,6 +32,108 @@ def parse_timestamp(timestamp: str) -> tuple:
    return start_time, end_time


+def _ffmpeg_time_to_seconds(time_value: str) -> float:
+    """Convert a timestamp string into a number of seconds.
+
+    Accepts ``HH:MM:SS[.mmm]``, ``MM:SS[.mmm]`` or a plain number of seconds.
+    A comma is accepted as the decimal separator (SRT style).
+
+    Raises:
+        ValueError: If a component cannot be parsed as a number.
+    """
+    text = str(time_value).strip().replace(",", ".")
+    fields = text.split(":")
+    field_count = len(fields)
+
+    if field_count == 3:
+        return int(fields[0]) * 3600 + int(fields[1]) * 60 + float(fields[2])
+    if field_count == 2:
+        return int(fields[0]) * 60 + float(fields[1])
+    # Any other shape is treated as a bare seconds value.
+    return float(text)
+
+
+def _calculate_ffmpeg_duration(start_time: str, end_time: str) -> str:
+    """Return the clip length between two timestamps as a seconds string.
+
+    The result is formatted with at most three decimals and without trailing
+    zeros (e.g. ``"2.5"``, ``"10"``).
+
+    Raises:
+        ValueError: If ``end_time`` is not strictly after ``start_time``.
+    """
+    end_seconds = _ffmpeg_time_to_seconds(end_time)
+    start_seconds = _ffmpeg_time_to_seconds(start_time)
+    span = end_seconds - start_seconds
+    if span <= 0:
+        raise ValueError(f"无效的视频裁剪时间范围: {start_time} -> {end_time}")
+
+    formatted = "{:.3f}".format(span)
+    # Drop trailing zeros first, then a dangling decimal point.
+    return formatted.rstrip("0").rstrip(".")
+
+
+def _append_fast_seek_input(cmd: List[str], input_path: str, start_time: str, end_time: str) -> None:
+    """Append ``-ss <start> -i <input> -t <duration>`` to ``cmd`` in place.
+
+    ``-ss`` is placed before ``-i`` so the seek is applied to the input, and
+    the end of the clip is expressed as a duration computed from the two
+    timestamps.
+    """
+    clip_length = _calculate_ffmpeg_duration(start_time, end_time)
+    seek_args = ["-ss", start_time, "-i", input_path, "-t", clip_length]
+    cmd.extend(seek_args)
+
+
+def _normalize_video_origin_paths(
+    video_origin_path: str,
+    video_origin_paths: Optional[List[str]] = None,
+) -> List[str]:
+    """Merge the single and multi-video inputs into one clean, ordered list.
+
+    ``video_origin_path`` (when truthy) comes first, followed by the entries
+    of ``video_origin_paths``. Non-string entries, blank entries and
+    duplicates (compared after stripping whitespace) are dropped.
+    """
+    candidates = [video_origin_path] if video_origin_path else []
+    if video_origin_paths:
+        candidates.extend(video_origin_paths)
+
+    unique_paths = []
+    seen = set()
+    for candidate in candidates:
+        if isinstance(candidate, str):
+            cleaned = candidate.strip()
+            if cleaned and cleaned not in seen:
+                seen.add(cleaned)
+                unique_paths.append(cleaned)
+    return unique_paths
+
+
+def _coerce_video_id(value) -> Optional[int]:
+    """Convert ``value`` to a positive integer video id, or ``None``.
+
+    ``None`` is returned when ``int(value)`` raises ``TypeError`` or
+    ``ValueError``, or when the converted id is zero or negative.
+    """
+    try:
+        parsed = int(value)
+    except (TypeError, ValueError):
+        return None
+    if parsed > 0:
+        return parsed
+    return None
+
+
+def _match_video_id_by_name(video_name: str, video_origin_paths: List[str]) -> Optional[int]:
+    """Find the 1-based index of the source video whose file name matches.
+
+    Only base names are compared, so directory parts of ``video_name`` and of
+    the candidate paths are ignored. Returns ``None`` when ``video_name`` is
+    empty or nothing matches; the first match wins.
+    """
+    wanted = str(video_name or "").strip()
+    if not wanted:
+        return None
+
+    wanted_basename = os.path.basename(wanted)
+    return next(
+        (
+            position
+            for position, candidate in enumerate(video_origin_paths, start=1)
+            if os.path.basename(candidate) == wanted_basename
+        ),
+        None,
+    )
+
+
+def _resolve_script_video_path(script_item: Dict, video_origin_paths: List[str]) -> str:
+    """Pick the source video a script segment should be clipped from.
+
+    Resolution order:
+      1. An explicit path stored on the segment (``source_video_path``,
+         ``video_origin_path`` or ``origin_video_path``) if it exists on disk.
+      2. A file-name match of ``video_name`` / ``source_video`` against
+         ``video_origin_paths``; this overrides any numeric id.
+      3. The 1-based ``video_id`` / ``video_index`` of the segment.
+      4. The first entry of ``video_origin_paths`` (with a warning when the
+         id was out of range).
+
+    ``video_origin_paths`` is indexed directly, so it must not be empty.
+    """
+    explicit_path = (
+        script_item.get("source_video_path")
+        or script_item.get("video_origin_path")
+        or script_item.get("origin_video_path")
+    )
+    if explicit_path and os.path.exists(explicit_path):
+        return explicit_path
+
+    video_id = _coerce_video_id(script_item.get("video_id") or script_item.get("video_index"))
+    name_match = _match_video_id_by_name(
+        script_item.get("video_name") or script_item.get("source_video"),
+        video_origin_paths,
+    )
+    if name_match:
+        # A file-name match takes precedence over the numeric id.
+        video_id = name_match
+
+    fallback_path = video_origin_paths[0]
+    if video_id is None:
+        return fallback_path
+    if video_id <= len(video_origin_paths):
+        return video_origin_paths[video_id - 1]
+
+    logger.warning(
+        f"片段 {script_item.get('_id')} 的 video_id={video_id} 超出视频数量 "
+        f"{len(video_origin_paths)}，默认使用第一个视频"
+    )
+    return fallback_path
+
+
+def _safe_output_id(value) -> str:
+    """Turn a segment id into a string that is safe inside a file name.
+
+    ``None`` becomes ``"unknown"``. Every character that is neither
+    alphanumeric nor ``-`` / ``_`` is replaced by ``_``.
+    """
+    text = "unknown" if value is None else str(value)
+    sanitized = []
+    for char in text:
+        keep = char.isalnum() or char in ("-", "_")
+        sanitized.append(char if keep else "_")
+    return "".join(sanitized)
+
+
 def calculate_end_time(start_time: str, duration: float, extra_seconds: float = 1.0) -> str:
    """
    根据开始时间和持续时间计算结束时间
@ -177,11 +279,8 @@ def build_ffmpeg_command(
        # 对于其他编码器，可以使用硬件解码参数
        cmd.extend(hwaccel_args)
    
-    # 输入文件
-    cmd.extend(["-i", input_path])
-    
-    # 时间范围
-    cmd.extend(["-ss", start_time, "-to", end_time])
+    # 快速定位输入文件，避免长视频从头解码到目标片段
+    _append_fast_seek_input(cmd, input_path, start_time, end_time)
    
    # 编码器设置
    cmd.extend(["-c:v", encoder_config["video_codec"]])
@ -363,11 +462,12 @@ def try_compatibility_fallback(
        bool: 是否成功
    """
    # 兼容性模式：避免所有可能的滤镜链问题
+    duration = _calculate_ffmpeg_duration(start_time, end_time)
    fallback_cmd = [
        "ffmpeg", "-y", "-hide_banner", "-loglevel", "error",
-        "-i", input_path,
        "-ss", start_time,
-        "-to", end_time,
+        "-i", input_path,
+        "-t", duration,
        "-c:v", "libx264",
        "-c:a", "aac",
        "-pix_fmt", "yuv420p",  # 明确指定像素格式
@ -404,11 +504,12 @@ def try_software_fallback(
        bool: 是否成功
    """
    # 纯软件编码
+    duration = _calculate_ffmpeg_duration(start_time, end_time)
    fallback_cmd = [
        "ffmpeg", "-y", "-hide_banner", "-loglevel", "error",
-        "-i", input_path,
        "-ss", start_time,
-        "-to", end_time,
+        "-i", input_path,
+        "-t", duration,
        "-c:v", "libx264",
        "-c:a", "aac",
        "-pix_fmt", "yuv420p",
@ -444,11 +545,12 @@ def try_basic_fallback(
        bool: 是否成功
    """
    # 最基本的编码参数
+    duration = _calculate_ffmpeg_duration(start_time, end_time)
    fallback_cmd = [
        "ffmpeg", "-y", "-hide_banner", "-loglevel", "error",
-        "-i", input_path,
        "-ss", start_time,
-        "-to", end_time,
+        "-i", input_path,
+        "-t", duration,
        "-c:v", "libx264",
        "-c:a", "aac",
        "-pix_fmt", "yuv420p",
@ -527,11 +629,12 @@ def try_fallback_encoding(
        bool: 是否成功
    """
    # 最简单的软件编码命令
+    duration = _calculate_ffmpeg_duration(start_time, end_time)
    fallback_cmd = [
        "ffmpeg", "-y",
-        "-i", input_path,
        "-ss", start_time,
-        "-to", end_time,
+        "-i", input_path,
+        "-t", duration,
        "-c:v", "libx264",
        "-c:a", "aac",
        "-pix_fmt", "yuv420p",
@ -579,7 +682,7 @@ def _process_narration_only_segment(
    # 生成输出文件名
    safe_start_time = start_time.replace(':', '-').replace(',', '-')
    safe_end_time = calculated_end_time.replace(':', '-').replace(',', '-')
-    output_filename = f"ost0_vid_{safe_start_time}@{safe_end_time}.mp4"
+    output_filename = f"ost0_{_safe_output_id(_id)}_vid_{safe_start_time}@{safe_end_time}.mp4"
    output_path = os.path.join(output_dir, output_filename)

    # 构建FFmpeg命令 - 移除音频
@ -622,7 +725,7 @@ def _process_original_audio_segment(
    # 生成输出文件名
    safe_start_time = start_time.replace(':', '-').replace(',', '-')
    safe_end_time = end_time.replace(':', '-').replace(',', '-')
-    output_filename = f"ost1_vid_{safe_start_time}@{safe_end_time}.mp4"
+    output_filename = f"ost1_{_safe_output_id(_id)}_vid_{safe_start_time}@{safe_end_time}.mp4"
    output_path = os.path.join(output_dir, output_filename)

    # 构建FFmpeg命令 - 保持原声
@ -674,7 +777,7 @@ def _process_mixed_segment(
    # 生成输出文件名
    safe_start_time = start_time.replace(':', '-').replace(',', '-')
    safe_end_time = calculated_end_time.replace(':', '-').replace(',', '-')
-    output_filename = f"ost2_vid_{safe_start_time}@{safe_end_time}.mp4"
+    output_filename = f"ost2_{_safe_output_id(_id)}_vid_{safe_start_time}@{safe_end_time}.mp4"
    output_path = os.path.join(output_dir, output_filename)

    # 构建FFmpeg命令 - 保持原声
@ -725,11 +828,8 @@ def _build_ffmpeg_command_with_audio_control(
    elif hwaccel_args:
        cmd.extend(hwaccel_args)

-    # 输入文件
-    cmd.extend(["-i", input_path])
-
-    # 时间范围
-    cmd.extend(["-ss", start_time, "-to", end_time])
+    # 快速定位输入文件，避免长视频从头解码到目标片段
+    _append_fast_seek_input(cmd, input_path, start_time, end_time)

    # 视频编码器设置
    cmd.extend(["-c:v", encoder_config["video_codec"]])
@ -782,28 +882,34 @@ def clip_video_unified(
        script_list: List[Dict],
        tts_results: List[Dict],
        output_dir: Optional[str] = None,
-        task_id: Optional[str] = None
+        task_id: Optional[str] = None,
+        video_origin_paths: Optional[List[str]] = None
 ) -> Dict[str, str]:
    """
    基于OST类型的统一视频裁剪策略 - 消除双重裁剪问题

    Args:
-        video_origin_path: 原始视频的路径
+        video_origin_path: 原始视频的路径；旧脚本或无 video_id 片段默认使用该视频
        script_list: 完整的脚本列表，包含所有片段信息
        tts_results: TTS结果列表，仅包含OST=0和OST=2的片段
        output_dir: 输出目录路径，默认为None时会自动生成
        task_id: 任务ID，用于生成唯一的输出目录，默认为None时会自动生成
+        video_origin_paths: 多个原始视频路径，脚本片段可用 video_id/video_name 指定来源

    Returns:
        Dict[str, str]: 片段ID到裁剪后视频路径的映射
    """
-    # 检查视频文件是否存在
-    if not os.path.exists(video_origin_path):
-        raise FileNotFoundError(f"视频文件不存在: {video_origin_path}")
+    video_source_paths = _normalize_video_origin_paths(video_origin_path, video_origin_paths)
+    if not video_source_paths:
+        raise FileNotFoundError("视频文件不存在: 未提供原始视频路径")
+
+    missing_video_paths = [item for item in video_source_paths if not os.path.exists(item)]
+    if missing_video_paths:
+        raise FileNotFoundError(f"视频文件不存在: {', '.join(missing_video_paths)}")

    # 如果未提供task_id，则根据输入生成一个唯一ID
    if task_id is None:
-        content_for_hash = f"{video_origin_path}_{json.dumps(script_list)}"
+        content_for_hash = f"{json.dumps(video_source_paths, ensure_ascii=False)}_{json.dumps(script_list, ensure_ascii=False)}"
        task_id = hashlib.md5(content_for_hash.encode()).hexdigest()

    # 设置输出目录
@ -840,29 +946,33 @@ def clip_video_unified(
    failed_clips = []
    success_count = 0

-    logger.info(f"📹 开始统一视频裁剪，总共{total_clips}个片段")
+    logger.info(f"📹 开始统一视频裁剪，总共{total_clips}个片段，源视频{len(video_source_paths)}个")

    for i, script_item in enumerate(script_list, 1):
        _id = script_item.get("_id")
        ost = script_item.get("OST", 0)
        timestamp = script_item["timestamp"]
+        source_video_path = _resolve_script_video_path(script_item, video_source_paths)

-        logger.info(f"📹 [{i}/{total_clips}] 处理片段 ID:{_id}, OST:{ost}, 时间戳:{timestamp}")
+        logger.info(
+            f"📹 [{i}/{total_clips}] 处理片段 ID:{_id}, OST:{ost}, "
+            f"视频:{os.path.basename(source_video_path)}, 时间戳:{timestamp}"
+        )

        try:
            if ost == 0:  # 纯解说片段
                output_path = _process_narration_only_segment(
-                    video_origin_path, script_item, tts_map, output_dir,
+                    source_video_path, script_item, tts_map, output_dir,
                    encoder_config, hwaccel_args
                )
            elif ost == 1:  # 纯原声片段
                output_path = _process_original_audio_segment(
-                    video_origin_path, script_item, output_dir,
+                    source_video_path, script_item, output_dir,
                    encoder_config, hwaccel_args
                )
            elif ost == 2:  # 解说+原声混合片段
                output_path = _process_mixed_segment(
-                    video_origin_path, script_item, tts_map, output_dir,
+                    source_video_path, script_item, tts_map, output_dir,
                    encoder_config, hwaccel_args
                )
            else:
--- a/app/services/fun_asr_subtitle.py
+++ b/app/services/fun_asr_subtitle.py
@ -1,15 +1,17 @@
-"""Aliyun Bailian Fun-ASR subtitle transcription helpers.
+"""Fun-ASR subtitle transcription helpers.

-This module intentionally uses the REST API because the official Fun-ASR
+The Bailian path intentionally uses the REST API because the official Fun-ASR
 recorded-file API supports temporary `oss://` resources only through REST.
 """

 from __future__ import annotations

 import os
+import shutil
 import time
 from dataclasses import dataclass
 from typing import Any, Optional
+from urllib.parse import urljoin, urlparse, urlunparse

 import requests
 from loguru import logger
@ -21,6 +23,8 @@ UPLOAD_POLICY_URL = f"{DASHSCOPE_BASE_URL}/api/v1/uploads"
 TRANSCRIPTION_URL = f"{DASHSCOPE_BASE_URL}/api/v1/services/audio/asr/transcription"
 TASK_URL_TEMPLATE = f"{DASHSCOPE_BASE_URL}/api/v1/tasks/{{task_id}}"
 MODEL_NAME = "fun-asr"
+LOCAL_FUN_ASR_API_URL = "http://127.0.0.1:7860"
+LOCAL_FIRERED_ASR_API_URL = "http://127.0.0.1:7867"
 TERMINAL_FAILED_STATUSES = {"FAILED", "CANCELED", "UNKNOWN"}
 PUNCTUATION_BREAKS = set("，。！？；,.!?;")

@ -89,6 +93,93 @@ def _session_post(session, url: str, **kwargs):
    return session.post(url, **kwargs)


+def _require_local_file(local_file: str) -> None:
+    """Raise ``FunAsrError`` unless ``local_file`` is an existing regular file."""
+    if os.path.isfile(local_file):
+        return
+    raise FunAsrError(f"待转写文件不存在: {local_file}")
+
+
+def _normalize_local_api_url(api_url: str = "") -> str:
+    """Return a trimmed local API URL that always carries a scheme.
+
+    A falsy ``api_url`` falls back to ``LOCAL_FUN_ASR_API_URL``. Surrounding
+    whitespace and trailing slashes are removed, and ``http://`` is prepended
+    when the value contains no ``://``.
+
+    Raises:
+        FunAsrError: If nothing is left after trimming.
+    """
+    candidate = api_url or LOCAL_FUN_ASR_API_URL
+    candidate = candidate.strip().rstrip("/")
+    if not candidate:
+        raise FunAsrError("请先填写本地 FunASR-Pack API 地址")
+    has_scheme = "://" in candidate
+    return candidate if has_scheme else f"http://{candidate}"
+
+
+def _local_base_url(api_url: str = "") -> str:
+    """Return the service root for a local API URL.
+
+    The URL is normalized first; a trailing ``/asr`` path segment, as well as
+    any params, query string and fragment, are removed.
+    """
+    parts = urlparse(_normalize_local_api_url(api_url))
+    base_path = parts.path.rstrip("/")
+    if base_path.endswith("/asr"):
+        base_path = base_path[: -len("/asr")].rstrip("/")
+    rebuilt = parts._replace(path=base_path, params="", query="", fragment="")
+    return urlunparse(rebuilt).rstrip("/")
+
+
+def _local_asr_url(api_url: str = "") -> str:
+    """Return the ``/asr`` endpoint URL for a local API address.
+
+    If the normalized URL's path already ends with ``/asr`` it is returned
+    as is; otherwise ``/asr`` is appended.
+    """
+    normalized = _normalize_local_api_url(api_url)
+    endpoint_path = urlparse(normalized).path.rstrip("/")
+    if not endpoint_path.endswith("/asr"):
+        return f"{normalized}/asr"
+    return normalized
+
+
+def _absolute_local_download_url(api_url: str, download_url: str) -> str:
+    """Resolve a download URL returned by the local service to an absolute URL.
+
+    Returns ``""`` for an empty value, the value itself when it already has a
+    URL scheme, and otherwise the value joined onto the service base URL.
+    """
+    target = (download_url or "").strip()
+    if not target:
+        return ""
+    if not urlparse(target).scheme:
+        # Relative link: resolve it against the service root.
+        return urljoin(f"{_local_base_url(api_url)}/", target)
+    return target
+
+
+def _raise_for_local_http(
+    response: requests.Response,
+    action: str,
+    service_name: str = "本地 FunASR-Pack 服务",
+) -> None:
+    """Translate an unsuccessful local-service response into ``FunAsrError``.
+
+    For status codes of 400 and above, the ``detail`` field of a JSON object
+    body (if any) is included in the message. For other responses
+    ``raise_for_status()`` is still called and any exception it raises is
+    wrapped. A response object without ``status_code`` is treated as 200.
+
+    Raises:
+        FunAsrError: If the response indicates a failure.
+    """
+    status_code = getattr(response, "status_code", 200)
+    if not (status_code and status_code >= 400):
+        try:
+            response.raise_for_status()
+        except Exception as exc:
+            raise FunAsrError(f"{action}失败，请确认{service_name}可用") from exc
+        return
+
+    # Best effort: surface the server-provided detail when the body is JSON.
+    try:
+        payload = response.json()
+        detail = str(payload.get("detail") or "") if isinstance(payload, dict) else ""
+    except Exception:
+        detail = ""
+    suffix = f": {detail}" if detail else ""
+    raise FunAsrError(f"{action}失败{suffix}，请确认{service_name}可用")
+
+
+def _local_json(
+    response: requests.Response,
+    action: str,
+    service_name: str = "本地 FunASR-Pack 服务",
+) -> dict[str, Any]:
+    """Validate a local-service response and return its JSON object body.
+
+    Raises:
+        FunAsrError: If the HTTP status indicates failure, the body is not
+            valid JSON, or the decoded JSON is not an object.
+    """
+    _raise_for_local_http(response, action, service_name=service_name)
+    try:
+        payload = response.json()
+    except Exception as exc:
+        raise FunAsrError(f"{action}返回了无效 JSON") from exc
+    if isinstance(payload, dict):
+        return payload
+    raise FunAsrError(f"{action}返回格式无效")
+
+
+def _response_text(response: requests.Response) -> str:
+    """Return the body of ``response`` as text, preferring a UTF-8 decode.
+
+    The raw bytes are decoded as UTF-8 first (a leading BOM is dropped).
+    ``response.text`` is only used when the bytes are missing or are not
+    valid UTF-8: for ``text/*`` responses without an explicit charset,
+    requests decodes ``.text`` as ISO-8859-1, which garbles UTF-8 subtitles.
+
+    Attributes are read with ``getattr`` so lightweight stand-in response
+    objects that define only ``text`` or only ``content`` keep working.
+    """
+    content = getattr(response, "content", b"")
+    has_bytes = isinstance(content, (bytes, bytearray))
+    if has_bytes and content:
+        try:
+            return bytes(content).decode("utf-8-sig")
+        except UnicodeDecodeError:
+            # Not UTF-8: defer to the library's own charset handling below.
+            pass
+
+    text = getattr(response, "text", None)
+    if isinstance(text, str):
+        return text
+    if has_bytes:
+        # Last resort: never fail on undecodable bytes.
+        return bytes(content).decode("utf-8", errors="replace")
+    return str(content)
+
+
 def request_upload_policy(api_key: str, model: str = MODEL_NAME, session=requests) -> UploadPolicy:
    """Request Bailian temporary-storage upload policy for the target model."""
    api_key = _require_api_key(api_key)
@ -418,6 +509,357 @@ def write_srt_file(srt_content: str, subtitle_file: str = "") -> str:
    return subtitle_file


+def copy_srt_file(source_file: str, subtitle_file: str = "") -> str:
+    """Copy an existing SRT file to the requested subtitle path.
+
+    When ``subtitle_file`` is empty, a timestamped file name inside
+    ``utils.subtitle_dir()`` is used. The target's parent directory is
+    created if needed, and nothing is copied when source and target resolve
+    to the same absolute path.
+
+    Returns:
+        The path of the target subtitle file.
+
+    Raises:
+        FunAsrError: If ``source_file`` does not exist.
+    """
+    if not os.path.isfile(source_file):
+        raise FunAsrError(f"本地 FunASR-Pack 返回的字幕文件不存在: {source_file}")
+
+    target_file = subtitle_file or os.path.join(
+        utils.subtitle_dir(), f"fun_asr_local_{int(time.time())}.srt"
+    )
+    target_dir = os.path.dirname(target_file)
+    if target_dir:
+        os.makedirs(target_dir, exist_ok=True)
+
+    same_file = os.path.abspath(source_file) == os.path.abspath(target_file)
+    if not same_file:
+        shutil.copyfile(source_file, target_file)
+    return target_file
+
+
+def request_local_fun_asr_health(api_url: str = LOCAL_FUN_ASR_API_URL, session=requests) -> dict[str, Any]:
+    """Fetch the ``/health`` JSON of the local FunASR-Pack service.
+
+    Raises:
+        FunAsrError: If the request fails or the body is not a JSON object.
+    """
+    health_url = f"{_local_base_url(api_url)}/health"
+    health_response = _session_get(session, health_url, timeout=10)
+    return _local_json(health_response, "检查本地 FunASR-Pack 服务")
+
+
+def request_local_firered_asr_health(
+    api_url: str = LOCAL_FIRERED_ASR_API_URL,
+    session=requests,
+) -> dict[str, Any]:
+    """Fetch the ``/health`` JSON of the local FireRedASR2-AED-Pack service.
+
+    Raises:
+        FunAsrError: If the request fails or the body is not a JSON object.
+    """
+    health_url = f"{_local_base_url(api_url)}/health"
+    health_response = _session_get(session, health_url, timeout=10)
+    action = "检查本地 FireRedASR2-AED-Pack 服务"
+    return _local_json(
+        health_response,
+        action,
+        service_name="本地 FireRedASR2-AED-Pack 服务",
+    )
+
+
+def request_local_fun_asr(
+    local_file: str,
+    api_url: str = LOCAL_FUN_ASR_API_URL,
+    hotword: str = "",
+    enable_spk: Optional[bool] = None,
+    timeout: float = 600.0,
+    session=requests,
+) -> dict[str, Any]:
+    """Upload ``local_file`` to the local FunASR-Pack ``/asr`` API and return its JSON.
+
+    Args:
+        local_file: Path of the media file to transcribe.
+        api_url: Address of the local service.
+        hotword: Optional hotword string; sent only when non-blank.
+        enable_spk: Optional flag sent as ``"true"``/``"false"``; omitted
+            from the form when ``None``.
+        timeout: Request timeout passed to the POST call.
+        session: Object used to issue the request (defaults to ``requests``).
+
+    Raises:
+        FunAsrError: If the file is missing or the service response is invalid.
+    """
+    _require_local_file(local_file)
+
+    form_fields: dict[str, str] = {}
+    trimmed_hotword = hotword.strip()
+    if trimmed_hotword:
+        form_fields["hotword"] = trimmed_hotword
+    if enable_spk is not None:
+        form_fields["enable_spk"] = "true" if enable_spk else "false"
+
+    # The response is fully received before the file handle is closed.
+    with open(local_file, "rb") as upload_handle:
+        upload = {"file": (_safe_upload_name(local_file), upload_handle)}
+        response = _session_post(
+            session,
+            _local_asr_url(api_url),
+            data=form_fields,
+            files=upload,
+            timeout=timeout,
+        )
+    return _local_json(response, "调用本地 FunASR-Pack ASR API")
+
+
+def request_local_firered_asr(
+    local_file: str,
+    api_url: str = LOCAL_FIRERED_ASR_API_URL,
+    enable_vad: Optional[bool] = True,
+    enable_lid: Optional[bool] = True,
+    enable_punc: Optional[bool] = True,
+    return_timestamp: Optional[bool] = True,
+    timeout: float = 600.0,
+    session=requests,
+) -> dict[str, Any]:
+    """Upload ``local_file`` to the local FireRedASR2-AED-Pack ``/asr`` API and return its JSON.
+
+    Each of ``enable_vad``, ``enable_lid``, ``enable_punc`` and
+    ``return_timestamp`` is sent as ``"true"``/``"false"``; an option set to
+    ``None`` is left out of the form.
+
+    Raises:
+        FunAsrError: If the file is missing or the service response is invalid.
+    """
+    _require_local_file(local_file)
+
+    toggles = (
+        ("enable_vad", enable_vad),
+        ("enable_lid", enable_lid),
+        ("enable_punc", enable_punc),
+        ("return_timestamp", return_timestamp),
+    )
+    form_fields: dict[str, str] = {
+        name: ("true" if flag else "false")
+        for name, flag in toggles
+        if flag is not None
+    }
+
+    # The response is fully received before the file handle is closed.
+    with open(local_file, "rb") as upload_handle:
+        upload = {"file": (_safe_upload_name(local_file), upload_handle)}
+        response = _session_post(
+            session,
+            _local_asr_url(api_url),
+            data=form_fields,
+            files=upload,
+            timeout=timeout,
+        )
+    return _local_json(
+        response,
+        "调用本地 FireRedASR2-AED-Pack ASR API",
+        service_name="本地 FireRedASR2-AED-Pack 服务",
+    )
+
+
+def download_local_srt(
+    download_url: str,
+    api_url: str = LOCAL_FUN_ASR_API_URL,
+    subtitle_file: str = "",
+    session=requests,
+    service_name: str = "本地 FunASR-Pack 服务",
+) -> str:
+    """Download an SRT exposed by a local ASR service and save it as a subtitle file.
+
+    Args:
+        download_url: Absolute or service-relative URL of the SRT.
+        api_url: Address of the local service, used to resolve relative URLs.
+        subtitle_file: Target path handed to ``write_srt_file``.
+        session: Object used to issue the request (defaults to ``requests``).
+        service_name: Display name of the service, used in error messages.
+
+    Returns:
+        The path returned by ``write_srt_file``.
+
+    Raises:
+        FunAsrError: If no download URL is available, the download fails, or
+            the downloaded SRT is empty.
+    """
+    absolute_url = _absolute_local_download_url(api_url, download_url)
+    if not absolute_url:
+        # Name the service actually in use; this helper is shared by several
+        # local ASR backends.
+        raise FunAsrError(f"{service_name}结果缺少 SRT 下载地址")
+    response = _session_get(session, absolute_url, timeout=60)
+    _raise_for_local_http(response, "下载本地 SRT", service_name=service_name)
+    srt_content = _response_text(response)
+    if not srt_content.strip():
+        raise FunAsrError(f"{service_name}返回了空 SRT")
+    return write_srt_file(srt_content, subtitle_file)
+
+
+def _local_result_items(result_json: dict[str, Any]):
+    """Yield the result items contained in a local ASR JSON response.
+
+    ``raw`` may be a single dict or a list (non-dict list entries are
+    skipped). When ``raw`` is neither, the response itself is yielded if it
+    has a truthy ``text`` field.
+    """
+    raw = result_json.get("raw")
+    if isinstance(raw, dict):
+        yield raw
+        return
+    if isinstance(raw, list):
+        yield from (entry for entry in raw if isinstance(entry, dict))
+        return
+    if result_json.get("text"):
+        yield result_json
+
+
+def _blocks_from_local_timestamp(item: dict[str, Any], max_chars: int, max_duration: float) -> list[dict[str, Any]]:
+    """Split one local ASR result item into subtitle blocks using its timestamps.
+
+    The characters of ``item["text"]`` are walked in order and paired with the
+    entries of ``item["timestamp"]`` (each expected to be a ``[start, end]``
+    pair). Characters are accumulated into a block until adding the next one
+    would exceed ``max_chars`` or ``max_duration``, or until a punctuation
+    character from ``PUNCTUATION_BREAKS`` closes the block.
+
+    Args:
+        item: Result item with ``text`` and ``timestamp`` entries.
+        max_chars: Maximum number of characters per block.
+        max_duration: Maximum block duration; multiplied by 1000 before being
+            compared with the timestamp values (presumably seconds vs.
+            milliseconds; confirm against ``_timestamp_ms``).
+
+    Returns:
+        The list filled through ``_flush_block``; empty when the item has no
+        text or ``timestamp`` is not a list.
+    """
+    text = str(item.get("text") or "").strip()
+    timestamps = item.get("timestamp") or []
+    if not text or not isinstance(timestamps, list):
+        return []
+
+    non_space_chars = [char for char in text if char.strip()]
+    # With at least one timestamp per non-space character, punctuation is
+    # assumed to own a timestamp too; otherwise punctuation consumes none.
+    consume_punctuation = len(timestamps) >= len(non_space_chars)
+    blocks: list[dict[str, Any]] = []
+    current: Optional[dict[str, Any]] = None
+    timestamp_index = 0
+    last_end = 0.0
+    max_duration_ms = max_duration * 1000
+
+    for char in text:
+        # Whitespace never appears in the output and never consumes a timestamp.
+        if not char.strip():
+            continue
+
+        is_punctuation = char in PUNCTUATION_BREAKS
+        consume_timestamp = consume_punctuation or not is_punctuation
+        if consume_timestamp and timestamp_index < len(timestamps):
+            pair = timestamps[timestamp_index]
+            timestamp_index += 1
+            # NOTE(review): a malformed pair skips the character entirely, so it
+            # is missing from the subtitle text (and a punctuation character
+            # skipped here does not close the block). Confirm this is intended.
+            if not isinstance(pair, (list, tuple)) or len(pair) < 2:
+                continue
+            start_ms = _timestamp_ms(pair[0], "local.timestamp.start")
+            end_ms = _timestamp_ms(pair[1], "local.timestamp.end")
+            last_end = end_ms
+        else:
+            # No timestamp for this character: synthesize one after the previous
+            # end. Punctuation gets zero length, other characters get 200.
+            start_ms = last_end
+            end_ms = last_end if is_punctuation else last_end + 200
+            last_end = end_ms
+
+        if current is None:
+            current = {"start": start_ms, "end": end_ms, "text": char}
+        else:
+            # Close the running block before this character would overflow it.
+            should_split_before = (
+                len(current["text"] + char) > max_chars
+                or (end_ms - current["start"]) > max_duration_ms
+            )
+            if should_split_before:
+                _flush_block(blocks, current)
+                current = {"start": start_ms, "end": end_ms, "text": char}
+            else:
+                current["text"] += char
+                current["end"] = end_ms
+
+        # Punctuation ends the block it belongs to.
+        if current and is_punctuation:
+            _flush_block(blocks, current)
+            current = None
+
+    # Emit whatever is left after the last character.
+    if current:
+        _flush_block(blocks, current)
+    return blocks
+
+
+def local_fun_asr_result_to_srt(
+    result_json: dict[str, Any],
+    max_chars: int = 20,
+    max_duration: float = 3.5,
+) -> str:
+    """Build SRT text from a FunASR-Pack JSON response.
+
+    Each result item is split with its timestamps. An item that yields no
+    timestamp-based blocks but has text is split as a single sentence that
+    starts at 0 and whose end is estimated from the text length.
+
+    Raises:
+        FunAsrError: If no subtitle block could be produced.
+    """
+    blocks: list[dict[str, Any]] = []
+    for item in _local_result_items(result_json):
+        item_blocks = _blocks_from_local_timestamp(item, max_chars, max_duration)
+        if item_blocks:
+            blocks.extend(item_blocks)
+            continue
+
+        text = str(item.get("text") or "").strip()
+        if not text:
+            continue
+        # No usable timestamps: estimate the duration from the text length.
+        estimated_sentence = {
+            "begin_time": 0,
+            "end_time": max(1500, len(text) * 180),
+            "text": text,
+        }
+        blocks.extend(_blocks_from_sentence(estimated_sentence, max_chars=max_chars))
+
+    if not blocks:
+        raise FunAsrError("本地 FunASR-Pack 转写结果为空：未找到可用字幕内容")
+
+    rendered = [
+        _srt_block(number, block["start"], block["end"], block["text"])
+        for number, block in enumerate(blocks, start=1)
+    ]
+    return "\n".join(rendered).rstrip() + "\n"
+
+
+def firered_asr_result_to_srt(result_json: dict[str, Any]) -> str:
+    """Build SRT text from a FireRedASR2-AED-Pack JSON response.
+
+    One subtitle block is produced per entry of ``sentences`` that has text.
+    The start is read from ``start_ms``, ``begin_time`` or ``start_time``
+    (default 0) and the end from ``end_ms`` or ``end_time``; a missing end
+    becomes ``start + 500``. When no block results, the conversion is
+    delegated to ``local_fun_asr_result_to_srt``.
+    """
+    sentences = result_json.get("sentences")
+    blocks: list[dict[str, Any]] = []
+    for sentence in sentences if isinstance(sentences, list) else ():
+        if not isinstance(sentence, dict):
+            continue
+        text = str(sentence.get("text") or "").strip()
+        if not text:
+            continue
+
+        raw_start = sentence.get("start_ms", sentence.get("begin_time", sentence.get("start_time", 0)))
+        raw_end = sentence.get("end_ms", sentence.get("end_time"))
+        start_ms = _timestamp_ms(raw_start, "firered.sentence.start_ms")
+        if raw_end is None:
+            end_ms = start_ms + 500
+        else:
+            end_ms = _timestamp_ms(raw_end, "firered.sentence.end_ms")
+        blocks.append({"start": start_ms, "end": end_ms, "text": text})
+
+    if not blocks:
+        return local_fun_asr_result_to_srt(result_json)
+
+    rendered = [
+        _srt_block(number, block["start"], block["end"], block["text"])
+        for number, block in enumerate(blocks, start=1)
+    ]
+    return "\n".join(rendered).rstrip() + "\n"
+
+
+def _get_local_srt_download_url(result_json: dict[str, Any]) -> str:
+    """Return the SRT download URL advertised in a local ASR response.
+
+    ``downloads["srt"]`` is checked first, then the top-level keys
+    ``srt_url``, ``srt_download_url`` and ``download_url``. The first truthy
+    value is returned as a string; ``""`` when none is present.
+    """
+    downloads = result_json.get("downloads") or {}
+    candidates = []
+    if isinstance(downloads, dict):
+        candidates.append(downloads.get("srt"))
+    candidates.extend(
+        result_json.get(key) for key in ("srt_url", "srt_download_url", "download_url")
+    )
+
+    for candidate in candidates:
+        if candidate:
+            return str(candidate)
+    return ""
+
+
+def create_with_local_fun_asr(
+    local_file: str,
+    subtitle_file: str = "",
+    api_url: str = LOCAL_FUN_ASR_API_URL,
+    hotword: str = "",
+    enable_spk: Optional[bool] = None,
+    timeout: float = 600.0,
+    session=requests,
+) -> Optional[str]:
+    """Create an SRT file through a locally running FunASR-Pack API.
+
+    The subtitle is obtained from, in order of preference: the ``srt_file``
+    path reported by the service (if that file exists locally), the SRT
+    download URL in the response, or a conversion of the JSON result.
+
+    Returns:
+        Path of the generated subtitle file.
+
+    Raises:
+        FunAsrError: On any failure; unexpected exceptions are wrapped.
+    """
+    try:
+        result_json = request_local_fun_asr(
+            local_file=local_file,
+            api_url=api_url,
+            hotword=hotword,
+            enable_spk=enable_spk,
+            timeout=timeout,
+            session=session,
+        )
+
+        srt_file = result_json.get("srt_file")
+        has_local_srt = isinstance(srt_file, str) and bool(srt_file) and os.path.isfile(srt_file)
+        if has_local_srt:
+            output_file = copy_srt_file(srt_file, subtitle_file)
+        else:
+            download_url = _get_local_srt_download_url(result_json)
+            if not download_url:
+                # Nothing to fetch: build the SRT from the JSON payload.
+                output_file = write_srt_file(
+                    local_fun_asr_result_to_srt(result_json),
+                    subtitle_file,
+                )
+            else:
+                output_file = download_local_srt(
+                    download_url,
+                    api_url=api_url,
+                    subtitle_file=subtitle_file,
+                    session=session,
+                )
+
+        logger.info(f"本地 FunASR-Pack 字幕文件已生成: {output_file}")
+        return output_file
+    except FunAsrError:
+        raise
+    except Exception as exc:
+        raise FunAsrError("本地 FunASR-Pack 字幕转写失败，请检查服务地址、文件或模型状态") from exc
+
+
+def create_with_local_firered_asr(
+    local_file: str,
+    subtitle_file: str = "",
+    api_url: str = LOCAL_FIRERED_ASR_API_URL,
+    enable_vad: Optional[bool] = True,
+    enable_lid: Optional[bool] = True,
+    enable_punc: Optional[bool] = True,
+    return_timestamp: Optional[bool] = True,
+    timeout: float = 600.0,
+    session=requests,
+) -> Optional[str]:
+    """Create an SRT file through a locally running FireRedASR2-AED-Pack API.
+
+    The subtitle is obtained from, in order of preference: the ``srt_file``
+    path reported by the service (if that file exists locally), the SRT
+    download URL in the response, or a conversion of the JSON result.
+
+    Returns:
+        Path of the generated subtitle file.
+
+    Raises:
+        FunAsrError: On any failure; unexpected exceptions are wrapped.
+    """
+    service_name = "本地 FireRedASR2-AED-Pack 服务"
+    try:
+        result_json = request_local_firered_asr(
+            local_file=local_file,
+            api_url=api_url,
+            enable_vad=enable_vad,
+            enable_lid=enable_lid,
+            enable_punc=enable_punc,
+            return_timestamp=return_timestamp,
+            timeout=timeout,
+            session=session,
+        )
+
+        srt_file = result_json.get("srt_file")
+        has_local_srt = isinstance(srt_file, str) and bool(srt_file) and os.path.isfile(srt_file)
+        if has_local_srt:
+            output_file = copy_srt_file(srt_file, subtitle_file)
+        else:
+            download_url = _get_local_srt_download_url(result_json)
+            if not download_url:
+                # Nothing to fetch: build the SRT from the JSON payload.
+                output_file = write_srt_file(
+                    firered_asr_result_to_srt(result_json),
+                    subtitle_file,
+                )
+            else:
+                output_file = download_local_srt(
+                    download_url,
+                    api_url=api_url,
+                    subtitle_file=subtitle_file,
+                    session=session,
+                    service_name=service_name,
+                )
+
+        logger.info(f"本地 FireRedASR2-AED-Pack 字幕文件已生成: {output_file}")
+        return output_file
+    except FunAsrError:
+        raise
+    except Exception as exc:
+        raise FunAsrError("本地ASR字幕转写失败，请检查 FireRedASR2-AED-Pack 服务地址、文件或模型状态") from exc
+
+
 def create_with_fun_asr(
    local_file: str,
    subtitle_file: str = "",
--- a/app/services/generate_video.py
+++ b/app/services/generate_video.py
--- a/app/services/jianying_draft_builder.py
+++ b/app/services/jianying_draft_builder.py
--- a/app/services/jianying_task.py
+++ b/app/services/jianying_task.py
@ -1,27 +1,30 @@
 import json
 import os
+import re
 import subprocess
 import time
 from os import path
+from typing import Dict
 from loguru import logger

 from app.config import config
 from app.models import const
 from app.models.schema import VideoClipParams
-from app.services import voice, clip_video, update_script
+from app.services import voice, clip_video, script_subtitle
+from app.services.jianying_draft_builder import write_plaintext_jianying_draft
 from app.services import state as sm
 from app.utils import utils


-def get_audio_duration_ffprobe(audio_file: str) -> float:
+def get_media_duration_ffprobe(media_file: str) -> float:
    """
-    使用ffprobe获取音频文件的精确时长（秒）
+    使用ffprobe获取媒体文件的精确时长（秒）
    
    Args:
-        audio_file: 音频文件路径
+        media_file: 媒体文件路径
        
    Returns:
-        float: 音频时长（秒），精确到微秒
+        float: 媒体时长（秒），精确到微秒
    """
    try:
        cmd = [
@ -29,20 +32,308 @@ def get_audio_duration_ffprobe(audio_file: str) -> float:
            '-v', 'error',
            '-show_entries', 'format=duration',
            '-of', 'csv=p=0',
-            audio_file
+            media_file
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        duration = float(result.stdout.strip())
-        logger.debug(f"使用ffprobe获取音频时长: {duration:.6f}秒")
+        logger.debug(f"使用ffprobe获取媒体时长: {duration:.6f}秒, 文件: {media_file}")
        return duration
    except subprocess.CalledProcessError as e:
        logger.error(f"ffprobe执行失败: {e.stderr}")
        raise
    except Exception as e:
-        logger.error(f"获取音频时长失败: {str(e)}")
+        logger.error(f"获取媒体时长失败: {str(e)}")
        raise


+def get_audio_duration_ffprobe(audio_file: str) -> float:
+    """Return the duration of ``audio_file`` in seconds.
+
+    Thin wrapper kept under the audio-specific name; the probing itself is
+    delegated to ``get_media_duration_ffprobe``.
+    """
+    audio_seconds = get_media_duration_ffprobe(audio_file)
+    return audio_seconds
+
+
+def _strip_tts_voice_prefix(voice_name: str, prefix: str) -> str:
+    """Return ``voice_name`` without a leading ``prefix``.
+
+    A falsy ``voice_name`` (``None`` or ``""``) yields an empty string; a name
+    that does not start with ``prefix`` is returned unchanged.
+    """
+    return (voice_name or "").removeprefix(prefix)
+
+
+def _strip_indextts_prefix(voice_name: str) -> str:
+    """Normalise an IndexTTS voice name via ``config`` and drop its engine prefix."""
+    normalized_name = config.normalize_indextts_voice_prefix(voice_name or "")
+    return _strip_tts_voice_prefix(normalized_name, config.INDEXTTS_VOICE_PREFIX)
+
+
+def _floor_duration_to_milliseconds(duration: float) -> float:
+    """Drop everything below millisecond precision from ``duration`` (seconds).
+
+    ``int()`` truncates toward zero, so non-negative values are rounded down.
+    """
+    whole_milliseconds = int(duration * 1000)
+    return whole_milliseconds / 1000.0
+
+
+def _format_seconds_for_trange(seconds: float) -> str:
+    """Render ``seconds`` with three decimals followed by an ``s`` suffix, e.g. ``1.500s``."""
+    return format(seconds, ".3f") + "s"
+
+
+def _get_cached_media_duration(media_file: str, duration_cache: Dict[str, float]) -> float:
+    """Return the millisecond-truncated duration of ``media_file``.
+
+    The file is probed only when it is not yet a key of ``duration_cache``;
+    the probed value is stored in the cache before being returned.
+    """
+    if media_file in duration_cache:
+        return duration_cache[media_file]
+
+    probed_seconds = _floor_duration_to_milliseconds(get_media_duration_ffprobe(media_file))
+    duration_cache[media_file] = probed_seconds
+    return probed_seconds
+
+
+def _clamp_duration_to_media(
+    requested_duration: float,
+    media_file: str,
+    duration_cache: Dict[str, float],
+    media_label: str,
+    source_start_time: float = 0.0,
+) -> float:
+    """Limit a requested segment length to what the media file can provide.
+
+    Args:
+        requested_duration: Desired length in seconds; negatives are treated as 0.
+        media_file: File whose probed duration bounds the result.
+        duration_cache: Cache passed through to ``_get_cached_media_duration``.
+        media_label: Prefix used in the log messages.
+        source_start_time: Offset into the media where the segment starts;
+            negatives are treated as 0.
+
+    Returns:
+        The smaller of the requested length and the media remaining after
+        ``source_start_time``, both truncated to milliseconds.
+    """
+    wanted = _floor_duration_to_milliseconds(max(requested_duration, 0.0))
+    media_total = _get_cached_media_duration(media_file, duration_cache)
+    start_offset = max(source_start_time, 0.0)
+    remaining = _floor_duration_to_milliseconds(max(media_total - start_offset, 0.0))
+
+    logger.info(
+        f"{media_label}实际时长: {media_total:.6f}秒, "
+        f"可用时长: {remaining:.6f}秒, 请求时长: {wanted:.3f}秒"
+    )
+    if remaining < wanted:
+        # The media cannot cover the request: warn and hand back what is left.
+        logger.warning(
+            f"{media_label}短于脚本时长，已将剪映片段时长从 "
+            f"{wanted:.3f}秒 调整为 {remaining:.3f}秒"
+        )
+        return remaining
+
+    return wanted
+
+
+def _normalize_indextts_reference_audio(params: VideoClipParams) -> None:
+    """Ensure local clone TTS engines use an existing reference audio instead of a stale UI voice.
+
+    Despite its name this handles three engines: IndexTTS-1.5, IndexTTS-2 and
+    OmniVoice (the latter only when its ``mode`` setting is ``voice_clone``).
+    For any other engine the function only normalises ``params.tts_engine``.
+
+    Resolution order for the reference audio:
+        1. ``params.voice_name`` (engine prefix stripped) if it is an existing file;
+        2. the engine's configured ``reference_audio`` if it is an existing file.
+
+    On success ``params.voice_name`` is rewritten to ``<engine prefix><path>``.
+
+    Raises:
+        ValueError: Neither candidate points at an existing file.
+    """
+    params.tts_engine = config.normalize_tts_engine_name(params.tts_engine)
+    # Display names come from the shared config constants so log and error
+    # messages stay in sync with the rest of the application.
+    if params.tts_engine == config.INDEXTTS_ENGINE:
+        tts_config = config.indextts
+        voice_prefix = config.INDEXTTS_VOICE_PREFIX
+        display_name = config.INDEXTTS_DISPLAY_NAME
+    elif params.tts_engine == config.INDEXTTS2_ENGINE:
+        tts_config = config.indextts2
+        voice_prefix = config.INDEXTTS2_VOICE_PREFIX
+        display_name = config.INDEXTTS2_DISPLAY_NAME
+    elif params.tts_engine == config.OMNIVOICE_ENGINE:
+        tts_config = config.omnivoice
+        if tts_config.get("mode", "auto") != "voice_clone":
+            # OmniVoice needs a reference audio only in voice-clone mode.
+            return
+        voice_prefix = config.OMNIVOICE_VOICE_PREFIX
+        display_name = config.OMNIVOICE_DISPLAY_NAME
+    else:
+        return
+
+    # First choice: the voice selected in the request, if it is a real file.
+    candidate = _strip_tts_voice_prefix(getattr(params, "voice_name", "") or "", voice_prefix)
+    if candidate and os.path.isfile(candidate):
+        params.voice_name = f"{voice_prefix}{candidate}"
+        logger.info(f"{display_name} 使用参考音频: {candidate}")
+        return
+
+    # Fallback: the reference audio stored in the engine configuration.
+    configured_ref = _strip_tts_voice_prefix(tts_config.get("reference_audio", "") or "", voice_prefix)
+    if configured_ref and os.path.isfile(configured_ref):
+        params.voice_name = f"{voice_prefix}{configured_ref}"
+        logger.info(f"{display_name} 使用配置中的参考音频: {configured_ref}")
+        return
+
+    raise ValueError(f"{display_name} 参考音频不存在，请在音频设置中上传或选择有效的参考音频")
+
+
+def _index_tts_results(tts_results: list[Dict]) -> Dict:
+    """Build a lookup of TTS results keyed by both ``_id`` and ``timestamp``.
+
+    A result is registered under its ``_id`` when that is not ``None`` and under
+    its ``timestamp`` when that is truthy; later entries overwrite earlier ones
+    that share a key. ``None`` input yields an empty dict.
+    """
+    lookup = {}
+    for entry in tts_results or []:
+        entry_id = entry.get("_id")
+        if entry_id is not None:
+            lookup[entry_id] = entry
+
+        entry_timestamp = entry.get("timestamp")
+        if entry_timestamp:
+            lookup[entry_timestamp] = entry
+    return lookup
+
+
+def _get_video_source_paths(params: VideoClipParams) -> list[str]:
+    """Collect the source video paths from ``params`` via ``clip_video``'s normaliser.
+
+    Missing attributes default to ``""`` (single path) and ``[]`` (path list).
+    """
+    single_path = getattr(params, "video_origin_path", "")
+    multiple_paths = getattr(params, "video_origin_paths", [])
+    return clip_video._normalize_video_origin_paths(single_path, multiple_paths)
+
+
+def _resolve_script_video_path(item: Dict, video_source_paths: list[str]) -> str:
+    """Pick the source video for a script item; ``""`` when no sources are given."""
+    if video_source_paths:
+        return clip_video._resolve_script_video_path(item, video_source_paths)
+    return ""
+
+
+def _resolve_tts_result(item: Dict, tts_map: Dict) -> Dict:
+    """Find the TTS result for a script item, preferring ``_id`` over ``timestamp``.
+
+    Returns an empty dict when neither key is present in ``tts_map``.
+    """
+    item_id = item.get("_id")
+    if item_id is not None and item_id in tts_map:
+        return tts_map[item_id]
+    return tts_map.get(item.get("timestamp"), {})
+
+
+def _build_jianying_draft_script(
+    list_script: list[Dict],
+    params: VideoClipParams,
+    tts_results: list[Dict],
+) -> list[Dict]:
+    """Turn script items into timeline entries that reference the source videos directly.
+
+    Each kept item is a shallow copy of the script item extended with the
+    resolved source video, the matching TTS audio/subtitle files, its source
+    time range and its position on the edited timeline.
+
+    Items are skipped (with a warning) when their ``timestamp`` cannot be
+    parsed or describes a non-positive duration.
+
+    Raises:
+        ValueError: No source video is configured, or no item survived filtering.
+    """
+    video_source_paths = _get_video_source_paths(params)
+    if not video_source_paths:
+        raise ValueError("视频文件不能为空")
+
+    tts_map = _index_tts_results(tts_results)
+    draft_script = []
+    # Running end position of the edited timeline, in seconds.
+    accumulated_duration = 0.0
+
+    for item in list_script:
+        item_copy = dict(item)
+        timestamp = item_copy.get("timestamp", "")
+        try:
+            source_start, source_end = script_subtitle.parse_time_range(timestamp)
+        except ValueError as e:
+            logger.warning(f"解析剪映片段时间戳失败，跳过片段 {item_copy.get('_id')}: {e}")
+            continue
+
+        timestamp_duration = _floor_duration_to_milliseconds(source_end - source_start)
+        if timestamp_duration <= 0:
+            logger.warning(f"剪映片段时长无效，跳过片段 {item_copy.get('_id')}: {timestamp}")
+            continue
+
+        # Only OST 0 and 2 look up a TTS result; other values get none.
+        ost = int(item_copy.get("OST", 0) or 0)
+        tts_result = _resolve_tts_result(item_copy, tts_map) if ost in [0, 2] else {}
+        # A TTS duration, when present, overrides the timestamp-derived one;
+        # a non-positive result falls back to the timestamp duration.
+        item_duration = timestamp_duration
+        if tts_result.get("duration"):
+            item_duration = _floor_duration_to_milliseconds(float(tts_result.get("duration") or 0.0))
+        if item_duration <= 0:
+            item_duration = timestamp_duration
+
+        item_copy.update({
+            "video": _resolve_script_video_path(item_copy, video_source_paths),
+            "audio": tts_result.get("audio_file", ""),
+            "subtitle": tts_result.get("subtitle_file", ""),
+            "sourceTimeRange": timestamp,
+            "start_time": source_start,
+            "source_start_time": source_start,
+            "duration": item_duration,
+            "use_source_timerange": True,
+            # Position on the edited timeline: starts where the previous item ended.
+            "editedTimeRange": (
+                f"{script_subtitle.format_srt_time(accumulated_duration)}-"
+                f"{script_subtitle.format_srt_time(accumulated_duration + item_duration)}"
+            ),
+        })
+        accumulated_duration += item_duration
+        draft_script.append(item_copy)
+
+    if not draft_script:
+        raise ValueError("没有可写入剪映草稿的视频片段")
+
+    return draft_script
+
+
+def _get_original_subtitle_paths(params: VideoClipParams) -> list[str]:
+    """Gather the original-subtitle paths for an export.
+
+    Order of precedence:
+        1. ``params.original_subtitle_path`` (single path), placed first when it
+           is not already among the list entries;
+        2. ``params.original_subtitle_paths`` (a list, or a single string),
+           stripped, de-duplicated and with non-string / blank entries dropped;
+        3. if both are empty, subtitles auto-matched by video file name.
+    """
+    raw_paths = getattr(params, "original_subtitle_paths", []) or []
+    if isinstance(raw_paths, str):
+        raw_paths = [raw_paths]
+
+    stripped = (entry.strip() for entry in raw_paths if isinstance(entry, str))
+    # dict.fromkeys keeps first-seen order while removing duplicates.
+    ordered_paths = list(dict.fromkeys(entry for entry in stripped if entry))
+
+    primary_path = str(getattr(params, "original_subtitle_path", "") or "").strip()
+    if primary_path and primary_path not in ordered_paths:
+        ordered_paths.insert(0, primary_path)
+
+    if ordered_paths:
+        return ordered_paths
+    return _find_original_subtitle_paths_for_videos(_get_video_source_paths(params))
+
+
+def _video_stem_candidates(video_path: str) -> list[str]:
+    """List the file-name stems under which a video's subtitle may be stored.
+
+    The first candidate is the stem itself; a second one is added when the stem
+    ends in ``_`` plus 14 digits and removing that suffix leaves a non-empty name.
+    An empty or missing path yields no candidates.
+    """
+    file_name = path.basename(str(video_path or "").strip())
+    stem, _extension = path.splitext(file_name)
+    if not stem:
+        return []
+
+    trimmed_stem = re.sub(r"_[0-9]{14}$", "", stem)
+    if trimmed_stem and trimmed_stem != stem:
+        return [stem, trimmed_stem]
+    return [stem]
+
+
+def _find_original_subtitle_paths_for_videos(video_paths: list[str]) -> list[str]:
+    """Auto-match ``.srt`` files in the subtitle directory to the given videos.
+
+    A subtitle matches a video when its stem equals one of the video's stem
+    candidates or starts with ``<candidate>_``. Per video the most recently
+    modified match is chosen; the result keeps video order and has no duplicates.
+    Returns an empty list when the directory is missing or holds no ``.srt`` file.
+    """
+    srt_root = utils.subtitle_dir()
+    if not path.isdir(srt_root):
+        return []
+
+    srt_files = [
+        path.join(srt_root, entry)
+        for entry in os.listdir(srt_root)
+        if entry.lower().endswith(".srt")
+    ]
+    if not srt_files:
+        return []
+
+    def _belongs_to(srt_path: str, stems: list[str]) -> bool:
+        srt_stem = path.splitext(path.basename(srt_path))[0]
+        return any(srt_stem == stem or srt_stem.startswith(f"{stem}_") for stem in stems)
+
+    resolved_paths = []
+    for video_path in video_paths:
+        stems = _video_stem_candidates(video_path)
+        hits = [srt_path for srt_path in srt_files if _belongs_to(srt_path, stems)]
+        if not hits:
+            continue
+
+        # Newest file wins; on equal mtimes the earliest listed match is kept.
+        newest = max(hits, key=path.getmtime)
+        if newest not in resolved_paths:
+            resolved_paths.append(newest)
+
+    if resolved_paths:
+        logger.info(f"剪映导出未从参数获取原片字幕，已按视频文件名自动匹配: {resolved_paths}")
+    return resolved_paths
+
+
+def _create_jianying_subtitle_file(
+    task_id: str,
+    draft_script: list[Dict],
+    params: VideoClipParams,
+) -> str:
+    """Create the subtitle file for a draft, best-effort.
+
+    Returns the path produced by ``script_subtitle.create_script_subtitle_file``,
+    or ``""`` when subtitles are disabled on ``params`` or generation fails
+    (the failure is logged as a warning and the draft is exported without subtitles).
+    """
+    subtitles_wanted = getattr(params, "subtitle_enabled", True)
+    if not subtitles_wanted:
+        return ""
+
+    try:
+        # Path discovery stays inside the try so its failures are also tolerated.
+        subtitle_file = script_subtitle.create_script_subtitle_file(
+            task_id=task_id,
+            list_script=draft_script,
+            original_subtitle_paths=_get_original_subtitle_paths(params),
+            video_origin_paths=_get_video_source_paths(params),
+        )
+    except Exception as e:
+        logger.warning(f"剪映草稿字幕生成失败，将导出无字幕草稿: {e}")
+        subtitle_file = ""
+    return subtitle_file
+
+
 def start_export_jianying_draft(task_id: str, params: VideoClipParams):
    """
    导出到剪映草稿的后台任务
@ -83,6 +374,7 @@ def start_export_jianying_draft(task_id: str, params: VideoClipParams):
    2. 使用 TTS 生成音频素材
    """
    logger.info("\n\n## 2. 根据OST设置生成音频列表")
+    _normalize_indextts_reference_audio(params)
    tts_segments = [
        segment for segment in list_script 
        if segment['OST'] in [0, 2]
@ -101,22 +393,15 @@ def start_export_jianying_draft(task_id: str, params: VideoClipParams):
    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)

    """
-    3. 统一视频裁剪 - 基于OST类型的差异化裁剪策略
+    3. 准备剪映草稿时间线 - 直接引用原视频素材和源时间戳
    """
-    logger.info("\n\n## 3. 统一视频裁剪（基于OST类型）")
-    video_clip_result = clip_video.clip_video_unified(
-        video_origin_path=params.video_origin_path,
-        script_list=list_script,
-        tts_results=tts_results
-    )
+    logger.info("\n\n## 3. 准备剪映草稿时间线（不裁剪视频）")
+    new_script_list = _build_jianying_draft_script(list_script, params, tts_results)
+    subtitle_path = _create_jianying_subtitle_file(task_id, new_script_list, params)

-    tts_clip_result = {tts_result['_id']: tts_result['audio_file'] for tts_result in tts_results}
-    subclip_clip_result = {
-        tts_result['_id']: tts_result['subtitle_file'] for tts_result in tts_results
-    }
-    new_script_list = update_script.update_script_timestamps(list_script, video_clip_result, tts_clip_result, subclip_clip_result)
-
-    logger.info(f"统一裁剪完成，处理了 {len(video_clip_result)} 个视频片段")
+    logger.info(f"剪映草稿时间线准备完成，处理了 {len(new_script_list)} 个视频片段")
+    if subtitle_path:
+        logger.info(f"剪映草稿字幕文件: {subtitle_path}")

    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60)

@ -126,114 +411,38 @@ def start_export_jianying_draft(task_id: str, params: VideoClipParams):
    logger.info("\n\n## 4. 导出到剪映草稿")
    
    try:
-        import pyJianYingDraft
-        from pyJianYingDraft import DraftFolder, VideoSegment, AudioSegment, trange, TrackType
        jianying_draft_path = config.ui.get("jianying_draft_path", "")
        if not jianying_draft_path:
            raise ValueError("剪映草稿路径未配置")
        
-        # 创建DraftFolder实例
-        draft_folder = DraftFolder(jianying_draft_path)
-        
        # 使用从参数中获取的草稿名称，如果为空则使用默认名称
        draft_name = getattr(params, 'draft_name', "")
        logger.debug(f"从params获取的草稿名称: '{draft_name}' (类型: {type(draft_name)})")
        if not draft_name:
            draft_name = f"NarratoAI_{int(time.time())}"
            logger.debug(f"使用默认草稿名称: '{draft_name}'")
-        
-        # 创建新草稿
-        script = draft_folder.create_draft(draft_name, 1920, 1080)
-        
-        # 添加视频轨道和音频轨道
-        script.add_track(TrackType.video, '视频轨道')
-        script.add_track(TrackType.audio, '音频轨道')
-        
-        # 处理脚本数据
-        current_time = 0
+
        output_dir = utils.task_dir(task_id)
-        
-        for item in new_script_list:
-            # 获取时间信息
-            start_time = float(item.get('start_time', 0.0))
-            duration = float(item.get('duration', 0.0))
-            timestamp = item.get('timestamp', '')
-            
-            logger.info(f"处理片段: OST={item['OST']}, start_time={start_time}, duration={duration}, timestamp={timestamp}")
-            
-            # 生成音频文件路径
-            audio_file = ""
-            if timestamp:
-                timestamp_formatted = timestamp.replace(':', '_')
-                audio_file = os.path.join(
-                    output_dir,
-                    f"audio_{timestamp_formatted}.mp3"
-                )
-            
-            # 检查是否有裁剪后的视频文件
-            video_file = item.get('video', '')
-            if video_file and not os.path.exists(video_file):
-                video_file = ""
-            
-            # 添加视频片段
-            if video_file:
-                # 使用裁剪后的视频文件
-                # 对于裁剪后的视频，target_timerange的第二个参数是持续时间
-                video_segment = VideoSegment(
-                    video_file,
-                    trange(f"{current_time}s", f"{duration}s")
-                )
-            else:
-                # 使用原始视频文件
-                # source_timerange是从原始视频中截取的部分
-                # target_timerange是片段在时间轴上的位置
-                video_segment = VideoSegment(
-                    params.video_origin_path,
-                    trange(f"{current_time}s", f"{duration}s"),
-                    source_timerange=trange(f"{start_time}s", f"{duration}s")
-                )
-            script.add_segment(video_segment, '视频轨道')
-            
-            # 处理音频
-            if item['OST'] in [0, 2]:  # 需要TTS的片段
-                if os.path.exists(audio_file):
-                    # 使用ffprobe获取精确的音频时长，避免因TTS引擎差异导致时长不匹配
-                    actual_audio_duration = get_audio_duration_ffprobe(audio_file)
-                    logger.info(f"音频文件实际时长: {actual_audio_duration:.6f}秒, 脚本时长(视频): {duration:.3f}秒")
-                    
-                    # 使用音频实际时长和视频时长中的较小值，确保不超过素材时长
-                    # 当TTS语速调整时，音频可能比视频长或短，取较小值可以避免超出素材
-                    safe_duration = min(actual_audio_duration, duration)
-                    logger.info(f"使用时长: {safe_duration:.6f}秒 (取音频和视频时长的较小值)")
-                    
-                    audio_segment = AudioSegment(
-                        audio_file,
-                        trange(f"{current_time}s", f"{safe_duration}s")
-                    )
-                    script.add_segment(audio_segment, '音频轨道')
-                else:
-                    logger.warning(f"音频文件不存在: {audio_file}")
-            # OST=1的片段保留原声，不需要添加额外音频
-            
-            # 更新当前时间
-            current_time += duration
-        
-        # 保存草稿
-        script.save()
-        
-        draft_path = os.path.join(jianying_draft_path, draft_name)
+
+        draft_path, draft_name = write_plaintext_jianying_draft(
+            jianying_draft_path=jianying_draft_path,
+            draft_name=draft_name,
+            new_script_list=new_script_list,
+            params=params,
+            output_dir=output_dir,
+            subtitle_path=subtitle_path,
+        )
        
        logger.success(f"成功导出到剪映草稿: {draft_name}")
        logger.info(f"草稿已保存到: {draft_path}")
        
        # 更新任务状态
-        sm.state.update_task(task_id, state=const.TASK_STATE_COMPLETE, progress=100, draft_path=draft_path, draft_name=draft_name)
+        task_kwargs = {"draft_path": draft_path, "draft_name": draft_name}
+        if subtitle_path:
+            task_kwargs["subtitles"] = [subtitle_path]
+        sm.state.update_task(task_id, state=const.TASK_STATE_COMPLETE, progress=100, **task_kwargs)
        
-        return {"draft_path": draft_path, "draft_name": draft_name}
-        
-    except ImportError as e:
-        logger.error(f"导入pyJianYingDraft失败: {e}")
-        raise ImportError(f"pyJianYingDraft库导入失败: {e}\n请确保已正确安装该库")
+        return task_kwargs
    except Exception as e:
        logger.error(f"导出到剪映草稿失败: {e}")
        import traceback
--- a/app/services/llm/base.py
+++ b/app/services/llm/base.py
@ -178,6 +178,27 @@ class TextModelProvider(BaseLLMProvider):
            生成的文本内容
        """
        pass
+
+    async def generate_text_stream(self,
+                                 prompt: str,
+                                 system_prompt: Optional[str] = None,
+                                 temperature: float = 1.0,
+                                 max_tokens: Optional[int] = None,
+                                 response_format: Optional[str] = None,
+                                 on_chunk=None,
+                                 **kwargs) -> str:
+        """Generate text and report it through ``on_chunk`` where possible.
+
+        This default implementation does not stream: it awaits
+        ``generate_text`` and, when ``on_chunk`` is truthy, calls it once with
+        ``{"type": "content", "text": <full result>}``.
+
+        Returns:
+            The complete generated text.
+        """
+        full_text = await self.generate_text(
+            prompt=prompt,
+            system_prompt=system_prompt,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            response_format=response_format,
+            **kwargs,
+        )
+        if not on_chunk:
+            return full_text
+
+        on_chunk({"type": "content", "text": full_text})
+        return full_text
    
    def _build_messages(self, prompt: str, system_prompt: Optional[str] = None) -> List[Dict[str, str]]:
        """构建消息列表"""
--- a/app/services/llm/migration_adapter.py
+++ b/app/services/llm/migration_adapter.py
@ -198,11 +198,19 @@ class VisionAnalyzerAdapter:
 class SubtitleAnalyzerAdapter:
    """字幕分析器适配器"""

-    def __init__(self, api_key: str, model: str, base_url: str, provider: str = None):
+    def __init__(
+        self,
+        api_key: str,
+        model: str,
+        base_url: str,
+        provider: str = None,
+        prompt_category: str = "short_drama_narration",
+    ):
        self.api_key = api_key
        self.model = model
        self.base_url = base_url
        self.provider = provider or "openai"
+        self.prompt_category = prompt_category or "short_drama_narration"

    def _run_async_safely(self, coro_func, *args, **kwargs):
        """安全地运行异步协程"""
@ -225,6 +233,229 @@ class SubtitleAnalyzerAdapter:
        output = output.strip()

        return output
+
+    def _render_prompt(self, name: str, parameters: Dict[str, Any]) -> tuple[str, Optional[str]]:
+        """Render prompt ``name`` from this adapter's prompt category.
+
+        Returns:
+            ``(rendered_prompt, system_prompt)`` where the system prompt comes
+            from the prompt object's ``get_system_prompt()``.
+        """
+        category = self.prompt_category
+        rendered_prompt = PromptManager.get_prompt(
+            category=category,
+            name=name,
+            parameters=parameters,
+        )
+        system_prompt = PromptManager.get_prompt_object(
+            category=category,
+            name=name,
+        ).get_system_prompt()
+        return rendered_prompt, system_prompt
+
+    def _generate_json_text(
+        self,
+        prompt: str,
+        system_prompt: Optional[str],
+        temperature: float,
+        stream_callback=None,
+    ) -> str:
+        """Request a JSON-formatted completion and return the cleaned text.
+
+        With a truthy ``stream_callback`` the streaming service entry point is
+        used and the callback is forwarded as ``on_chunk``; otherwise the plain
+        entry point is used. The raw result always passes through
+        ``_clean_json_output``.
+        """
+        request = {
+            "prompt": prompt,
+            "system_prompt": system_prompt,
+            "provider": self.provider,
+            "temperature": temperature,
+            "response_format": "json",
+            "api_key": self.api_key,
+            "api_base": self.base_url,
+        }
+        if stream_callback:
+            llm_call = UnifiedLLMService.generate_text_stream
+            request["on_chunk"] = stream_callback
+        else:
+            llm_call = UnifiedLLMService.generate_text
+
+        raw_output = self._run_async_safely(llm_call, **request)
+        return self._clean_json_output(raw_output)
+
+    def _generate_plain_text(self, prompt: str, system_prompt: Optional[str], temperature: float) -> str:
+        """Request a free-form completion and return it stripped of surrounding whitespace.
+
+        A falsy result from the service is returned as ``""``.
+        """
+        request = {
+            "prompt": prompt,
+            "system_prompt": system_prompt,
+            "provider": self.provider,
+            "temperature": temperature,
+            "api_key": self.api_key,
+            "api_base": self.base_url,
+        }
+        raw_output = self._run_async_safely(UnifiedLLMService.generate_text, **request)
+        return str(raw_output or "").strip()
+
+    def generate_narration_copy(
+        self,
+        short_name: str,
+        plot_analysis: str,
+        subtitle_content: str = "",
+        temperature: float = 0.7,
+        narration_language: str = "简体中文（中国）",
+        drama_genre: str = "逆袭/复仇",
+    ) -> Dict[str, Any]:
+        """Generate editable narration copy before timeline matching.
+
+        Returns:
+            On success a dict with ``status="success"``, ``narration_copy``,
+            ``model`` and ``temperature``. Any exception is logged and reported
+            as a dict with ``status="error"``, ``message`` and ``temperature``.
+        """
+        try:
+            template_values = {
+                "drama_name": short_name,
+                "drama_genre": drama_genre,
+                "plot_analysis": plot_analysis,
+                "subtitle_content": subtitle_content,
+                "narration_language": narration_language,
+            }
+            prompt, system_prompt = self._render_prompt("narration_copy", template_values)
+            copy_text = self._generate_plain_text(prompt, system_prompt, temperature)
+            outcome = {
+                "status": "success",
+                "narration_copy": copy_text,
+                "model": self.model,
+                "temperature": temperature,
+            }
+        except Exception as e:
+            logger.error(f"解说文案正文生成失败: {str(e)}")
+            outcome = {
+                "status": "error",
+                "message": str(e),
+                "temperature": temperature,
+            }
+        return outcome
+
+    def match_narration_copy_to_script(
+        self,
+        short_name: str,
+        plot_analysis: str,
+        subtitle_content: str,
+        narration_copy: str,
+        temperature: float = 0.3,
+        narration_language: str = "简体中文（中国）",
+        drama_genre: str = "逆袭/复仇",
+        original_sound_ratio: int = 30,
+        stream_callback=None,
+    ) -> Dict[str, Any]:
+        """Match reviewed narration copy to source footage and return a JSON script.
+
+        The generation temperature is capped at 0.3; the returned ``temperature``
+        field still carries the value the caller passed in.
+
+        Returns:
+            On success a dict with ``status="success"``, ``narration_script``,
+            ``model`` and ``temperature``. Any exception is logged and reported
+            as a dict with ``status="error"``, ``message`` and ``temperature``.
+        """
+        try:
+            template_values = {
+                "drama_name": short_name,
+                "drama_genre": drama_genre,
+                "plot_analysis": plot_analysis,
+                "subtitle_content": subtitle_content,
+                "narration_copy": narration_copy,
+                "narration_language": narration_language,
+                "original_sound_ratio": int(original_sound_ratio),
+            }
+            prompt, system_prompt = self._render_prompt("script_matching", template_values)
+            capped_temperature = min(float(temperature), 0.3)
+            matched_script = self._generate_json_text(
+                prompt,
+                system_prompt,
+                capped_temperature,
+                stream_callback=stream_callback,
+            )
+            outcome = {
+                "status": "success",
+                "narration_script": matched_script,
+                "model": self.model,
+                "temperature": temperature,
+            }
+        except Exception as e:
+            logger.error(f"解说文案画面匹配失败: {str(e)}")
+            outcome = {
+                "status": "error",
+                "message": str(e),
+                "temperature": temperature,
+            }
+        return outcome
+
+    def plan_narration_segments(
+        self,
+        short_name: str,
+        plot_analysis: str,
+        subtitle_content: str = "",
+        temperature: float = 0.3,
+        narration_language: str = "简体中文（中国）",
+        drama_genre: str = "逆袭/复仇",
+    ) -> str:
+        """Plan source segments before generating final copy.
+
+        Renders the ``segment_planning`` prompt and requests JSON text with the
+        temperature capped at 0.3. Exceptions are not caught here.
+        """
+        template_values = {
+            "drama_name": short_name,
+            "drama_genre": drama_genre,
+            "plot_analysis": plot_analysis,
+            "subtitle_content": subtitle_content,
+            "narration_language": narration_language,
+        }
+        prompt, system_prompt = self._render_prompt("segment_planning", template_values)
+        capped_temperature = min(float(temperature), 0.3)
+        return self._generate_json_text(prompt, system_prompt, capped_temperature)
+
+    def generate_narration_script_from_plan(
+        self,
+        short_name: str,
+        plot_analysis: str,
+        subtitle_content: str,
+        segment_plan: str,
+        temperature: float = 0.7,
+        narration_language: str = "简体中文（中国）",
+        drama_genre: str = "逆袭/复仇",
+    ) -> str:
+        """Generate the JSON narration script from a previously produced segment plan.
+
+        Renders the ``script_generation`` prompt and requests JSON text at the
+        given temperature (no cap is applied). Exceptions are not caught here.
+        """
+        template_values = {
+            "drama_name": short_name,
+            "drama_genre": drama_genre,
+            "plot_analysis": plot_analysis,
+            "subtitle_content": subtitle_content,
+            "segment_plan": segment_plan,
+            "narration_language": narration_language,
+        }
+        prompt, system_prompt = self._render_prompt("script_generation", template_values)
+        return self._generate_json_text(prompt, system_prompt, temperature)
+
+    def repair_narration_script(
+        self,
+        short_name: str,
+        plot_analysis: str,
+        subtitle_content: str,
+        invalid_script: str,
+        validation_errors: str,
+        temperature: float = 0.3,
+        narration_language: str = "简体中文（中国）",
+        drama_genre: str = "逆袭/复仇",
+        stream_callback=None,
+    ) -> Dict[str, Any]:
+        """Repair a generated script once after deterministic validation fails.
+
+        The generation temperature is capped at 0.3; the returned ``temperature``
+        field still carries the value the caller passed in.
+
+        Returns:
+            On success a dict with ``status="success"``, ``narration_script``,
+            ``model`` and ``temperature``. Any exception is logged and reported
+            as a dict with ``status="error"``, ``message`` and ``temperature``.
+        """
+        try:
+            template_values = {
+                "drama_name": short_name,
+                "drama_genre": drama_genre,
+                "plot_analysis": plot_analysis,
+                "subtitle_content": subtitle_content,
+                "invalid_script": invalid_script,
+                "validation_errors": validation_errors,
+                "narration_language": narration_language,
+            }
+            prompt, system_prompt = self._render_prompt("script_repair", template_values)
+            capped_temperature = min(float(temperature), 0.3)
+            repaired_script = self._generate_json_text(
+                prompt,
+                system_prompt,
+                capped_temperature,
+                stream_callback=stream_callback,
+            )
+            outcome = {
+                "status": "success",
+                "narration_script": repaired_script,
+                "model": self.model,
+                "temperature": temperature,
+            }
+        except Exception as e:
+            logger.error(f"解说文案修复失败: {str(e)}")
+            outcome = {
+                "status": "error",
+                "message": str(e),
+                "temperature": temperature,
+            }
+        return outcome
    
    def analyze_subtitle(self, subtitle_content: str) -> Dict[str, Any]:
        """
@ -243,6 +474,7 @@ class SubtitleAnalyzerAdapter:
                subtitle_content=subtitle_content,
                provider=self.provider,
                temperature=1.0,
+                prompt_category=self.prompt_category,
                api_key=self.api_key,
                api_base=self.base_url
            )
@ -262,7 +494,15 @@ class SubtitleAnalyzerAdapter:
                "temperature": 1.0
            }
    
-    def generate_narration_script(self, short_name: str, plot_analysis: str, subtitle_content: str = "", temperature: float = 0.7) -> Dict[str, Any]:
+    def generate_narration_script(
+        self,
+        short_name: str,
+        plot_analysis: str,
+        subtitle_content: str = "",
+        temperature: float = 0.7,
+        narration_language: str = "简体中文（中国）",
+        drama_genre: str = "逆袭/复仇",
+    ) -> Dict[str, Any]:
        """
        生成解说文案 - 兼容原有接口

@ -271,36 +511,30 @@ class SubtitleAnalyzerAdapter:
            plot_analysis: 剧情分析内容
            subtitle_content: 原始字幕内容，用于提供准确的时间戳信息
            temperature: 生成温度
+            narration_language: 解说台词目标语言

        Returns:
            生成结果字典
        """
        try:
-            # 使用新的提示词管理系统构建提示词
-            prompt = PromptManager.get_prompt(
-                category="short_drama_narration",
-                name="script_generation",
-                parameters={
-                    "drama_name": short_name,
-                    "plot_analysis": plot_analysis,
-                    "subtitle_content": subtitle_content
-                }
-            )
-            
-            # 使用统一服务生成文案
-            result = self._run_async_safely(
-                UnifiedLLMService.generate_text,
-                prompt=prompt,
-                system_prompt="你是一位专业的短视频解说脚本撰写专家。",
-                provider=self.provider,
+            segment_plan = self.plan_narration_segments(
+                short_name=short_name,
+                plot_analysis=plot_analysis,
+                subtitle_content=subtitle_content,
                temperature=temperature,
-                response_format="json",
-                api_key=self.api_key,
-                api_base=self.base_url
+                narration_language=narration_language,
+                drama_genre=drama_genre,
+            )
+
+            cleaned_result = self.generate_narration_script_from_plan(
+                short_name=short_name,
+                plot_analysis=plot_analysis,
+                subtitle_content=subtitle_content,
+                segment_plan=segment_plan,
+                temperature=temperature,
+                narration_language=narration_language,
+                drama_genre=drama_genre,
            )
-            
-            # 清理JSON输出
-            cleaned_result = self._clean_json_output(result)

            # 新的提示词系统返回的是包含items数组的JSON格式
            # 为了保持向后兼容，我们需要直接返回这个JSON字符串
--- a/app/services/llm/openai_compatible_provider.py
+++ b/app/services/llm/openai_compatible_provider.py
@ -22,7 +22,7 @@ from openai import (
 )

 from app.config import config
-from app.config.defaults import normalize_openai_compatible_model_name
+from app.config.defaults import DEFAULT_LLM_GENERATION_CONFIG, normalize_openai_compatible_model_name
 from .base import TextModelProvider, VisionModelProvider
 from .exceptions import APICallError, AuthenticationError, ContentFilterError, RateLimitError

@ -68,18 +68,59 @@ class _OpenAICompatibleBase:
        # SDK client 按请求参数动态构建，这里无需初始化全局状态。
        pass

+    def _generation_config_value(self, model_type: str, param_name: str, override: Any = None) -> Any:
+        """Resolve one generation parameter: explicit override, then app config, then packaged default.
+
+        Args:
+            model_type: Prefix of the config key (callers in this file pass "text" or "vision").
+            param_name: Parameter name; also the key read from DEFAULT_LLM_GENERATION_CONFIG.
+            override: Caller-supplied value. Anything other than None is returned unchanged,
+                including falsy values such as 0 or "".
+
+        Returns:
+            The override when given, otherwise the value stored in config.app under
+            "{model_type}_openai_{param_name}", otherwise the packaged default.
+        """
+        if override is None:
+            config_key = f"{model_type}_openai_{param_name}"
+            # The packaged default is looked up before the config is consulted, so an
+            # unknown param_name fails here even if the config holds a value for it.
+            packaged_default = DEFAULT_LLM_GENERATION_CONFIG[param_name]
+            return config.app.get(config_key, packaged_default)
+        return override
+
+    def _build_chat_completion_options(
+        self,
+        model_type: str,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """Build common OpenAI-compatible generation options from config and overrides.
+
+        Each value is resolved through _generation_config_value, so an explicit
+        argument wins over the per-model config entry.
+
+        Args:
+            model_type: Config key prefix, e.g. "text" or "vision".
+            temperature: Explicit sampling temperature, or None to use the configured one.
+            max_tokens: Explicit token limit, or None to use the configured one.
+            **kwargs: Only "top_p" and "thinking_level" are read; other keys are ignored.
+
+        Returns:
+            A dict with "temperature" and "top_p" always present, "max_tokens" only when
+            the resolved limit is a positive integer, and "extra_body" carrying
+            "reasoning_effort" only when the thinking level is low, medium or high.
+        """
+        options: Dict[str, Any] = {
+            "temperature": float(self._generation_config_value(model_type, "temperature", temperature)),
+            "top_p": float(self._generation_config_value(model_type, "top_p", kwargs.get("top_p"))),
+        }
+
+        # A non-positive limit is treated as "do not send max_tokens at all".
+        configured_max_tokens = self._generation_config_value(model_type, "max_tokens", max_tokens)
+        if configured_max_tokens is not None and int(configured_max_tokens) > 0:
+            options["max_tokens"] = int(configured_max_tokens)
+
+        raw_thinking_level = self._generation_config_value(
+            model_type, "thinking_level", kwargs.get("thinking_level")
+        )
+        # Normalise case and surrounding whitespace so a hand-edited value such as
+        # "High" is honoured instead of being silently treated as "auto".
+        thinking_level = str(raw_thinking_level or "auto").strip().lower()
+        if thinking_level in {"low", "medium", "high"}:
+            options["extra_body"] = {"reasoning_effort": thinking_level}
+
+        return options
+
    def _build_client(
        self,
        api_key_override: Optional[str] = None,
        base_url_override: Optional[str] = None,
        timeout_override: Optional[float] = None,
+        max_retries_override: Optional[int] = None,
    ) -> AsyncOpenAI:
        """按请求构建 AsyncOpenAI 客户端，支持动态覆盖 api_key / base_url。"""
        api_key = api_key_override or self.api_key
        base_url = base_url_override or self.base_url or None

        timeout_seconds: float = timeout_override or config.app.get("llm_text_timeout", 180)
-        max_retries: int = config.app.get("llm_max_retries", 3)
+        max_retries: int = max_retries_override or config.app.get("llm_max_retries", 3)

        return AsyncOpenAI(
            api_key=api_key,
@ -147,11 +188,17 @@ class OpenAICompatibleVisionProvider(_OpenAICompatibleBase, VisionModelProvider)
        )

        try:
+            generation_overrides = dict(kwargs)
+            completion_options = self._build_chat_completion_options(
+                "vision",
+                temperature=generation_overrides.pop("temperature", None),
+                max_tokens=generation_overrides.pop("max_tokens", None),
+                **generation_overrides,
+            )
            response = await client.chat.completions.create(
                model=model_name,
                messages=messages,
-                temperature=kwargs.get("temperature", 1.0),
-                max_tokens=kwargs.get("max_tokens", 4000),
+                **completion_options,
            )
            if response.choices and response.choices[0].message and response.choices[0].message.content:
                return response.choices[0].message.content
@ -186,6 +233,61 @@ class OpenAICompatibleVisionProvider(_OpenAICompatibleBase, VisionModelProvider)
 class OpenAICompatibleTextProvider(_OpenAICompatibleBase, TextModelProvider):
    """OpenAI 兼容文本模型提供商。"""

+    def _build_text_completion_kwargs(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: float,
+        max_tokens: Optional[int],
+        response_format: Optional[str],
+        kwargs: Dict[str, Any],
+    ) -> Dict[str, Any]:
+        """Assemble the keyword arguments for a text chat-completion request.
+
+        Args:
+            messages: Chat messages to send.
+            temperature: Temperature from the public method signature. The value 1.0 is
+                treated as "not specified", so the configured temperature applies instead.
+            max_tokens: Token limit used when kwargs does not carry its own "max_tokens".
+            response_format: "json" requests a JSON-object response; other values add nothing.
+            kwargs: Extra caller options; copied before use, so the caller's dict is untouched.
+
+        Returns:
+            A dict holding "model", "messages", the resolved generation options and,
+            for JSON requests, "response_format".
+        """
+        model_name = _normalize_model_name(self.model_name)
+        overrides = dict(kwargs)
+
+        explicit_temperature = overrides.pop("temperature", None)
+        if explicit_temperature is None and temperature != 1.0:
+            explicit_temperature = temperature
+
+        generation_options = self._build_chat_completion_options(
+            "text",
+            temperature=explicit_temperature,
+            max_tokens=overrides.pop("max_tokens", max_tokens),
+            **overrides,
+        )
+
+        completion_kwargs: Dict[str, Any] = {
+            "model": model_name,
+            "messages": messages,
+            **generation_options,
+        }
+        if response_format == "json":
+            completion_kwargs["response_format"] = {"type": "json_object"}
+        return completion_kwargs
+
+    @staticmethod
+    def _emit_stream_chunk(on_chunk, chunk_type: str, text: str):
+        """Forward one streaming event to the optional callback without raising.
+
+        Args:
+            on_chunk: Callable receiving {"type": chunk_type, "text": text}, or a falsy
+                value when the caller did not ask for streaming updates.
+            chunk_type: Event kind; this file sends "reasoning", "content" and "done".
+            text: Event payload. Empty payloads are dropped, except for "done".
+
+        Callback failures are logged at debug level and swallowed, so a broken
+        listener cannot interrupt generation.
+        """
+        if not on_chunk:
+            return
+        # Empty deltas carry no information, but the terminal "done" event is sent with
+        # empty text and must still reach the callback; filtering it would mean the
+        # listener is never told that the stream has finished.
+        if not text and chunk_type != "done":
+            return
+        try:
+            on_chunk({"type": chunk_type, "text": text})
+        except Exception as exc:
+            logger.debug(f"流式回调更新失败: {exc}")
+
+    @staticmethod
+    def _extract_reasoning_delta(delta: Any) -> str:
+        """Return the reasoning text carried by a streamed delta, or "" when there is none.
+
+        The "reasoning_content" attribute is checked first. If it is missing or empty
+        and the delta offers model_dump(), the dumped fields "reasoning_content",
+        "reasoning" and "thinking" are tried in that order. The first truthy value is
+        returned as a string.
+        """
+        if delta is None:
+            return ""
+
+        direct_value = getattr(delta, "reasoning_content", None)
+        if direct_value:
+            return str(direct_value)
+
+        if not hasattr(delta, "model_dump"):
+            return ""
+
+        dumped = delta.model_dump(exclude_none=True)
+        candidates = (dumped.get(key) for key in ("reasoning_content", "reasoning", "thinking"))
+        return next((str(found) for found in candidates if found), "")
+
    async def generate_text(
        self,
        prompt: str,
@ -196,7 +298,6 @@ class OpenAICompatibleTextProvider(_OpenAICompatibleBase, TextModelProvider):
        **kwargs,
    ) -> str:
        messages = self._build_messages(prompt, system_prompt)
-        model_name = _normalize_model_name(self.model_name)

        client = self._build_client(
            api_key_override=kwargs.get("api_key"),
@ -204,15 +305,13 @@ class OpenAICompatibleTextProvider(_OpenAICompatibleBase, TextModelProvider):
            timeout_override=config.app.get("llm_text_timeout", 180),
        )

-        completion_kwargs: Dict[str, Any] = {
-            "model": model_name,
-            "messages": messages,
-            "temperature": temperature,
-        }
-        if max_tokens:
-            completion_kwargs["max_tokens"] = max_tokens
-        if response_format == "json":
-            completion_kwargs["response_format"] = {"type": "json_object"}
+        completion_kwargs = self._build_text_completion_kwargs(
+            messages,
+            temperature,
+            max_tokens,
+            response_format,
+            kwargs,
+        )

        try:
            response = await client.chat.completions.create(**completion_kwargs)
@ -250,5 +349,81 @@ class OpenAICompatibleTextProvider(_OpenAICompatibleBase, TextModelProvider):
            logger.error(f"OpenAI 兼容接口调用失败: {exc}")
            raise APICallError(f"调用失败: {exc}")

+    async def generate_text_stream(
+        self,
+        prompt: str,
+        system_prompt: Optional[str] = None,
+        temperature: float = 1.0,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[str] = None,
+        on_chunk=None,
+        **kwargs,
+    ) -> str:
+        """Generate text with a streaming chat completion and return the full content.
+
+        Args:
+            prompt: User prompt.
+            system_prompt: Optional system prompt.
+            temperature: Sampling temperature, resolved by _build_text_completion_kwargs.
+            max_tokens: Optional token limit.
+            response_format: "json" asks the gateway for a JSON-object response.
+            on_chunk: Optional callback fed through _emit_stream_chunk with
+                "reasoning" and "content" events as deltas arrive.
+            **kwargs: "api_key" and "api_base" override the client settings; the whole
+                dict is also passed on to _build_text_completion_kwargs.
+
+        Returns:
+            The concatenated, stripped content. When the JSON fallback is taken, the
+            result is additionally passed through _clean_json_output.
+
+        Raises:
+            ContentFilterError: A bad-request error matched by _is_content_filter_error.
+            AuthenticationError: The SDK reported an authentication failure.
+            RateLimitError: The SDK reported a rate limit.
+            APICallError: Any other failure, including an empty response.
+        """
+        messages = self._build_messages(prompt, system_prompt)
+        client = self._build_client(
+            api_key_override=kwargs.get("api_key"),
+            base_url_override=kwargs.get("api_base"),
+            timeout_override=config.app.get("llm_text_timeout", 180),
+        )
+        completion_kwargs = self._build_text_completion_kwargs(
+            messages,
+            temperature,
+            max_tokens,
+            response_format,
+            kwargs,
+        )
+        completion_kwargs["stream"] = True
+
+        async def collect_stream() -> str:
+            # Reads completion_kwargs at call time, so the JSON fallback below can
+            # modify the request and simply call this helper again.
+            content_parts: List[str] = []
+            stream = await client.chat.completions.create(**completion_kwargs)
+            async for chunk in stream:
+                # Chunks without choices carry no delta and are skipped.
+                if not getattr(chunk, "choices", None):
+                    continue
+                delta = chunk.choices[0].delta
+                reasoning_delta = self._extract_reasoning_delta(delta)
+                if reasoning_delta:
+                    self._emit_stream_chunk(on_chunk, "reasoning", reasoning_delta)
+
+                # Only content deltas are accumulated into the returned text;
+                # reasoning text is forwarded to the callback but not returned.
+                content_delta = getattr(delta, "content", None) if delta is not None else None
+                if content_delta:
+                    content_parts.append(content_delta)
+                    self._emit_stream_chunk(on_chunk, "content", content_delta)
+
+            result = "".join(content_parts).strip()
+            if result:
+                # Requests a final "done" notification with empty text.
+                self._emit_stream_chunk(on_chunk, "done", "")
+                return result
+            raise APICallError("OpenAI 兼容接口返回空响应")
+
+        try:
+            return await collect_stream()
+
+        except OpenAIBadRequestError as exc:
+            error_msg = str(exc)
+            if response_format == "json" and _is_response_format_error(error_msg):
+                # Gateway rejected response_format: drop it, ask for JSON in the
+                # prompt instead, and retry once.
+                logger.warning("目标网关不支持流式 response_format，回退为提示词约束 JSON 输出")
+                completion_kwargs.pop("response_format", None)
+                messages[-1]["content"] += "\n\n请确保输出严格的JSON格式，不要包含任何其他文字或标记。"
+                # NOTE(review): this retry runs inside the except handler, so errors it
+                # raises are not translated by the sibling handlers below and propagate
+                # as raw SDK exceptions.
+                result = await collect_stream()
+                return _clean_json_output(result)
+
+            if _is_content_filter_error(error_msg):
+                raise ContentFilterError(f"内容被安全过滤器阻止: {error_msg}")
+            raise APICallError(f"请求错误: {error_msg}")
+
+        except OpenAIAuthError as exc:
+            logger.error(f"OpenAI 兼容接口认证失败: {exc}")
+            raise AuthenticationError(str(exc))
+        except OpenAIRateLimitError as exc:
+            logger.error(f"OpenAI 兼容接口速率限制: {exc}")
+            raise RateLimitError(str(exc))
+        except OpenAIAPIError as exc:
+            logger.error(f"OpenAI 兼容接口 API 错误: {exc}")
+            raise APICallError(f"API 错误: {exc}")
+        except Exception as exc:
+            # NOTE(review): unless an earlier clause matches it, the APICallError raised
+            # by collect_stream for an empty response lands here and is re-wrapped.
+            logger.error(f"OpenAI 兼容接口流式调用失败: {exc}")
+            raise APICallError(f"流式调用失败: {exc}")
+
    async def _make_api_call(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        return payload
--- a/app/services/llm/test_openai_compat_unittest.py
+++ b/app/services/llm/test_openai_compat_unittest.py
@ -8,7 +8,7 @@ from app.config import config
 from app.services.llm.base import TextModelProvider
 from app.services.llm.manager import LLMServiceManager
 from app.services.llm.migration_adapter import LegacyLLMAdapter, VisionAnalyzerAdapter
-from app.services.llm.openai_compatible_provider import OpenAICompatibleVisionProvider
+from app.services.llm.openai_compatible_provider import OpenAICompatibleTextProvider, OpenAICompatibleVisionProvider
 from app.services.llm.providers import register_all_providers


@ -116,6 +116,59 @@ class OpenAICompatVisionConcurrencyTests(unittest.IsolatedAsyncioTestCase):
        self.assertEqual(2, max_in_flight)


+# Exercises _build_chat_completion_options on the text provider: packaged defaults,
+# per-model config values, and explicit argument overrides.
+class OpenAICompatGenerationOptionTests(unittest.TestCase):
+    # Snapshot the shared app config so each test can mutate it freely.
+    def setUp(self):
+        self._original_app = dict(config.app)
+
+    # Restore the snapshot taken in setUp, dropping any keys a test added.
+    def tearDown(self):
+        config.app.clear()
+        config.app.update(self._original_app)
+
+    # With no text_openai_* keys configured, the packaged defaults apply and no
+    # extra_body is produced.
+    def test_build_options_uses_generation_defaults(self):
+        provider = OpenAICompatibleTextProvider(api_key="k", model_name="m")
+        for key in (
+            "text_openai_temperature",
+            "text_openai_top_p",
+            "text_openai_max_tokens",
+            "text_openai_thinking_level",
+        ):
+            config.app.pop(key, None)
+
+        options = provider._build_chat_completion_options("text")
+
+        self.assertEqual(1.0, options["temperature"])
+        self.assertEqual(0.95, options["top_p"])
+        self.assertEqual(65536, options["max_tokens"])
+        self.assertNotIn("extra_body", options)
+
+    # Configured text_openai_* values are used, and thinking level "high" is sent
+    # as reasoning_effort inside extra_body.
+    def test_build_options_uses_per_model_generation_config(self):
+        provider = OpenAICompatibleTextProvider(api_key="k", model_name="m")
+        config.app.update(
+            {
+                "text_openai_temperature": 0.3,
+                "text_openai_top_p": 0.8,
+                "text_openai_max_tokens": 2048,
+                "text_openai_thinking_level": "high",
+            }
+        )
+
+        options = provider._build_chat_completion_options("text")
+
+        self.assertEqual(0.3, options["temperature"])
+        self.assertEqual(0.8, options["top_p"])
+        self.assertEqual(2048, options["max_tokens"])
+        self.assertEqual({"reasoning_effort": "high"}, options["extra_body"])
+
+    # Explicit temperature and max_tokens arguments win over the configured values.
+    def test_explicit_generation_options_override_config(self):
+        provider = OpenAICompatibleTextProvider(api_key="k", model_name="m")
+        config.app["text_openai_temperature"] = 0.3
+
+        options = provider._build_chat_completion_options("text", temperature=0.9, max_tokens=512)
+
+        self.assertEqual(0.9, options["temperature"])
+        self.assertEqual(512, options["max_tokens"])
+
+
 class ExplicitVisionAdapterSettingsTests(unittest.IsolatedAsyncioTestCase):
    class _CapturingVisionProvider:
        last_init: tuple[str, str, str | None] | None = None
--- a/app/services/llm/test_subtitle_adapter_pipeline_unittest.py
+++ b/app/services/llm/test_subtitle_adapter_pipeline_unittest.py
@ -0,0 +1,241 @@
+import json
+import unittest
+from unittest import mock
+
+from app.services.llm.migration_adapter import SubtitleAnalyzerAdapter
+from app.services.llm.unified_service import UnifiedLLMService
+from app.services.prompts import PromptManager
+
+
+# Pipeline tests for SubtitleAnalyzerAdapter. Every test patches _run_async_safely,
+# so no LLM request is made; the assertions inspect the prompt and options that
+# would have been sent, plus the shape of the adapter's result dict.
+class SubtitleAnalyzerAdapterPipelineTests(unittest.TestCase):
+    # Narration copy is requested as plain text: the selected genre appears in the
+    # prompt and no response_format is passed.
+    def test_generate_narration_copy_uses_plain_text_prompt_with_selected_type(self):
+        adapter = SubtitleAnalyzerAdapter(
+            api_key="sk-test",
+            model="test-model",
+            base_url="https://example.test/v1",
+            provider="openai",
+        )
+
+        with mock.patch.object(adapter, "_run_async_safely", return_value="她被家人逼到绝路，反击从这一刻开始。") as call:
+            result = adapter.generate_narration_copy(
+                short_name="测试短剧",
+                plot_analysis="女主被家人误会后反击。",
+                subtitle_content="# 视频 1: 1.mp4\n00:00:01,000 --> 00:00:04,000\n女主被误会。",
+                temperature=0.7,
+                narration_language="简体中文（中国）",
+                drama_genre="家庭伦理",
+            )
+
+        self.assertEqual("success", result["status"])
+        self.assertIn("反击", result["narration_copy"])
+        self.assertIn("家庭伦理", call.call_args.kwargs["prompt"])
+        self.assertNotIn("response_format", call.call_args.kwargs)
+
+    # With prompt_category="film_tv_narration" the film/TV prompt text is used
+    # instead of the short-drama one.
+    def test_generate_narration_copy_can_use_film_tv_prompt_category(self):
+        self.assertTrue(PromptManager.exists("film_tv_narration", "narration_copy"))
+        adapter = SubtitleAnalyzerAdapter(
+            api_key="sk-test",
+            model="test-model",
+            base_url="https://example.test/v1",
+            provider="openai",
+            prompt_category="film_tv_narration",
+        )
+
+        with mock.patch.object(adapter, "_run_async_safely", return_value="他发现证据不对，真正的凶手另有其人。") as call:
+            result = adapter.generate_narration_copy(
+                short_name="测试电影",
+                plot_analysis="主角发现证据疑点。",
+                subtitle_content="# 视频 1: 1.mp4\n00:00:01,000 --> 00:00:04,000\n证据不对。",
+                temperature=0.7,
+                narration_language="简体中文（中国）",
+                drama_genre="悬疑/犯罪",
+            )
+
+        self.assertEqual("success", result["status"])
+        self.assertIn("影视解说正文创作任务", call.call_args.kwargs["prompt"])
+        self.assertIn("用户选择的影视类型", call.call_args.kwargs["prompt"])
+        self.assertNotIn("短剧解说正文创作任务", call.call_args.kwargs["prompt"])
+
+    # Each film/TV script prompt must mention intros, outros and ads together with
+    # the "绝对不能" (absolutely must not) wording.
+    def test_film_tv_script_prompts_exclude_intro_outro_and_ads(self):
+        base_parameters = {
+            "drama_name": "测试电影",
+            "drama_genre": "悬疑/犯罪",
+            "plot_analysis": "主角发现证据疑点。",
+            "subtitle_content": "# 视频 1: 1.mp4\n00:00:01,000 --> 00:00:04,000\n证据不对。",
+            "narration_language": "简体中文（中国）",
+        }
+        # Per-prompt parameters: the shared base plus whatever each prompt needs.
+        prompt_parameters = {
+            "segment_planning": base_parameters,
+            "script_matching": {
+                **base_parameters,
+                "narration_copy": "他发现证据不对，真正的凶手另有其人。",
+                "original_sound_ratio": 30,
+            },
+            "script_generation": {
+                **base_parameters,
+                "segment_plan": '{"segments": []}',
+            },
+            "script_repair": {
+                **base_parameters,
+                "invalid_script": '{"items": []}',
+                "validation_errors": "片段包含广告",
+            },
+        }
+
+        for prompt_name, parameters in prompt_parameters.items():
+            with self.subTest(prompt_name=prompt_name):
+                prompt = PromptManager.get_prompt(
+                    category="film_tv_narration",
+                    name=prompt_name,
+                    parameters=parameters,
+                )
+                self.assertIn("片头", prompt)
+                self.assertIn("片尾", prompt)
+                self.assertIn("广告", prompt)
+                self.assertIn("绝对不能", prompt)
+
+    # Matching narration copy to a script requests JSON output; the prompt carries
+    # the genre and the original-sound ratio rendered as "60%".
+    def test_match_narration_copy_to_script_uses_json_prompt_with_selected_type(self):
+        adapter = SubtitleAnalyzerAdapter(
+            api_key="sk-test",
+            model="test-model",
+            base_url="https://example.test/v1",
+            provider="openai",
+        )
+        matched = json.dumps(
+            {
+                "items": [
+                    {
+                        "_id": 1,
+                        "video_id": 1,
+                        "video_name": "1.mp4",
+                        "timestamp": "00:00:01,000-00:00:04,000",
+                        "picture": "女主被家人误会",
+                        "narration": "她被家人逼到绝路，反击从这一刻开始。",
+                        "OST": 0,
+                    }
+                ]
+            },
+            ensure_ascii=False,
+        )
+
+        with mock.patch.object(adapter, "_run_async_safely", return_value=matched) as call:
+            result = adapter.match_narration_copy_to_script(
+                short_name="测试短剧",
+                plot_analysis="女主被家人误会后反击。",
+                subtitle_content="# 视频 1: 1.mp4\n00:00:01,000 --> 00:00:04,000\n女主被误会。",
+                narration_copy="她被家人逼到绝路，反击从这一刻开始。",
+                temperature=0.7,
+                narration_language="简体中文（中国）",
+                drama_genre="家庭伦理",
+                original_sound_ratio=60,
+            )
+
+        self.assertEqual("success", result["status"])
+        self.assertEqual(1, json.loads(result["narration_script"])["items"][0]["_id"])
+        self.assertIn("家庭伦理", call.call_args.kwargs["prompt"])
+        self.assertIn("60%", call.call_args.kwargs["prompt"])
+        self.assertEqual("json", call.call_args.kwargs["response_format"])
+
+    # Supplying stream_callback routes the call through
+    # UnifiedLLMService.generate_text_stream and passes an on_chunk argument.
+    def test_match_narration_copy_to_script_uses_streaming_when_callback_exists(self):
+        adapter = SubtitleAnalyzerAdapter(
+            api_key="sk-test",
+            model="test-model",
+            base_url="https://example.test/v1",
+            provider="openai",
+        )
+        matched = json.dumps({"items": []}, ensure_ascii=False)
+
+        with mock.patch.object(adapter, "_run_async_safely", return_value=matched) as call:
+            result = adapter.match_narration_copy_to_script(
+                short_name="测试短剧",
+                plot_analysis="女主被家人误会后反击。",
+                subtitle_content="# 视频 1: 1.mp4",
+                narration_copy="她被家人逼到绝路，反击从这一刻开始。",
+                stream_callback=lambda _event: None,
+            )
+
+        self.assertEqual("success", result["status"])
+        self.assertIs(UnifiedLLMService.generate_text_stream, call.call_args.args[0])
+        self.assertIn("on_chunk", call.call_args.kwargs)
+
+    # generate_narration_script makes exactly two LLM calls: the first response
+    # queued here is a segment plan, the second the final script.
+    def test_generate_narration_script_plans_segments_before_copywriting(self):
+        adapter = SubtitleAnalyzerAdapter(
+            api_key="sk-test",
+            model="test-model",
+            base_url="https://example.test/v1",
+            provider="openai",
+        )
+        # Canned responses, consumed in call order by the patched _run_async_safely.
+        responses = iter(
+            [
+                json.dumps(
+                    {
+                        "segments": [
+                            {
+                                "_id": 1,
+                                "video_id": 1,
+                                "video_name": "1.mp4",
+                                "timestamp": "00:00:01,000-00:00:04,000",
+                                "OST": 0,
+                                "intent": "开场钩子",
+                            }
+                        ]
+                    },
+                    ensure_ascii=False,
+                ),
+                json.dumps(
+                    {
+                        "items": [
+                            {
+                                "_id": 1,
+                                "video_id": 1,
+                                "video_name": "1.mp4",
+                                "timestamp": "00:00:01,000-00:00:04,000",
+                                "picture": "女主被误会",
+                                "narration": "她被所有人误会，真正的反击却刚刚开始。",
+                                "OST": 0,
+                            }
+                        ]
+                    },
+                    ensure_ascii=False,
+                ),
+            ]
+        )
+
+        with mock.patch.object(adapter, "_run_async_safely", side_effect=lambda *_args, **_kwargs: next(responses)) as call:
+            result = adapter.generate_narration_script(
+                short_name="测试短剧",
+                plot_analysis="女主被误会后反击。",
+                subtitle_content="# 视频 1: 1.mp4\n00:00:01,000 --> 00:00:04,000\n女主被误会。",
+                temperature=0.7,
+                narration_language="简体中文（中国）",
+            )
+
+        self.assertEqual("success", result["status"])
+        self.assertEqual(2, call.call_count)
+        self.assertEqual(1, json.loads(result["narration_script"])["items"][0]["_id"])
+
+    # The repaired JSON returned by the LLM is handed back unchanged in
+    # narration_script.
+    def test_repair_narration_script_returns_repaired_json(self):
+        adapter = SubtitleAnalyzerAdapter(
+            api_key="sk-test",
+            model="test-model",
+            base_url="https://example.test/v1",
+            provider="openai",
+        )
+        repaired = json.dumps({"items": []}, ensure_ascii=False)
+
+        with mock.patch.object(adapter, "_run_async_safely", return_value=repaired):
+            result = adapter.repair_narration_script(
+                short_name="测试短剧",
+                plot_analysis="",
+                subtitle_content="# 视频 1: 1.mp4",
+                invalid_script="{bad}",
+                validation_errors="时间戳错误",
+                narration_language="简体中文（中国）",
+            )
+
+        self.assertEqual("success", result["status"])
+        self.assertEqual(repaired, result["narration_script"])
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/app/services/llm/unified_service.py
+++ b/app/services/llm/unified_service.py
@ -12,6 +12,7 @@ from loguru import logger
 from .manager import LLMServiceManager
 from .validators import OutputValidator
 from .exceptions import LLMServiceError
+from app.services.prompts import PromptManager

 # 提供商注册由 webui.py:main() 显式调用（见 LLM 提供商注册机制重构）
 # 这样更可靠，错误也更容易调试
@ -107,6 +108,37 @@ class UnifiedLLMService:
        except Exception as e:
            logger.error(f"文本生成失败: {str(e)}")
            raise LLMServiceError(f"文本生成失败: {str(e)}")
+
+    @staticmethod
+    async def generate_text_stream(
+        prompt: str,
+        system_prompt: Optional[str] = None,
+        provider: Optional[str] = None,
+        temperature: float = 1.0,
+        max_tokens: Optional[int] = None,
+        response_format: Optional[str] = None,
+        on_chunk=None,
+        **kwargs,
+    ) -> str:
+        """
+        Generate text through the selected provider's streaming interface.
+
+        The provider is looked up with LLMServiceManager.get_text_provider and its
+        generate_text_stream coroutine is awaited with the arguments given here.
+        The original note says providers without streaming support degrade to a
+        single-shot response; that fallback is not implemented in this method, so
+        it presumably lives in the provider classes (confirm there).
+
+        Raises:
+            LLMServiceError: Wraps any exception raised during lookup or generation.
+        """
+        try:
+            streaming_provider = LLMServiceManager.get_text_provider(provider)
+            request_options = {
+                "prompt": prompt,
+                "system_prompt": system_prompt,
+                "temperature": temperature,
+                "max_tokens": max_tokens,
+                "response_format": response_format,
+                "on_chunk": on_chunk,
+            }
+            generated_text = await streaming_provider.generate_text_stream(**request_options, **kwargs)
+
+            logger.info(f"流式文本生成完成，生成内容长度: {len(generated_text)} 字符")
+            return generated_text
+
+        except Exception as stream_error:
+            logger.error(f"流式文本生成失败: {str(stream_error)}")
+            raise LLMServiceError(f"流式文本生成失败: {str(stream_error)}")
    
    @staticmethod
    async def generate_narration_script(prompt: str,
@ -162,6 +194,7 @@ class UnifiedLLMService:
    async def analyze_subtitle(subtitle_content: str,
                             provider: Optional[str] = None,
                             temperature: float = 1.0,
+                             prompt_category: str = "short_drama_narration",
                             validate_output: bool = True,
                             **kwargs) -> str:
        """
@ -181,12 +214,20 @@ class UnifiedLLMService:
            LLMServiceError: 服务调用失败时抛出
        """
        try:
-            # 构建分析提示词
-            system_prompt = "你是一位专业的剧本分析师和剧情概括助手。请仔细分析字幕内容，提取关键剧情信息。"
+            prompt = PromptManager.get_prompt(
+                category=prompt_category,
+                name="plot_analysis",
+                parameters={"subtitle_content": subtitle_content},
+            )
+            prompt_object = PromptManager.get_prompt_object(
+                category=prompt_category,
+                name="plot_analysis",
+            )
+            system_prompt = prompt_object.get_system_prompt()
            
            # 生成分析结果
            result = await UnifiedLLMService.generate_text(
-                prompt=subtitle_content,
+                prompt=prompt,
                system_prompt=system_prompt,
                provider=provider,
                temperature=temperature,
--- a/app/services/llm/validators.py
+++ b/app/services/llm/validators.py
@ -113,6 +113,8 @@ class OutputValidator:
                            "required": ["_id", "timestamp", "picture", "narration"],
                            "properties": {
                                "_id": {"type": "number"},
+                                "video_id": {"type": "number"},
+                                "video_name": {"type": "string"},
                                "timestamp": {"type": "string"},
                                "picture": {"type": "string"},
                                "narration": {"type": "string"},
@ -161,6 +163,16 @@ class OutputValidator:
        item_id = item.get("_id")
        if not isinstance(item_id, (int, float)) or item_id <= 0:
            raise ValidationError(f"第{index+1}项ID必须为正整数: {item_id}", "invalid_id")
+
+        video_id = item.get("video_id")
+        if video_id not in (None, "") and (
+            not isinstance(video_id, (int, float)) or video_id <= 0
+        ):
+            raise ValidationError(f"第{index+1}项video_id必须为正整数: {video_id}", "invalid_video_id")
+
+        video_name = item.get("video_name")
+        if video_name not in (None, "") and not isinstance(video_name, str):
+            raise ValidationError(f"第{index+1}项video_name必须为字符串: {video_name}", "invalid_video_name")
    
    @staticmethod
    def validate_subtitle_analysis(output: str) -> str:
--- a/app/services/merger_video.py
+++ b/app/services/merger_video.py
@ -9,6 +9,7 @@
 '''

 import os
+import json
 import shutil
 import subprocess
 from enum import Enum
@ -127,6 +128,188 @@ def create_ffmpeg_concat_file(video_paths: List[str], concat_file_path: str) ->
    return concat_file_path


+def _get_video_stream_signature(video_path: str) -> Optional[dict]:
+    """
+    Probe the first video stream and return the parameters used to decide whether
+    a concat stream copy is safe.
+
+    Returns:
+        A dict with codec_name, profile, width, height, pix_fmt, r_frame_rate,
+        avg_frame_rate, time_base and sample_aspect_ratio (defaulting to "1:1"),
+        or None when ffprobe fails or reports no video stream.
+    """
+    # Fields copied verbatim from the probe result, in the order they are reported.
+    copied_fields = (
+        "codec_name",
+        "profile",
+        "width",
+        "height",
+        "pix_fmt",
+        "r_frame_rate",
+        "avg_frame_rate",
+        "time_base",
+    )
+    probe_cmd = [
+        'ffprobe', '-v', 'error',
+        '-select_streams', 'v:0',
+        '-show_entries',
+        'stream=codec_name,profile,width,height,pix_fmt,r_frame_rate,avg_frame_rate,time_base,sample_aspect_ratio',
+        '-of', 'json',
+        video_path
+    ]
+
+    try:
+        completed = subprocess.run(probe_cmd, capture_output=True, text=True, check=True)
+        probed_streams = json.loads(completed.stdout or "{}").get("streams", [])
+        if probed_streams:
+            first_stream = probed_streams[0]
+            signature = {field: first_stream.get(field) for field in copied_fields}
+            signature["sample_aspect_ratio"] = first_stream.get("sample_aspect_ratio", "1:1")
+            return signature
+
+        logger.warning(f"视频没有可用的视频流，不能使用 copy 合并: {video_path}")
+        return None
+    except Exception as e:
+        # Any probe failure (missing binary, bad exit code, invalid JSON) rules out copy.
+        logger.warning(f"探测视频流参数失败，不能使用 copy 合并: {video_path}, 错误: {str(e)}")
+        return None
+
+
+def _can_concat_video_copy(video_paths: List[str]) -> bool:
+    """
+    Report whether every clip has the same video-stream signature, so that a concat
+    stream copy will not produce timeline or container problems.
+
+    Returns False for an empty list, when any clip cannot be probed, or when a clip's
+    signature differs from the first clip's.
+    """
+    if not video_paths:
+        return False
+
+    # Probe every clip first; a single failed probe rules out stream copy.
+    probed_signatures = []
+    for clip_path in video_paths:
+        clip_signature = _get_video_stream_signature(clip_path)
+        if not clip_signature:
+            return False
+        probed_signatures.append(clip_signature)
+
+    reference_signature, *remaining_signatures = probed_signatures
+    for clip_path, clip_signature in zip(video_paths[1:], remaining_signatures):
+        if clip_signature == reference_signature:
+            continue
+        logger.warning(
+            "视频片段参数不一致，跳过 copy 合并并回退重编码: "
+            f"{clip_path}, 基准={reference_signature}, 当前={clip_signature}"
+        )
+        return False
+
+    return True
+
+
+def _get_media_duration(video_path: str) -> Optional[float]:
+    """
+    Return the container duration in seconds as reported by ffprobe, or None when
+    probing fails or the output cannot be parsed as a float.
+    """
+    probe_cmd = ['ffprobe', '-v', 'error']
+    probe_cmd += ['-show_entries', 'format=duration']
+    probe_cmd += ['-of', 'csv=p=0']
+    probe_cmd.append(video_path)
+
+    try:
+        completed = subprocess.run(probe_cmd, capture_output=True, text=True, check=True)
+        raw_duration = completed.stdout.strip()
+        return float(raw_duration)
+    except Exception as e:
+        logger.warning(f"探测视频时长失败: {video_path}, 错误: {str(e)}")
+        return None
+
+
+def _concat_duration_matches(video_paths: List[str], output_path: str) -> bool:
+    """
+    Check that the merged file's duration is close to the sum of the input durations.
+
+    The allowed difference is 0.04 seconds per clip, with a minimum of 0.5 seconds.
+    Returns False when any duration cannot be probed or the difference exceeds that
+    tolerance.
+    """
+    expected_duration = 0
+    for clip_path in video_paths:
+        clip_duration = _get_media_duration(clip_path)
+        if clip_duration is None:
+            return False
+        expected_duration += clip_duration
+
+    output_duration = _get_media_duration(output_path)
+    if output_duration is None:
+        return False
+
+    tolerance = max(0.5, len(video_paths) * 0.04)
+    diff = abs(expected_duration - output_duration)
+    if diff > tolerance:
+        logger.warning(
+            "视频流 copy 合并后的时长偏差过大，将回退重编码: "
+            f"期望={expected_duration:.3f}s, 实际={output_duration:.3f}s, 偏差={diff:.3f}s"
+        )
+        return False
+
+    logger.info(
+        "视频流 copy 合并时长校验通过: "
+        f"期望={expected_duration:.3f}s, 实际={output_duration:.3f}s"
+    )
+    return True
+
+
+def _build_concat_video_copy_cmd(concat_file: str, output_path: str) -> List[str]:
+    """
+    Build the ffmpeg command that merges the clips listed in concat_file into
+    output_path by copying the video stream, without audio.
+    """
+    input_args = ['ffmpeg', '-y', '-f', 'concat', '-safe', '0', '-i', concat_file]
+    copy_args = [
+        '-c:v', 'copy',
+        '-an',
+        '-movflags', '+faststart',
+        '-avoid_negative_ts', 'make_zero',
+    ]
+    return [*input_args, *copy_args, output_path]
+
+
+def _build_concat_video_reencode_cmd(concat_file: str, output_path: str, threads: int) -> List[str]:
+    """
+    Build the ffmpeg command that merges the clips listed in concat_file into
+    output_path by re-encoding the video with libx264, without audio.
+
+    threads is passed to ffmpeg's -threads option as a string.
+    """
+    input_args = ['ffmpeg', '-y', '-f', 'concat', '-safe', '0', '-i', concat_file]
+    encode_args = [
+        '-c:v', 'libx264',
+        '-preset', 'medium',
+        '-profile:v', 'high',
+        '-an',
+        '-threads', str(threads),
+    ]
+    return [*input_args, *encode_args, output_path]
+
+
+def _concat_video_streams(
+        video_paths: List[str],
+        concat_file: str,
+        output_path: str,
+        threads: int
+) -> None:
+    """
+    Merge the clips' video streams into output_path, preferring a lossless stream
+    copy and falling back to a re-encode.
+
+    Stream copy is attempted only when _can_concat_video_copy accepts the clips. Its
+    result is kept only if _concat_duration_matches confirms the merged duration;
+    otherwise the copy output is removed and the clips are re-encoded.
+
+    Args:
+        video_paths: Clip paths, in the same order as listed in concat_file.
+        concat_file: Path of the ffmpeg concat list file.
+        output_path: Destination of the merged, audio-less video.
+        threads: Thread count passed to the re-encode command.
+
+    Raises:
+        subprocess.CalledProcessError: If the re-encode command fails.
+    """
+    if _can_concat_video_copy(video_paths):
+        copy_cmd = _build_concat_video_copy_cmd(concat_file, output_path)
+        try:
+            subprocess.run(copy_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            if _concat_duration_matches(video_paths, output_path):
+                logger.info("视频流 copy 合并完成")
+                return
+
+            # The copy finished but its duration is off: discard it before re-encoding.
+            if os.path.exists(output_path):
+                try:
+                    os.remove(output_path)
+                except OSError as e:
+                    logger.warning(f"删除 copy 合并临时结果失败，将继续尝试重编码覆盖: {str(e)}")
+        except subprocess.CalledProcessError as e:
+            # Decode leniently: a strict decode raising UnicodeDecodeError here would
+            # escape the handler and prevent the re-encode fallback from running.
+            error_msg = e.stderr.decode("utf-8", errors="replace") if e.stderr else str(e)
+            logger.warning(f"视频流 copy 合并失败，将回退重编码合并: {error_msg}")
+    else:
+        logger.info("视频流不满足 copy 合并条件，将使用重编码合并")
+
+    reencode_cmd = _build_concat_video_reencode_cmd(concat_file, output_path, threads)
+    subprocess.run(reencode_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    logger.info("视频流重编码合并完成")
+
+
 def process_single_video(
        input_path: str,
        output_path: str,
@ -474,22 +657,7 @@ def combine_clip_videos(
            concat_file = os.path.join(temp_dir, "concat_list.txt")
            create_ffmpeg_concat_file(video_paths_only, concat_file)

-            # 合并所有视频流，但不包含音频
-            concat_cmd = [
-                'ffmpeg', '-y',
-                '-f', 'concat',
-                '-safe', '0',
-                '-i', concat_file,
-                '-c:v', 'libx264',
-                '-preset', 'medium',
-                '-profile:v', 'high',
-                '-an',  # 不包含音频
-                '-threads', str(threads),
-                video_concat_path
-            ]
-
-            subprocess.run(concat_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-            logger.info("视频流合并完成")
+            _concat_video_streams(video_paths_only, concat_file, video_concat_path, threads)

            # 2. 提取并合并有音频的片段
            audio_segments = [video for video in processed_videos if video["keep_audio"]]
--- a/app/services/prompts/init.py
+++ b/app/services/prompts/init.py
@ -56,11 +56,13 @@ __all__ = [
 def initialize_prompts():
    """初始化提示词模块，注册所有提示词"""
    from . import documentary
+    from . import film_tv_narration
    from . import short_drama_editing  
    from . import short_drama_narration
    
    # 注册各模块的提示词
    documentary.register_prompts()
+    film_tv_narration.register_prompts()
    short_drama_editing.register_prompts()
    short_drama_narration.register_prompts()

--- a/app/services/prompts/film_tv_narration/init.py
+++ b/app/services/prompts/film_tv_narration/init.py
@ -0,0 +1,48 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+"""
+@Project: NarratoAI
+@File   : __init__.py
+@Description: 影视解说提示词模块
+"""
+
+from .plot_analysis import PlotAnalysisPrompt
+from .narration_copy import NarrationCopyPrompt
+from .segment_planning import SegmentPlanningPrompt
+from .script_generation import ScriptGenerationPrompt
+from .script_matching import ScriptMatchingPrompt
+from .script_repair import ScriptRepairPrompt
+from ..manager import PromptManager
+
+
+def register_prompts():
+    """注册影视解说相关的提示词"""
+    plot_analysis_prompt = PlotAnalysisPrompt()
+    PromptManager.register_prompt(plot_analysis_prompt, is_default=True)
+
+    narration_copy_prompt = NarrationCopyPrompt()
+    PromptManager.register_prompt(narration_copy_prompt, is_default=True)
+
+    segment_planning_prompt = SegmentPlanningPrompt()
+    PromptManager.register_prompt(segment_planning_prompt, is_default=True)
+
+    script_generation_prompt = ScriptGenerationPrompt()
+    PromptManager.register_prompt(script_generation_prompt, is_default=True)
+
+    script_matching_prompt = ScriptMatchingPrompt()
+    PromptManager.register_prompt(script_matching_prompt, is_default=True)
+
+    script_repair_prompt = ScriptRepairPrompt()
+    PromptManager.register_prompt(script_repair_prompt, is_default=True)
+
+
+__all__ = [
+    "PlotAnalysisPrompt",
+    "NarrationCopyPrompt",
+    "SegmentPlanningPrompt",
+    "ScriptGenerationPrompt",
+    "ScriptMatchingPrompt",
+    "ScriptRepairPrompt",
+    "register_prompts",
+]
--- a/app/services/prompts/film_tv_narration/narration_copy.py
+++ b/app/services/prompts/film_tv_narration/narration_copy.py
@ -0,0 +1,88 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+"""
+@Project: 影视解说-解说文案
+@File   : narration_copy.py
+@Description: 生成可供用户审核修改的影视解说正文
+"""
+
+from ..base import ParameterizedPrompt, PromptMetadata, ModelType, OutputFormat
+
+
+class NarrationCopyPrompt(ParameterizedPrompt):
+    """影视解说正文生成提示词"""
+
+    def __init__(self):
+        metadata = PromptMetadata(
+            name="narration_copy",
+            category="film_tv_narration",
+            version="v1.0",
+            description="基于剧情理解和字幕生成可审核修改的影视解说正文，不绑定时间戳",
+            model_type=ModelType.TEXT,
+            output_format=OutputFormat.TEXT,
+            tags=["影视", "解说文案", "电影解说", "剧情承接", "用户审核"],
+            parameters=["drama_name", "drama_genre", "plot_analysis", "subtitle_content", "narration_language"],
+        )
+        super().__init__(metadata, required_parameters=["drama_name", "plot_analysis", "subtitle_content"])
+
+        self._system_prompt = (
+            "你是一位影视解说文案创作者。你只输出可供用户审核修改的解说正文，"
+            "不要输出JSON、时间戳、编号、标题、解释或Markdown。"
+        )
+
+    def get_template(self) -> str:
+        return """# 影视解说正文创作任务
+
+## 目标
+为影视作品《${drama_name}》创作一份可直接给用户审核修改的解说文案正文。此阶段不做画面匹配，不输出时间戳。
+
+## 剧情理解材料
+<plot>
+${plot_analysis}
+</plot>
+
+## 原始字幕
+<subtitles>
+${subtitle_content}
+</subtitles>
+
+## 输出语言
+<narration_language>
+${narration_language}
+</narration_language>
+
+## 用户选择的影视类型
+<drama_genre>
+${drama_genre}
+</drama_genre>
+
+## 类型写作规则
+必须按用户选择的影视类型调整表达重点，不要自行改判类型：
+- 剧情/情感：突出人物选择、关系裂痕、命运压力和情绪余波。
+- 悬疑/犯罪：突出线索、疑点、动机、误导和未揭开的真相。
+- 动作/冒险：突出目标、危险升级、身体对抗和关键抉择。
+- 喜剧/轻松：突出误会、反差、节奏包袱和人物可爱处。
+- 科幻/奇幻：突出设定规则、未知威胁、世界观反差和代价。
+- 历史/战争：突出时代处境、阵营选择、牺牲和局势变化。
+- 恐怖/惊悚：突出异常细节、压迫感、未知危险和心理悬念。
+- 自定义类型：严格服从用户填写的类型方向。
+
+## 开头钩子公式
+开头必须使用“人物困境 + 反常信息 + 悬念问题”：
+1. 先点出主角或关键人物正在面对什么压力。
+2. 再抛出一个违背常识、关系突变或危险升级的信息。
+3. 最后留下观众想继续看的问题：他为什么这样做、谁在撒谎、这场选择会把所有人推向哪里。
+
+## 写作规则
+1. 必须使用 ${narration_language}。
+2. 严格基于剧情理解和字幕事实，不编造核心情节、身份、结局。
+3. 先写清楚人物动机和因果链，再写情绪金句；不要只堆形容词。
+4. 每句话只表达一个信息点，适合后续按句匹配画面。
+5. 句子尽量短，单句优先 15-35 字；信息复杂时拆成多句。
+6. 每 2-3 句要有明确承接，让观众知道为什么从上一幕来到下一幕。
+7. 总长度控制在 350-750 字；短素材取下限，长素材取上限。
+8. 不要使用编号、项目符号、章节标题或括号说明。
+
+## 输出要求
+只输出解说正文。不要输出 JSON、时间戳、代码块或任何解释。"""
--- a/app/services/prompts/film_tv_narration/plot_analysis.py
+++ b/app/services/prompts/film_tv_narration/plot_analysis.py
@ -0,0 +1,99 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+"""
+@Project: 影视解说-剧情分析
+@File   : plot_analysis.py
+@Description: 影视剧情分析提示词
+"""
+
+from ..base import TextPrompt, PromptMetadata, ModelType, OutputFormat
+
+
+class PlotAnalysisPrompt(TextPrompt):
+    """影视剧情分析提示词"""
+
+    def __init__(self):
+        metadata = PromptMetadata(
+            name="plot_analysis",
+            category="film_tv_narration",
+            version="v1.0",
+            description="结合字幕和可选联网检索上下文，输出适合影视解说脚本生成的结构化剧情理解",
+            model_type=ModelType.TEXT,
+            output_format=OutputFormat.TEXT,
+            tags=["影视", "电影", "电视剧", "剧情分析", "字幕解析", "解说脚本素材"],
+            parameters=["subtitle_content"],
+        )
+        super().__init__(metadata)
+
+        self._system_prompt = (
+            "你是一位专业的影视解说策划和剧作分析师。请输出克制、结构化、"
+            "可直接供下游影视解说脚本生成使用的剧情理解材料。"
+        )
+
+    def get_template(self) -> str:
+        return """# 角色
+你是一位专业的影视解说策划和剧作分析师。你的输出不是给观众看的成片文案，而是给下游“影视解说脚本生成器”使用的结构化剧情理解材料。
+
+# 输入说明
+下面的输入可能只包含一个视频的原始字幕，也可能包含多个视频文件的字幕；也可能同时包含联网检索结果和原始字幕。
+- 联网检索结果只能用于辅助识别作品名称、人物关系、时代背景、公开剧情梗概。
+- 原始字幕是唯一可信的当前片段事实来源。
+- 如果联网检索结果与字幕冲突，必须以字幕为准。
+- 如果联网检索结果包含当前字幕尚未出现的后续剧情，只能放在“字幕未覆盖/需谨慎信息”中，不能写进当前剧情事实。
+- 多个视频字幕会以“视频 1: 文件名”“视频 2: 文件名”等标题分隔。时间戳均为对应视频内部时间，不是拼接后的累计时间。
+
+# 核心任务
+请基于输入完成剧情理解，目标是帮助后续生成高质量影视解说脚本：
+1. 识别作品名称、当前字幕范围、视频来源、联网检索辅助信息和字幕事实边界。
+2. 统一人物称呼，梳理人物关系、动机和当前场景中的立场变化。
+3. 用 120-220 字概括当前字幕覆盖的剧情，不提前剧透字幕未出现的内容。
+4. 按视频来源和字幕时间顺序拆分关键剧情段落，并为每段标注准确 video_id / video_name / 时间戳。
+5. 提炼解说创作可用的开场钩子、人物困境、情绪转折、信息反转、名场面和建议保留原声片段。
+
+# 强制输出规则
+1. 禁止输出寒暄、解释身份或“好的，我将……”等聊天式开场。
+2. 禁止编造字幕中没有的具体事件、对白、关系进展或结局。
+3. 时间戳必须直接来自对应视频字幕；无法确定时写“字幕未明确”，不要猜测。
+4. 多视频场景下必须明确每段来自哪个视频文件，禁止把不同视频的同名时间戳混在一起。
+5. 人名必须统一：优先采用联网检索中的正式名称；如果字幕写法不同，在人物表中保留“字幕称呼”。
+6. 内容要简洁、客观、可复用，避免散文化长段落。
+7. 必须严格按照下面的 Markdown 格式输出，不要添加额外章节。
+
+# 输出格式
+## 一、基础识别
+- 作品名称：[如输入可判断则填写，否则写“未知”]
+- 当前字幕范围：[开始时间戳] --> [结束时间戳]；无法确定则写“字幕未明确”
+- 视频来源：[列出视频编号、文件名和各自字幕时间范围；单视频也要写]
+- 联网检索确认：[仅写可辅助理解的公开信息；没有联网结果则写“未启用/未提供”]
+- 字幕内实际出现：[列出当前字幕真实出现的关键事实，2-5 条]
+- 字幕未覆盖/需谨慎信息：[列出联网结果提到但当前字幕未发生的内容；没有则写“无”]
+
+## 二、人物与关系
+| 统一称呼 | 字幕称呼 | 身份/关系 | 当前动机/立场 | 确定性 |
+|---|---|---|---|---|
+| [人物名] | [字幕原文称呼] | [身份或关系] | [在当前片段中的目标、压力或转变] | 字幕明确/联网辅助/合理推断 |
+
+## 三、整体剧情概括
+[120-220 字，只概括当前字幕覆盖的剧情。必须包含核心冲突、人物动机、场景推进和当前悬念。]
+
+## 四、分段剧情解析
+| 视频 | 时间戳 | 段落主题 | 剧情事件 | 叙事功能 |
+|---|---|---|---|---|
+| [video_id + video_name] | [开始] --> [结束] | [简短主题] | [当前段落发生了什么] | [铺垫/冲突升级/人物塑造/反转/悬念/情绪爆发/名场面等] |
+
+## 五、解说创作重点
+- 开场钩子：[用一句话指出最适合开场抓人的冲突、谜题或人物困境]
+- 核心冲突：[当前片段最主要的矛盾]
+- 情绪转折/信息反转：[列 1-3 条，没有则写“无明显”]
+- 名场面/高光对白：[列 1-3 条，没有则写“无明显”]
+- 悬念点：[当前片段留下的疑问或后续期待]
+- 建议保留原声片段：
+  1. [video_id + video_name + 时间戳]：[保留理由；如果没有合适原声，写“无明显”]
+
+## 六、联网信息校验
+- 可用于辅助理解的信息：[联网结果中可帮助理解当前字幕的信息；没有则写“无”]
+- 与字幕不一致或字幕未覆盖的信息：[必须列出，不要混入当前剧情事实；没有则写“无”]
+
+# 输入内容
+${subtitle_content}"""
--- a/app/services/prompts/film_tv_narration/script_generation.py
+++ b/app/services/prompts/film_tv_narration/script_generation.py
@ -0,0 +1,154 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+"""
+@Project: Film/TV narration - script generation
+@File   : script_generation.py
+@Description: Prompt for generating film/TV narration scripts
+"""
+
+from ..base import ParameterizedPrompt, PromptMetadata, ModelType, OutputFormat
+
+
+class ScriptGenerationPrompt(ParameterizedPrompt):
+    """影视解说脚本生成提示词"""
+
+    def __init__(self):
+        metadata = PromptMetadata(
+            name="script_generation",
+            category="film_tv_narration",
+            version="v1.0",
+            description="基于已规划片段生成高质量影视解说脚本，重点补足人物动机、信息承接和剧情因果",
+            model_type=ModelType.TEXT,
+            output_format=OutputFormat.JSON,
+            tags=["影视", "解说脚本", "文案生成", "原声片段", "悬念", "名场面"],
+            parameters=[
+                "drama_name",
+                "drama_genre",
+                "plot_analysis",
+                "subtitle_content",
+                "segment_plan",
+                "narration_language",
+            ],
+        )
+        super().__init__(metadata, required_parameters=["drama_name", "plot_analysis", "segment_plan"])
+
+        self._system_prompt = (
+            "你是一位影视解说文案写手。你必须严格按照JSON格式输出，"
+            "只能补充picture和narration，不能改动上游片段规划中的_id、video_id、video_name、timestamp和OST。"
+        )
+
+    def get_template(self) -> str:
+        return """# 影视解说脚本文案生成任务
+
+## 任务目标
+为影视作品《${drama_name}》生成最终可剪辑解说脚本。片段已经由上游规划完成，你只能补充 picture 和 narration，不能改变片段来源和时间戳。
+
+## 输入材料
+
+### 剧情概述
+<plot>
+${plot_analysis}
+</plot>
+
+### 已规划片段（必须逐项照抄结构字段）
+<segment_plan>
+${segment_plan}
+</segment_plan>
+
+### 原始字幕（含视频编号和精确时间戳）
+<subtitles>
+${subtitle_content}
+</subtitles>
+
+### 解说台词语言
+<narration_language>
+${narration_language}
+</narration_language>
+
+### 用户选择的影视类型
+<drama_genre>
+${drama_genre}
+</drama_genre>
+
+字幕可能来自多个视频文件。每个字幕分段标题会以“视频 1: 文件名”“视频 2: 文件名”等形式标识来源。
+生成脚本时必须把每个片段绑定到对应视频来源，时间戳表示该视频文件内部的局部时间，不是把多个视频拼接后的全局时间。
+所有 OST=0 的 narration 字段必须使用上方指定的解说台词语言输出；不要因为原始字幕是其他语言就切回字幕原语言。
+OST=1 的原声片段 narration 字段必须继续使用“播放原片+序号”格式，不要翻译这个固定标记。
+
+## 绝对绑定规则
+0. 最高优先级：如果 segment_plan 中混入片头、片尾、演职员表、版权声明、平台水印展示、下集预告、花絮、赞助口播、商品露出、贴片广告、中插广告、片中广告或任何与主线剧情无关的推广片段，必须直接删除这些片段，绝对不能输出到最终 items；此规则高于下面所有“照抄 segment_plan”的绑定规则。
+1. 除被第 0 条删除的片头、片尾和广告片段外，输出 items 数量、顺序和 _id 必须与 segment_plan 完全一致。
+2. 除被第 0 条删除的片头、片尾和广告片段外，每个 item 的 _id、video_id、video_name、timestamp、OST 必须逐字复制 segment_plan，不得新增、合并、拆分或改动。
+3. 你只能补充 picture 和 narration 两个字段。
+4. OST=1 的 narration 必须写成“播放原片+_id”，例如 _id 为 5 时写“播放原片5”。
+5. OST=0 的 narration 必须使用 ${narration_language}，并严格基于剧情和字幕，不虚构字幕外的具体事件。
+
+## 叙事连续性要求
+- 你必须把每个 OST=0 当成“观众理解剧情的桥”，不能只概括当前画面。
+- 每个 OST=0 narration 要尽量回答：上一段发生了什么、人物为什么这么做、这一段带来什么新信息或新危机。
+- 跨 video_id 或跨时间大跳跃时，OST=0 必须明确补出承接句，例如“真正危险的不是这场争吵，而是他终于发现证据指向了身边人”。
+- 原声片段前后的 OST=0 要解释原声的重要性，避免观众只看到对白片段合集。
+- 如果 segment_plan 中有 story_role、intent、transition 字段，必须利用它们组织 narration，但不要把这些字段输出到最终 JSON。
+- 结尾 OST=0 要留下后续阻力、真相疑问或人物选择；如果结尾是 OST=1，则前一个 OST=0 必须提前点出这段原声会把矛盾推向哪里。
+
+## 开头钩子要求
+- 第一段必须是 OST=0 解说钩子，不能直接播放原片。
+- 开头用“人物困境 + 反常信息 + 悬念问题”：主角压力 + 异常线索/关系突变 + 后续疑问。
+- 写法示例方向：他以为这只是一次普通问询，可一句话之后，所有证据都指向了他最信任的人。
+- 示例只用于理解公式，必须基于当前字幕事实原创，不要夸大到字幕没有的情节。
+
+## 解说密度与画面节奏
+- OST=0 文案必须能被当前 timestamp 的画面承载，按“解说字数 / 5 = 所需视频秒数”估算。
+- 如果画面只有 6 秒，就不要写 80 字；应压缩到约 30 字，或依赖 segment_plan 选择更长画面。
+- 优先短句，单句只表达一个信息点；不要把人物介绍、前因、反转和悬念全塞进一个短画面。
+- 长信息要拆成多段，每段只承担一个叙事功能，让画面节奏跟上解说。
+
+## 用户选择类型文案规则
+影视类型由用户手动选择为 ${drama_genre}，不得自行改判。必须按对应方向写：
+- 剧情/情感：突出人物选择、关系裂痕、命运压力和情绪余波。
+- 悬疑/犯罪：突出线索、疑点、动机、误导和未揭开的真相。
+- 动作/冒险：突出目标、危险升级、身体对抗和关键抉择。
+- 喜剧/轻松：突出误会、反差、节奏包袱和人物可爱处。
+- 科幻/奇幻：突出设定规则、未知威胁、世界观反差和代价。
+- 历史/战争：突出时代处境、阵营选择、牺牲和局势变化。
+- 恐怖/惊悚：突出异常细节、压迫感、未知危险和心理悬念。
+- 自定义类型：严格服从用户填写的类型方向。
+
+## 文案质量要求
+- 开场片段要有强钩子，直接点出冲突、疑点或人物困境。
+- 最终剪辑脚本不得包含片头、片尾或任何广告片段；如果字幕内容明显属于非剧情推广，不要把它包装成剧情解说。
+- 每段解说优先 25-90 字，具体长度必须服从画面时长；短画面宁可少说，不要密集灌信息。
+- 可以使用“可真正的问题是”“而他还不知道”“这句话背后”“危险已经开始靠近”等影视解说转折语，但不要堆砌。
+- picture 要描述画面和人物状态，便于后期识别素材。
+- 少用孤立信息句，多用承接句；不要让观众感觉剧情突然跳场。
+- 不要解释规则，不要输出 Markdown，不要输出代码块。
+
+## 输出格式
+
+请严格按照以下JSON格式输出，绝不添加任何其他文字、说明或代码块标记：
+
+{
+  "items": [
+    {
+        "_id": 1,
+        "video_id": 1,
+        "video_name": "1.mp4",
+        "timestamp": "00:00:01,000-00:00:05,500",
+        "picture": "男主站在审讯室门口，神情紧张地看向桌上的证据袋",
+        "narration": "他以为这只是一次普通问询，可桌上的证据却把所有矛头指向了自己。",
+        "OST": 0
+    },
+    {
+        "_id": 2,
+        "video_id": 1,
+        "video_name": "1.mp4",
+        "timestamp": "00:00:05,500-00:00:08,000",
+        "picture": "警官低声质问，男主沉默不语",
+        "narration": "播放原片2",
+        "OST": 1
+    }
+  ]
+}
+
+现在请基于以上要求，为影视作品《${drama_name}》创作解说脚本："""
--- a/app/services/prompts/film_tv_narration/script_matching.py
+++ b/app/services/prompts/film_tv_narration/script_matching.py
@ -0,0 +1,133 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+"""
+@Project: 影视解说-文案画面匹配
+@File   : script_matching.py
+@Description: 将用户审核后的影视解说文案匹配到字幕时间戳并生成最终剪辑脚本
+"""
+
+from ..base import ParameterizedPrompt, PromptMetadata, ModelType, OutputFormat
+
+
+class ScriptMatchingPrompt(ParameterizedPrompt):
+    """影视解说文案画面匹配提示词"""
+
+    def __init__(self):
+        metadata = PromptMetadata(
+            name="script_matching",
+            category="film_tv_narration",
+            version="v1.0",
+            description="将审核后的影视解说文案按叙事节奏拆分，并匹配到字幕时间戳生成最终剪辑JSON",
+            model_type=ModelType.TEXT,
+            output_format=OutputFormat.JSON,
+            tags=["影视", "画面匹配", "剪辑脚本", "时间戳", "用户文案"],
+            parameters=[
+                "drama_name",
+                "drama_genre",
+                "plot_analysis",
+                "subtitle_content",
+                "narration_copy",
+                "narration_language",
+                "original_sound_ratio",
+            ],
+        )
+        super().__init__(
+            metadata,
+            required_parameters=["drama_name", "subtitle_content", "narration_copy"],
+        )
+
+        self._system_prompt = (
+            "你是一位懂影视叙事节奏的剪辑师。你必须严格输出JSON，"
+            "核心任务是把用户审核后的解说文案逐句匹配到最合适的原视频字幕时间戳。"
+        )
+
+    def get_template(self) -> str:
+        return """# 影视解说文案画面匹配任务
+
+## 目标
+用户已经审核并修改了解说文案。请根据这份文案和原始字幕，生成最终可剪辑 JSON 脚本。
+
+## 作品名
+${drama_name}
+
+## 剧情理解材料
+<plot>
+${plot_analysis}
+</plot>
+
+## 用户审核后的解说文案
+<narration_copy>
+${narration_copy}
+</narration_copy>
+
+## 原始字幕（含视频编号和局部时间戳）
+<subtitles>
+${subtitle_content}
+</subtitles>
+
+## 输出语言
+<narration_language>
+${narration_language}
+</narration_language>
+
+## 用户选择的影视类型
+<drama_genre>
+${drama_genre}
+</drama_genre>
+
+## 用户选择的原片占比
+<original_sound_ratio>
+${original_sound_ratio}%
+</original_sound_ratio>
+
+## 匹配流程
+1. 先按句号、问号、感叹号、省略号切分解说文案，得到候选解说句。
+2. 逗号只在明显分割两个动作、场景、观点或描述对象时切分；不要切出没有独立意义的碎片。
+3. 不要求每个候选句都单独输出为 OST=0；可以合并、压缩相邻候选句作为剧情桥段，但不能改变用户文案的核心意思。
+4. 严禁把解说文案匹配到片头、片尾、演职员表、版权声明、平台水印展示、下集预告、花絮、赞助口播、商品露出、贴片广告、中插广告、片中广告或任何与主线剧情无关的推广片段；这些内容绝对不能进入最终 items。
+5. 如果字幕或画面文字出现“广告”“赞助”“推广”“片头”“片尾”“预告”“下集”“扫码”“购买”“会员”“关注”等明显非剧情信号，必须跳过对应时间段，不得用作 OST=0 或 OST=1。
+6. 为每个解说片段寻找最匹配的原始字幕画面，优先选择能表达该句核心含义、人物状态或信息转折的画面。
+7. 使用公式估算所需画面时长：所需秒数 = 解说字数 / 5。匹配画面时长尽量接近，误差优先控制在 ±0.5 秒。
+8. 如果一句解说太长，必须拆成多个 OST=0 片段，分别匹配不同或连续画面。
+9. timestamp 必须使用对应 video_id 内部局部时间戳，不得换算为多个视频拼接后的累计时间。
+10. 同一 video_id 内时间段不得交叉或重叠。
+11. 第一段必须是 OST=0 解说钩子，不能直接播放原片。
+12. OST=1 原声片段的总时长占比要尽量接近用户选择的 ${original_sound_ratio}%。这里按最终 items 的 timestamp 总时长估算，不按片段数量估算。
+13. 不要自行判断或改写影视类型；画面匹配和 picture 描述要服务用户选择的 ${drama_genre} 叙事重点。
+
+## 原片占比规则
+- ${original_sound_ratio}% = 0% 时，不要输出 OST=1，全部使用解说承接。
+- ${original_sound_ratio}% 在 10%-30% 时，只保留关键对白、信息反转、情绪爆发或名场面原声。
+- ${original_sound_ratio}% 在 40%-60% 时，解说负责串联因果，原片负责承载关键场面和对白。
+- ${original_sound_ratio}% 在 70%-90% 时，以原片对白和表演为主，解说只做开场钩子、转场桥和必要补充。
+- 如果原片占比与“第一段必须 OST=0”冲突，优先保证第一段是 OST=0，然后在后续片段提高 OST=1 时长占比。
+- 选择高原片占比时，可以把用户文案合并成更少的 OST=0 桥段，不要为了逐句使用文案而压低原片占比。
+
+## 字段规则
+- _id：从 1 开始连续递增。
+- video_id：来自字幕分段标题，例如“视频 2”就填 2。
+- video_name：对应视频文件名，必须从字幕分段标题提取。
+- timestamp：格式为 "HH:MM:SS,mmm-HH:MM:SS,mmm"。
+- picture：描述匹配画面中人物、动作、情绪、场景和关键道具。
+- narration：OST=0 时填写用户文案片段；OST=1 时填写“播放原片+_id”。
+- OST：解说片段填 0，原声片段填 1。
+
+## 输出格式
+只输出严格 JSON：
+
+{
+  "items": [
+    {
+      "_id": 1,
+      "video_id": 1,
+      "video_name": "1.mp4",
+      "timestamp": "00:00:01,000-00:00:06,000",
+      "picture": "主角站在走廊尽头，回头看向紧闭的房门",
+      "narration": "他以为自己终于逃出了那间房，可真正的危险，其实才刚刚醒来。",
+      "OST": 0
+    }
+  ]
+}
+
+现在请基于用户审核后的解说文案生成最终剪辑脚本。"""
--- a/app/services/prompts/film_tv_narration/script_repair.py
+++ b/app/services/prompts/film_tv_narration/script_repair.py
@ -0,0 +1,98 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+"""
+@Project: 影视解说-脚本修复
+@File   : script_repair.py
+@Description: 影视解说脚本校验失败后的JSON修复提示词
+"""
+
+from ..base import ParameterizedPrompt, PromptMetadata, ModelType, OutputFormat
+
+
+class ScriptRepairPrompt(ParameterizedPrompt):
+    """影视解说脚本修复提示词"""
+
+    def __init__(self):
+        metadata = PromptMetadata(
+            name="script_repair",
+            category="film_tv_narration",
+            version="v1.0",
+            description="根据确定性校验错误修复影视解说脚本JSON，优先修正时间戳、视频来源和格式问题",
+            model_type=ModelType.TEXT,
+            output_format=OutputFormat.JSON,
+            tags=["影视", "解说脚本", "JSON修复", "时间戳校验", "多视频"],
+            parameters=[
+                "drama_name",
+                "drama_genre",
+                "plot_analysis",
+                "subtitle_content",
+                "invalid_script",
+                "validation_errors",
+                "narration_language",
+            ],
+        )
+        super().__init__(
+            metadata,
+            required_parameters=["drama_name", "subtitle_content", "invalid_script", "validation_errors"],
+        )
+
+        self._system_prompt = (
+            "你是一位影视解说脚本JSON修复器。你只能根据校验错误修复JSON，"
+            "必须输出严格JSON，不能输出解释、Markdown或代码块。"
+        )
+
+    def get_template(self) -> str:
+        return """# 影视解说脚本修复任务
+
+## 修复目标
+下面的影视作品《${drama_name}》解说脚本未通过剪辑校验。请只根据校验错误和字幕内容修复它，输出一个完整可剪辑的 JSON。
+
+## 剧情理解材料
+<plot>
+${plot_analysis}
+</plot>
+
+## 校验错误
+<validation_errors>
+${validation_errors}
+</validation_errors>
+
+## 当前无效脚本
+<invalid_script>
+${invalid_script}
+</invalid_script>
+
+## 可用字幕窗口
+<subtitles>
+${subtitle_content}
+</subtitles>
+
+## 解说台词目标语言
+<narration_language>
+${narration_language}
+</narration_language>
+
+## 用户选择的影视类型
+<drama_genre>
+${drama_genre}
+</drama_genre>
+
+## 修复规则
+1. 只输出 JSON，不要任何解释、标题、Markdown 或代码块。
+2. 输出根对象必须是 {"items": [...]}。
+3. 每个 item 必须包含 _id、video_id、video_name、timestamp、picture、narration、OST。
+4. 必须删除片头、片尾、演职员表、版权声明、平台水印展示、下集预告、花絮、赞助口播、商品露出、贴片广告、中插广告、片中广告或任何与主线剧情无关的推广片段；这些内容绝对不能出现在修复后的 items 中。
+5. 如果字幕或画面文字出现“广告”“赞助”“推广”“片头”“片尾”“预告”“下集”“扫码”“购买”“会员”“关注”等明显非剧情信号，必须删除对应 item，不得改写成解说片段。
+6. video_id、video_name 和 timestamp 必须来自对应字幕窗口；不得把不同视频的同名时间戳混用。
+7. 同一 video_id 内片段不得交叉或重叠。
+8. OST=1 的 narration 必须是“播放原片+序号”；OST=0 的 narration 必须使用 ${narration_language}。
+9. 禁止连续 3 个或更多 OST=1；必须插入或改写 OST=0 解说片段承接剧情。
+10. 跨 video_id 切换前后不能都是 OST=1；必须至少有一个 OST=0 片段解释场景和剧情为什么切换。
+11. OST=0 narration 要补足人物动机、信息承接和因果转折，不要只概括当前画面。
+12. 第一段必须是 OST=0 解说钩子，按“人物困境 + 反常信息 + 悬念问题”写，不要直接播放原片。
+13. OST=0 文案必须匹配画面时长，按“解说字数 / 5 = 所需视频秒数”估算；过密时要缩短文案、延长时间戳或拆成多个片段。
+14. 不要自行改判影视类型；如需改写 narration，必须按用户选择的 ${drama_genre} 保持表达重点。
+15. 尽量保留原脚本中没有错误的片段；无法修复的片段可以删除，但剩余片段必须重新按 1 开始编号。
+
+请输出修复后的完整 JSON。"""
--- a/app/services/prompts/film_tv_narration/segment_planning.py
+++ b/app/services/prompts/film_tv_narration/segment_planning.py
@ -0,0 +1,105 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+"""
+@Project: 影视解说-片段规划
+@File   : segment_planning.py
+@Description: 影视解说脚本片段规划提示词
+"""
+
+from ..base import ParameterizedPrompt, PromptMetadata, ModelType, OutputFormat
+
+
+class SegmentPlanningPrompt(ParameterizedPrompt):
+    """影视解说片段规划提示词"""
+
+    def __init__(self):
+        metadata = PromptMetadata(
+            name="segment_planning",
+            category="film_tv_narration",
+            version="v1.0",
+            description="基于剧情理解和原始字幕规划可剪辑片段，优先保证影视叙事连续性和原声解说节奏",
+            model_type=ModelType.TEXT,
+            output_format=OutputFormat.JSON,
+            tags=["影视", "解说脚本", "片段规划", "时间戳", "多视频", "原声"],
+            parameters=["drama_name", "drama_genre", "plot_analysis", "subtitle_content", "narration_language"],
+        )
+        super().__init__(metadata, required_parameters=["drama_name", "plot_analysis", "subtitle_content"])
+
+        self._system_prompt = (
+            "你是一位影视解说剪辑规划师。你的任务是从字幕中选择可剪辑片段，"
+            "必须严格输出JSON，不能写解说文案，不能输出Markdown或额外说明。"
+        )
+
+    def get_template(self) -> str:
+        return """# 影视解说片段规划任务
+
+## 目标
+为影视作品《${drama_name}》规划一组可直接剪辑的视频片段。你只负责选片段和标注用途，不写最终解说台词。
+
+## 剧情理解材料
+<plot>
+${plot_analysis}
+</plot>
+
+## 原始字幕（含视频编号和局部时间戳）
+<subtitles>
+${subtitle_content}
+</subtitles>
+
+## 解说台词目标语言
+<narration_language>
+${narration_language}
+</narration_language>
+
+## 用户选择的影视类型
+<drama_genre>
+${drama_genre}
+</drama_genre>
+
+## 叙事规划目标
+你不是在挑精彩片段合集，而是在规划一条观众能顺着看懂的影视解说故事线。必须先想清楚“人物处境 -> 事件触发 -> 关系或信息变化 -> 新危机 -> 悬念”的因果链，再选片段。
+
+## 开场钩子规则
+第一段必须是 OST=0 解说开场，不要直接播放原片。开头参考“人物困境 + 反常信息 + 悬念问题”的公式：
+- 先给人物一个明确压力：被误解、被追捕、被迫选择、失去重要之人、发现异常线索。
+- 再给一个反常信息：熟人背叛、证据失效、规则被打破、危险提前出现。
+- 最后抛出问题：谁在说谎、真相藏在哪里、这次选择会付出什么代价。
+- 不要照抄示例，要基于字幕事实改写成当前作品自己的钩子。
+
+## 规划规则
+1. 只能使用原始字幕中真实存在的视频编号、视频文件名和时间范围。
+2. timestamp 必须是对应 video_id 内部的局部时间戳，禁止换算成多个视频拼接后的累计时间。
+3. 同一个 video_id 内的片段不得交叉或重叠；尽量按故事顺序排列。
+4. 严禁选择片头、片尾、演职员表、版权声明、平台水印展示、下集预告、花絮、赞助口播、商品露出、贴片广告、中插广告、片中广告或任何与主线剧情无关的推广片段；这些内容绝对不能进入 segments。
+5. 如果字幕或画面文字出现“广告”“赞助”“推广”“片头”“片尾”“预告”“下集”“扫码”“购买”“会员”“关注”等明显非剧情信号，必须整段跳过，不得用作 OST=0 或 OST=1。
+6. 每个片段必须推动主线、解释人物动机、制造情绪转折、承接原声或保留关键对白。
+7. OST=1 表示保留原声，适合关键对白、情绪爆发、真相揭露、名场面和反转；OST=0 表示后续需要配解说。
+8. 原声片段单段优先控制在 3-10 秒；解说片段可以更长，但必须能从字幕范围中定位。
+9. 影视类型由用户手动选择为 ${drama_genre}，不得自行改判；选片段时优先服务该类型的主要看点。
+10. 禁止连续 3 个或更多 OST=1；每 1-2 个原声片段后必须安排 OST=0 解说片段承接剧情。
+11. 跨 video_id 切换前后必须至少有一个 OST=0 片段作为剧情桥段，解释为什么从上一场转到下一场。
+12. 每个 OST=0 片段必须承担明确叙事功能：开场钩子、人物介绍、因果过渡、信息解释、情绪转折、冲突升级、结尾悬念。
+13. 不要跳过关键因果；关系变化、线索发现、危机升级必须有画面或解说桥段承接。
+14. 结尾优先选择能留下新问题、新危险或人物选择的片段，不要只停在原声对白堆叠上。
+15. 解说画面必须给足时长：按“解说字数 / 5 = 所需视频秒数”预估，短画面不要承载长解说。
+
+## 输出格式
+只输出严格 JSON：
+
+{
+  "segments": [
+    {
+      "_id": 1,
+      "video_id": 1,
+      "video_name": "1.mp4",
+      "timestamp": "00:00:01,000-00:00:05,500",
+      "OST": 0,
+      "story_role": "开场钩子",
+      "intent": "点出主角困境和反常线索，制造继续观看的疑问",
+      "transition": "从当前场景切入人物压力，引出下一段关键对白"
+    }
+  ]
+}
+
+现在请规划影视作品《${drama_name}》的解说片段。"""
--- a/app/services/prompts/short_drama_narration/init.py
+++ b/app/services/prompts/short_drama_narration/init.py
@ -10,7 +10,11 @@
 """

 from .plot_analysis import PlotAnalysisPrompt
+from .narration_copy import NarrationCopyPrompt
+from .segment_planning import SegmentPlanningPrompt
 from .script_generation import ScriptGenerationPrompt
+from .script_matching import ScriptMatchingPrompt
+from .script_repair import ScriptRepairPrompt
 from ..manager import PromptManager


@ -20,14 +24,34 @@ def register_prompts():
    # 注册剧情分析提示词
    plot_analysis_prompt = PlotAnalysisPrompt()
    PromptManager.register_prompt(plot_analysis_prompt, is_default=True)
+
+    # 注册可审核解说文案提示词
+    narration_copy_prompt = NarrationCopyPrompt()
+    PromptManager.register_prompt(narration_copy_prompt, is_default=True)
+
+    # 注册片段规划提示词
+    segment_planning_prompt = SegmentPlanningPrompt()
+    PromptManager.register_prompt(segment_planning_prompt, is_default=True)
    
    # 注册解说脚本生成提示词
    script_generation_prompt = ScriptGenerationPrompt()
    PromptManager.register_prompt(script_generation_prompt, is_default=True)

+    # 注册文案画面匹配提示词
+    script_matching_prompt = ScriptMatchingPrompt()
+    PromptManager.register_prompt(script_matching_prompt, is_default=True)
+
+    # 注册解说脚本修复提示词
+    script_repair_prompt = ScriptRepairPrompt()
+    PromptManager.register_prompt(script_repair_prompt, is_default=True)
+

 __all__ = [
    "PlotAnalysisPrompt",
+    "NarrationCopyPrompt",
+    "SegmentPlanningPrompt",
    "ScriptGenerationPrompt",
+    "ScriptMatchingPrompt",
+    "ScriptRepairPrompt",
    "register_prompts"
 ]
--- a/app/services/prompts/short_drama_narration/narration_copy.py
+++ b/app/services/prompts/short_drama_narration/narration_copy.py
@ -0,0 +1,88 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+"""
+@Project: 短剧解说-解说文案
+@File   : narration_copy.py
+@Description: 生成可供用户审核修改的短剧解说正文
+"""
+
+from ..base import ParameterizedPrompt, PromptMetadata, ModelType, OutputFormat
+
+
+class NarrationCopyPrompt(ParameterizedPrompt):
+    """短剧解说正文生成提示词"""
+
+    def __init__(self):
+        metadata = PromptMetadata(
+            name="narration_copy",
+            category="short_drama_narration",
+            version="v1.0",
+            description="基于剧情理解和字幕生成可审核修改的短剧解说正文，不绑定时间戳",
+            model_type=ModelType.TEXT,
+            output_format=OutputFormat.TEXT,
+            tags=["短剧", "解说文案", "爆款开头", "叙事连续性", "用户审核"],
+            parameters=["drama_name", "drama_genre", "plot_analysis", "subtitle_content", "narration_language"],
+        )
+        super().__init__(metadata, required_parameters=["drama_name", "plot_analysis", "subtitle_content"])
+
+        self._system_prompt = (
+            "你是一位短剧解说文案创作者。你只输出可供用户审核修改的解说正文，"
+            "不要输出JSON、时间戳、编号、标题、解释或Markdown。"
+        )
+
+    def get_template(self) -> str:
+        return """# 短剧解说正文创作任务
+
+## 目标
+为短剧《${drama_name}》创作一份可直接给用户审核修改的解说文案正文。此阶段不做画面匹配，不输出时间戳。
+
+## 剧情理解材料
+<plot>
+${plot_analysis}
+</plot>
+
+## 原始字幕
+<subtitles>
+${subtitle_content}
+</subtitles>
+
+## 输出语言
+<narration_language>
+${narration_language}
+</narration_language>
+
+## 用户选择的短剧类型
+<drama_genre>
+${drama_genre}
+</drama_genre>
+
+## 类型写作规则
+必须按用户选择的短剧类型调整表达重点，不要自行改判类型：
+- 霸总/甜宠：突出误会、身份差、暧昧拉扯、守护感和情绪反差。
+- 逆袭/复仇：突出羞辱、反击、打脸、身份揭露和爽点升级。
+- 家庭伦理：突出亲情撕扯、秘密、委屈、选择和道德冲突。
+- 古装/权谋：突出身份、局势、算计、立场和反转。
+- 悬疑/犯罪：突出线索、危机、动机和未揭开的疑问。
+- 都市情感：突出关系裂痕、现实压力、误会和情绪拉扯。
+- 年代/乡村：突出家庭处境、人情压力、生活困境和命运转折。
+- 自定义类型：严格服从用户填写的类型方向。
+
+## 开头钩子公式
+开头必须使用“高能反转 + 情绪冲突 + 悬念钩子”：
+1. 强身份或强处境：兵王、单亲妈妈、被赶出家门的女人、被全家看不起的人等。
+2. 致命反差：刚立功就被迫退役、刚回家就发现钱被输光、刚结婚就遇到孩子/婆婆阻挠。
+3. 后续悬念：真正的噩梦才开始、他要讨回的不是钱、这段关系真正难的不是相爱。
+
+## 写作规则
+1. 必须使用 ${narration_language}。
+2. 严格基于剧情理解和字幕事实，不编造核心情节、身份、结局。
+3. 先写完整故事线，再写金句；不要只堆爆点。
+4. 每句话只表达一个信息点，适合后续按句匹配画面。
+5. 句子尽量短，单句优先 15-35 字；信息复杂时拆成多句。
+6. 每 2-3 句要有明确因果承接，让观众知道为什么从上一幕来到下一幕。
+7. 总长度控制在 300-650 字；短素材取下限，长素材取上限。
+8. 不要使用编号、项目符号、章节标题或括号说明。
+
+## 输出要求
+只输出解说正文。不要输出 JSON、时间戳、代码块或任何解释。"""
--- a/app/services/prompts/short_drama_narration/plot_analysis.py
+++ b/app/services/prompts/short_drama_narration/plot_analysis.py
@ -19,72 +19,79 @@ class PlotAnalysisPrompt(TextPrompt):
        metadata = PromptMetadata(
            name="plot_analysis",
            category="short_drama_narration",
-            version="v1.0",
-            description="分析短剧字幕内容，提供详细的剧情分析和分段解析",
+            version="v1.1",
+            description="结合字幕和可选联网检索上下文，输出适合短剧解说脚本生成的结构化剧情理解",
            model_type=ModelType.TEXT,
            output_format=OutputFormat.TEXT,
-            tags=["短剧", "剧情分析", "字幕解析", "分段分析"],
+            tags=["短剧", "剧情分析", "字幕解析", "分段分析", "联网检索", "解说脚本素材"],
            parameters=["subtitle_content"]
        )
        super().__init__(metadata)
        
-        self._system_prompt = "你是一位专业的剧本分析师和剧情概括助手。"
+        self._system_prompt = "你是一位专业的短剧解说策划和剧本分析师。请输出克制、结构化、可直接供下游解说脚本生成使用的剧情理解材料。"
        
    def get_template(self) -> str:
        return """# 角色
-你是一位专业的剧本分析师和剧情概括助手。
+你是一位专业的短剧解说策划和剧本分析师。你的输出不是给观众看的成片文案，而是给下游“短剧解说脚本生成器”使用的结构化剧情理解材料。

-# 任务
-我将为你提供一部短剧的完整字幕文本。请你基于这些字幕，完成以下任务：
-1.  **整体剧情分析**：简要概括整个短剧的核心剧情脉络、主要冲突和结局（如果有的话）。
-2.  **分段剧情解析与时间戳定位**：
-    *   将整个短剧划分为若干个关键的剧情段落（例如：开端、发展、转折、高潮、结局，或根据具体情节自然划分）。
-    *   段落数应该与字幕长度成正比。
-    *   对于每一个剧情段落：
-        *   **概括该段落的主要内容**：用简洁的语言描述这段剧情发生了什么。
-        *   **标注对应的时间戳范围**：明确指出该剧情段落对应的开始字幕时间戳和结束字幕时间戳。请直接从字幕中提取时间信息。
+# 输入说明
+下面的输入可能只包含一个视频的原始字幕，也可能包含多个视频文件的字幕；也可能同时包含 Tavily 联网检索结果和原始字幕。
+- 联网检索结果只能用于辅助识别短剧名称、人物关系、时代背景、公开剧情梗概。
+- 原始字幕是唯一可信的当前片段事实来源。
+- 如果联网检索结果与字幕冲突，必须以字幕为准。
+- 如果联网检索结果包含当前字幕尚未出现的后续剧情，只能放在“字幕未覆盖/需谨慎信息”中，不能写进当前剧情事实。
+- 多个视频字幕会以“视频 1: 文件名”“视频 2: 文件名”等标题分隔。时间戳均为对应视频内部时间，不是拼接后的累计时间。

-# 输入格式
-字幕内容通常包含时间戳和对话，例如：
-```
-00:00:05,000 --> 00:00:10,000
-[角色A]: 你好吗？
-00:00:10,500 --> 00:00:15,000
-[角色B]: 我很好，谢谢。发生了一些有趣的事情。
-... (更多字幕内容) ...
-```
-我将把实际字幕粘贴在下方。
+# 核心任务
+请基于输入完成剧情理解，目标是帮助后续生成高质量短剧解说脚本：
+1. 识别短剧名称、当前字幕范围、视频来源、联网检索辅助信息和字幕事实边界。
+2. 统一人物称呼，避免同一人物出现多个名字写法。
+3. 用 100-180 字概括当前字幕覆盖的剧情，不提前剧透字幕未出现的内容。
+4. 按视频来源和字幕时间顺序拆分关键剧情段落，并为每段标注准确 video_id / video_name / 时间戳。
+5. 提炼解说创作可用的钩子、冲突、爽点/泪点/悬念点和建议保留原声片段。

-# 输出格式要求
-请按照以下格式清晰地呈现分析结果：
+# 强制输出规则
+1. 禁止输出寒暄、解释身份或“好的，我将……”等聊天式开场。
+2. 禁止编造字幕中没有的具体事件、对白、关系进展或结局。
+3. 时间戳必须直接来自对应视频字幕；无法确定时写“字幕未明确”，不要猜测。
+4. 多视频场景下必须明确每段来自哪个视频文件，禁止把不同视频的同名时间戳混在一起。
+5. 人名必须统一：优先采用联网检索中的正式名称；如果字幕写法不同，在人物表中保留“字幕称呼”。
+6. 内容要简洁、客观、可复用，避免散文化长段落。
+7. 必须严格按照下面的 Markdown 格式输出，不要添加额外章节。

-**一、整体剧情概括：**
-[此处填写对整个短剧剧情的概括]
+# 输出格式
+## 一、基础识别
+- 短剧名称：[如输入可判断则填写，否则写“未知”]
+- 当前字幕范围：[开始时间戳] --> [结束时间戳]；无法确定则写“字幕未明确”
+- 视频来源：[列出视频编号、文件名和各自字幕时间范围；单视频也要写]
+- 联网检索确认：[仅写可辅助理解的公开信息；没有联网结果则写“未启用/未提供”]
+- 字幕内实际出现：[列出当前字幕真实出现的关键事实，2-4 条]
+- 字幕未覆盖/需谨慎信息：[列出联网结果提到但当前字幕未发生的内容；没有则写“无”]

-**二、分段剧情解析：**
+## 二、人物与关系
+| 统一称呼 | 字幕称呼 | 身份/关系 | 当前剧情作用 | 确定性 |
+|---|---|---|---|---|
+| [人物名] | [字幕原文称呼] | [身份或关系] | [在当前片段中的作用] | 字幕明确/联网辅助/合理推断 |

-**剧情段落 1：[段落主题/概括，例如：主角登场与背景介绍]**
-*   **时间戳：** [开始时间戳] --> [结束时间戳]
-*   **内容概要：** [对这段剧情的详细描述]
+## 三、整体剧情概括
+[100-180 字，只概括当前字幕覆盖的剧情。必须包含核心冲突、人物动机和当前悬念。]

-**剧情段落 2：[段落主题/概括，例如：第一个冲突出现]**
-*   **时间戳：** [开始时间戳] --> [结束时间戳]
-*   **内容概要：** [对这段剧情的详细描述]
+## 四、分段剧情解析
+| 视频 | 时间戳 | 段落主题 | 剧情事件 | 情绪/冲突功能 |
+|---|---|---|---|---|
+| [video_id + video_name] | [开始] --> [结束] | [简短主题] | [当前段落发生了什么] | [铺垫/冲突升级/人物塑造/反转/悬念/情绪爆发等] |

-... (根据实际剧情段落数量继续) ...
+## 五、解说创作重点
+- 开场钩子：[用一句话指出最适合开场抓人的冲突或疑问]
+- 核心冲突：[当前片段最主要的矛盾]
+- 爽点/泪点/情绪点：[列 1-3 条，没有则写“无明显”]
+- 悬念点：[当前片段留下的疑问或后续期待]
+- 建议保留原声片段：
+  1. [video_id + video_name + 时间戳]：[保留理由；如果没有合适原声，写“无明显”]

-**剧情段落 N：[段落主题/概括，例如：结局与反思]**
-*   **时间戳：** [开始时间戳] --> [结束时间戳]
-*   **内容概要：** [对这段剧情的详细描述]
+## 六、联网信息校验
+- 可用于辅助理解的信息：[联网结果中可帮助理解当前字幕的信息；没有则写“无”]
+- 与字幕不一致或字幕未覆盖的信息：[必须列出，不要混入当前剧情事实；没有则写“无”]

-# 注意事项
-*   请确保时间戳的准确性，直接引用字幕中的时间。
-*   剧情段落的划分应合乎逻辑，能够反映剧情的起承转合。
-*   语言表达应简洁、准确、客观。
-
-# 限制
-1. 严禁输出与分析结果无关的内容
-2. 时间戳必须严格按照字幕中的实际时间
-
-# 请处理以下字幕：
+# 输入内容
 ${subtitle_content}"""
--- a/app/services/prompts/short_drama_narration/script_generation.py
+++ b/app/services/prompts/short_drama_narration/script_generation.py
@ -19,222 +19,112 @@ class ScriptGenerationPrompt(ParameterizedPrompt):
        metadata = PromptMetadata(
            name="script_generation",
            category="short_drama_narration",
-            version="v2.0",
-            description="基于短剧解说创作核心要素，生成高质量解说脚本，包含黄金开场、爽点放大、个性吐槽等专业技巧",
+            version="v2.1",
+            description="基于已规划片段生成高质量短剧解说脚本，重点补足剧情承接、因果解释和观众理解路径",
            model_type=ModelType.TEXT,
            output_format=OutputFormat.JSON,
            tags=["短剧", "解说脚本", "文案生成", "原声片段", "黄金开场", "爽点放大", "个性吐槽", "悬念预埋"],
-            parameters=["drama_name", "plot_analysis", "subtitle_content"]
+            parameters=[
+                "drama_name",
+                "drama_genre",
+                "plot_analysis",
+                "subtitle_content",
+                "segment_plan",
+                "narration_language",
+            ]
        )
-        super().__init__(metadata, required_parameters=["drama_name", "plot_analysis"])
+        super().__init__(metadata, required_parameters=["drama_name", "plot_analysis", "segment_plan"])
        
-        self._system_prompt = "你是一位顶级的短剧解说up主，精通短视频创作的所有核心技巧。你必须严格按照JSON格式输出，绝不能包含任何其他文字、说明或代码块标记。"
+        self._system_prompt = (
+            "你是一位短剧解说文案写手。你必须严格按照JSON格式输出，"
+            "只能补充picture和narration，不能改动上游片段规划中的_id、video_id、video_name、timestamp和OST。"
+        )
        
    def get_template(self) -> str:
-        return """# 短剧解说脚本创作任务
+        return """# 短剧解说脚本文案生成任务

 ## 任务目标
-我是一位专业的短剧解说up主，需要为短剧《${drama_name}》创作一份高质量的解说脚本。目标是让观众在短时间内了解剧情精华，并产生强烈的继续观看欲望。
+为短剧《${drama_name}》生成最终可剪辑解说脚本。片段已经由上游规划完成，你只能补充 picture 和 narration，不能改变片段来源和时间戳。

-## 素材信息
+## 输入材料

 ### 剧情概述
 <plot>
 ${plot_analysis}
 </plot>

-### 原始字幕（含精确时间戳）
+### 已规划片段（必须逐项照抄结构字段）
+<segment_plan>
+${segment_plan}
+</segment_plan>
+
+### 原始字幕（含视频编号和精确时间戳）
 <subtitles>
 ${subtitle_content}
 </subtitles>

-## 短剧解说创作核心要素
+### 解说台词语言
+<narration_language>
+${narration_language}
+</narration_language>

-### 1. 黄金开场（3秒法则）
-**开头3秒内必须制造强烈钩子，激发"想知道后续发展"的强烈好奇心**
- **悬念设置**：直接抛出最核心的冲突或疑问
-  * 示例："身为一个名声恶臭的政客，他知道自己早晚会被暗杀"
-  * 技巧：直接定性角色身份和处境，制造紧张感
- **冲突展示**：展现最激烈的对立关系
-  * 示例："而这一天，就在他刚露头的时候..."
-  * 技巧：用时间节点强调关键时刻的到来
- **情感共鸣**：触及观众内心的普遍情感
- **反转预告**：暗示即将发生的惊人转折
-  * 技巧：使用"没想到"、"原来"、"竟然"等词汇预告反转
+### 用户选择的短剧类型
+<drama_genre>
+${drama_genre}
+</drama_genre>

-### 2. 主线提炼（去繁就简）
-**快节奏解说，速度超越原剧，专注核心主线**
- 舍弃次要情节和配角，只保留推动主线的关键人物
- 突出核心矛盾冲突，每个片段都要推进主要故事线
- 快速跳过铺垫，直击剧情要害
- 确保每个解说片段都有明确的剧情推进作用
- **转折技巧**：大量使用"而这时"、"就在这时"、"没多久"等时间转折词
+字幕可能来自多个视频文件。每个字幕分段标题会以“视频 1: 文件名”“视频 2: 文件名”等形式标识来源。
+生成脚本时必须把每个片段绑定到对应视频来源，时间戳表示该视频文件内部的局部时间，不是把多个视频拼接后的全局时间。
+所有 OST=0 的 narration 字段必须使用上方指定的解说台词语言输出；不要因为原始字幕是其他语言就切回字幕原语言。
+OST=1 的原声片段 narration 字段必须继续使用“播放原片+序号”格式，不要翻译这个固定标记。

-### 3. 爽点放大（情绪引爆）
-**精准识别剧中"爽点"并用富有感染力的语言放大**
- **主角逆袭**：突出弱者变强、反败为胜的瞬间
- **反派被打脸**：强调恶人得到报应的痛快感
- **智商在线**：赞美角色的机智和策略
-  * 示例："豺狼已经提前数日跟踪这名清洁工，并在他身上放了窃听器"
-  * 技巧：展现角色的深谋远虑和专业能力
- **情感爆发**：放大感人、愤怒、震撼等强烈情绪
- 使用激昂语气和富有感染力的词汇调动观众情绪
+## 绝对绑定规则
+1. 输出 items 数量、顺序和 _id 必须与 segment_plan 完全一致。
+2. 每个 item 的 _id、video_id、video_name、timestamp、OST 必须逐字复制 segment_plan，不得新增、删除、合并、拆分或改动。
+3. 你只能补充 picture 和 narration 两个字段。
+4. OST=1 的 narration 必须写成“播放原片+_id”，例如 _id 为 5 时写“播放原片5”。
+5. OST=0 的 narration 必须使用 ${narration_language}，并严格基于剧情和字幕，不虚构字幕外的具体事件。

-### 4. 个性吐槽（增加趣味）
-**以观众视角进行犀利点评，体现解说员独特人设**
- 避免单纯复述剧情，要有自己的观点和态度
- **"上帝视角"分析技巧**：
-  * 揭示角色内心："他莫名地笑了一下"
-  * 分析动机："豺狼的这几步都是事先算好的"
-  * 预判后果："这又会有何代价呢"
- 适当吐槽剧情的套路或角色的愚蠢行为
- 用幽默、犀利的语言增加观看趣味
- 站在观众立场，说出观众想说的话
- **心理活动描述**：深入角色内心，增强代入感
+## 叙事连续性要求
+- 你必须把每个 OST=0 当成“观众理解剧情的桥”，不能只概括当前画面。
+- 每个 OST=0 narration 要尽量回答：上一段发生了什么、为什么会发展到这一段、这一段带来什么新矛盾。
+- 跨 video_id 或跨时间大跳跃时，OST=0 必须明确补出承接句，例如“可这段婚姻真正难的不是相爱，而是两个孩子和婆婆都还没接纳她”。
+- 原声片段前后的 OST=0 要解释原声的重要性，避免观众只看到对白片段合集。
+- 如果 segment_plan 中有 story_role、intent、transition 字段，必须利用它们组织 narration，但不要把这些字段输出到最终 JSON。
+- 结尾 OST=0 要留下后续阻力或悬念；如果结尾是 OST=1，则前一个 OST=0 必须提前点出这段原声会把矛盾推向哪里。

-### 5. 悬念预埋（引导互动）
-**在关键节点和结尾处"卖关子"，激发互动欲望**
- 在剧情高潮前停止，留下"接下来会发生什么"的疑问
- **悬念设置技巧**：
-  * 问题抛出："那么，UDC究竟是谁呢？"
-  * 反转预告："而从这句话开始，所有的专业、体面和虚伪的平静都将分崩瓦解"
-  * 时间悬念："几分钟后..."、"不久之后..."
- 提出引导性问题："你们觉得他会怎么做？"
- 预告后续精彩："更劲爆的还在后面"
- 为后续内容预热，激发评论、点赞、关注
+## 开头钩子要求
+- 第一段必须是 OST=0 解说钩子，不能直接播放原片。
+- 开头用“高能反转 + 情绪冲突 + 悬念钩子”：强身份/强处境 + 致命反差 + 后续悬念。
+- 写法示例方向：一个刚立功的兵王，下一秒却被迫脱下军装；他回家的第一天，家里的钱和尊严都被赌桌吞了。
+- 示例只用于理解公式，必须基于当前字幕事实原创，不要夸大到字幕没有的情节。

-### 6. 卡点配合（视听协调）
-**考虑文案与画面、音乐的完美结合**
- 在情感高潮处预设BGM卡点
- 解说节奏要配合画面节奏
- 重要台词处保留原声，解说适时停顿
- 追求文案+画面+音乐的协同效应
+## 解说密度与画面节奏
+- OST=0 文案必须能被当前 timestamp 的画面承载，按“解说字数 / 5 = 所需视频秒数”估算。
+- 如果画面只有 6 秒，就不要写 80 字；应压缩到约 30 字，或依赖 segment_plan 选择更长画面。
+- 优先短句，单句只表达一个信息点；不要把人物介绍、前因、反转和悬念全塞进一个短画面。
+- 长信息要拆成多段，每段只承担一个叙事功能，让画面节奏跟上解说。

-## 专业解说语言技巧
+## 用户选择类型文案规则
+短剧类型由用户手动选择为 ${drama_genre}，不得自行改判。必须按对应方向写：
+- 霸总/甜宠：突出误会、身份差、暧昧拉扯、守护感和情绪反差。
+- 逆袭/复仇：突出羞辱、反击、打脸、身份揭露和爽点升级。
+- 家庭伦理：突出亲情撕扯、秘密、委屈、选择和道德冲突。
+- 古装/权谋：突出身份、局势、算计、立场和反转。
+- 悬疑/犯罪：突出线索、危机、动机和未揭开的疑问。
+- 都市情感：突出关系裂痕、现实压力、误会和情绪拉扯。
+- 年代/乡村：突出家庭处境、人情压力、生活困境和命运转折。
+- 自定义类型：严格服从用户填写的类型方向。

-### 1. 氛围营造技巧
-**通过环境和细节描述增强画面感和代入感**
- **环境描述**："在这个距离，枪声都无法传到那边"
- **细节刻画**："他的床头有酒，身边的纸碟堆满烟头"
- **氛围渲染**："黑暗树林里有一间仓房"
- **情绪描述**："孤独又无助的豺狼，竟在这时露出了反常的一面"
+## 文案质量要求
+- 开场片段要有强钩子，直接点出冲突、悬念或情绪爆点。
+- 每段解说优先 25-90 字，具体长度必须服从画面时长；短画面宁可少说，不要密集灌信息。
+- 可以使用“没想到”“可下一秒”“而这时”“真正的问题来了”等短剧转折语，但不要堆砌。
+- picture 要描述画面和人物状态，便于后期识别素材。
+- 少用孤立信息句，多用承接句；不要让观众感觉剧情突然跳场。
+- 不要解释规则，不要输出 Markdown，不要输出代码块。

-### 2. 情感词汇运用
-**使用富有感染力的词汇调动观众情绪**
- **紧张感**："名声恶臭"、"早晚会被暗杀"、"动用军警资源"
- **神秘感**："尘封的传奇"、"高度机密"、"暗藏玄机"
- **震撼感**："空前绝后的一枪"、"天衣无缝"、"神不知鬼不觉"
- **悲伤感**："目光非常悲伤"、"注定永远无法哀悼"
-
-### 3. 节奏控制技巧
-**通过语言节奏控制观众注意力**
- **快节奏推进**：使用短句，密集信息
- **慢节奏渲染**：使用长句，详细描述
- **停顿技巧**：在关键信息前适当停顿
- **重复强调**：重要信息适当重复
-
-## 严格技术要求
-
-### 时间戳管理（绝对不能违反）
- **时间戳绝对不能重叠**，确保剪辑后无重复画面
- **时间段必须连续且不交叉**，严格按时间顺序排列
- **每个时间戳都必须在原始字幕中找到对应范围**
- 可以拆分原时间片段，但必须保持时间连续性
- 时间戳的格式必须与原始字幕中的格式完全一致
-
-### 时长控制（1/3原则）
- **解说视频总长度 = 原视频长度的 1/3**
- 精确控制节奏和密度，既不能过短也不能过长
- 合理分配解说和原声的时间比例
-
-### 剧情连贯性
- **保持故事逻辑完整**，确保情节发展自然流畅
- **严格按照时间顺序**，禁止跳跃式叙述
- **符合因果逻辑**：先发生A，再发生B，A导致B
-
-## 原声片段使用规范
-
-### 原声片段格式要求
-原声片段必须严格按照以下JSON格式：
-```json
-{
-  "_id": 序号,
-  "timestamp": "开始时间-结束时间",
-  "picture": "画面内容描述",
-  "narration": "播放原片+序号",
-  "OST": 1
-}
-```
-
-### 原声片段插入策略
-
-#### 1. 关键情绪爆发点
-**在角色强烈情绪表达时必须保留原声，增强观众代入感**
- **愤怒爆发**：角色愤怒咆哮、情绪失控的瞬间
-  * 参考："Come on, you bastard. Reaching."（愤怒对峙）
- **感动落泪**：角色感动哭泣、情感宣泄的时刻
- **震惊反应**：角色震惊、不敢置信的表情和台词
-  * 参考："Are you sure about that?"（质疑震惊）
- **绝望崩溃**：角色绝望、崩溃的情感表达
-  * 参考："Charles you're scaring me, what's wrong"（恐惧绝望）
- **狂欢庆祝**：角色兴奋、狂欢的情绪高潮
-
-#### 2. 重要对白时刻
-**保留推动剧情发展的关键台词和对话**
- **身份揭露**：揭示角色真实身份的重要台词
- **真相大白**：揭晓谜底、真相的关键对话
- **情感告白**：爱情告白、情感表达的重要台词
-  * 参考："i'm really not good"（情感表达）
- **威胁警告**：反派威胁、警告的重要对白
-  * 参考："You do not want to make enemies of these people"（威胁警告）
- **决定宣布**：角色做出重要决定的宣告
-
-#### 3. 爽点瞬间
-**在"爽点"时刻保留原声增强痛快感**
- **主角逆袭**：弱者反击、逆转局面的台词
- **反派被打脸**：恶人得到报应、被揭穿的瞬间
- **智商碾压**：主角展现智慧、碾压对手的台词
-  * 参考："That is a fucking work of art guys"（技能展示）
- **正义伸张**：正义得到伸张、恶有恶报的时刻
- **实力展现**：主角展现真实实力、震撼全场
-
-#### 4. 悬念节点
-**在制造悬念或揭晓答案的关键时刻保留原声**
- **悬念制造**：制造悬念、留下疑问的台词
- **答案揭晓**：揭晓答案、解开谜团的对话
- **转折预告**：暗示即将发生转折的重要台词
- **危机降临**：危机来临、紧张时刻的对白
-
-#### 5. 经典台词时刻
-**保留具有强烈感染力和记忆点的经典台词**
- **哲理感悟**：角色的人生感悟和哲理思考
- **幽默调侃**：轻松幽默的对话增加趣味性
- **专业术语**：体现角色专业性的术语和对话
-  * 参考："The scanner will pick up the metal components"（专业解释）
- **情感共鸣**：能引起观众共鸣的经典表达
-
-### 原声片段技术规范
-
-#### 格式规范
- **OST字段**：设置为1表示保留原声（解说片段设置为0）
- **narration格式**：严格使用"播放原片+序号"（如"播放原片26"）
- **picture字段**：详细描述画面内容，便于后期剪辑参考
- **时间戳精度**：必须与字幕中的重要对白时间精确匹配
-
-#### 比例控制
- **原声与解说比例**：7:3（原声70%，解说30%）
- **分布均匀**：原声片段要在整个视频中均匀分布
- **长度适中**：单个原声片段时长控制在3-8秒
- **衔接自然**：原声片段与解说片段之间衔接自然流畅
-
-#### 选择原则
- **情感优先**：优先选择情感强烈的台词和对话
- **剧情关键**：必须是推动剧情发展的重要内容
- **观众共鸣**：选择能引起观众共鸣的经典台词
- **视听效果**：考虑台词的声音效果和表演张力
- **代入感强**：选择能让观众产生强烈代入感的对话
-
-## 输出格式要求
+## 输出格式

 请严格按照以下JSON格式输出，绝不添加任何其他文字、说明或代码块标记：

@ -242,6 +132,8 @@ ${subtitle_content}
  "items": [
    {
        "_id": 1,
+        "video_id": 1,
+        "video_name": "1.mp4",
        "timestamp": "00:00:01,000-00:00:05,500",
        "picture": "女主角林小雨慌张地道歉，男主角沈墨轩冷漠地看着她",
        "narration": "一个普通女孩的命运即将因为一杯咖啡彻底改变！她撞到的这个男人，竟然是...",
@ -249,6 +141,8 @@ ${subtitle_content}
    },
    {
        "_id": 2,
+        "video_id": 1,
+        "video_name": "1.mp4",
        "timestamp": "00:00:05,500-00:00:08,000",
        "picture": "沈墨轩质问林小雨，语气冷厉威严",
        "narration": "播放原片2",
@ -256,6 +150,8 @@ ${subtitle_content}
    },
    {
        "_id": 3,
+        "video_id": 2,
+        "video_name": "2.mp4",
        "timestamp": "00:00:08,000-00:00:12,000",
        "picture": "林小雨惊慌失措，沈墨轩眼中闪过一丝兴趣",
        "narration": "霸道总裁的经典开场！一杯咖啡引发的爱情故事就这样开始了...",
@ -264,44 +160,4 @@ ${subtitle_content}
  ]
 }

-## 质量标准
-
-### 解说文案要求：
- **字数控制**：每段解说文案80-150字
- **语言风格**：生动有趣，富有感染力，符合短视频观众喜好
-  * 参考风格："身为一个名声恶臭的政客，他知道自己早晚会被暗杀"
-  * 直接定性，制造紧张感和代入感
- **情感调动**：能够有效调动观众情绪，产生代入感
-  * 使用"而这时"、"没想到"、"原来"等转折词增强戏剧性
- **节奏把控**：快节奏但不失条理，紧凑但不混乱
-  * 短句推进剧情，长句渲染氛围
-
-### 技术规范：
- **解说与原片比例**：3:7（解说30%，原片70%）
- **原声片段标识**：OST=1表示原声，OST=0表示解说
- **原声格式规范**：narration字段必须使用"播放原片+序号"格式
- **关键情绪点**：必须保留原片原声，增强观众代入感
- **时间戳精度**：精确到毫秒级别，确保与字幕完美匹配
- **逻辑连贯性**：严格遵循剧情发展顺序
-
-### 创作原则：
-1. **只输出JSON内容**，不要任何说明性文字
-2. **严格基于提供的剧情和字幕**，不虚构内容
-3. **突出核心冲突**，舍弃无关细节
-4. **强化观众体验**，始终考虑观看感受
-5. **保持专业水准**，体现解说up主的专业素养
-6. **融入经典解说技巧**：
-   - 大量使用"上帝视角"分析
-   - 适时插入心理活动描述
-   - 运用悬念设置和反转技巧
-   - 保持强烈的画面感和代入感
-
-### 参考解说风格示例：
- **开场悬念**："身为一个名声恶臭的政客，他知道自己早晚会被暗杀"
- **转折技巧**："而这一天，就在他刚露头的时候..."
- **上帝视角**："豺狼已经提前数日跟踪这名清洁工"
- **情感渲染**："孤独又无助的豺狼，竟在这时露出了反常的一面"
- **悬念设置**："那么，UDC究竟是谁呢？"
- **反转预告**："而从这句话开始，所有的专业、体面和虚伪的平静都将分崩瓦解"
-
 现在请基于以上要求，为短剧《${drama_name}》创作解说脚本："""
--- a/app/services/prompts/short_drama_narration/script_matching.py
+++ b/app/services/prompts/short_drama_narration/script_matching.py
@ -0,0 +1,131 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+"""
+@Project: 短剧解说-文案画面匹配
+@File   : script_matching.py
+@Description: 将用户审核后的解说文案匹配到字幕时间戳并生成最终剪辑脚本
+"""
+
+from ..base import ParameterizedPrompt, PromptMetadata, ModelType, OutputFormat
+
+
+class ScriptMatchingPrompt(ParameterizedPrompt):
+    """短剧解说文案画面匹配提示词"""
+
+    def __init__(self):
+        metadata = PromptMetadata(
+            name="script_matching",
+            category="short_drama_narration",
+            version="v1.0",
+            description="将审核后的解说文案按叙事节奏拆分，并匹配到字幕时间戳生成最终剪辑JSON",
+            model_type=ModelType.TEXT,
+            output_format=OutputFormat.JSON,
+            tags=["短剧", "画面匹配", "剪辑脚本", "时间戳", "用户文案"],
+            parameters=[
+                "drama_name",
+                "drama_genre",
+                "plot_analysis",
+                "subtitle_content",
+                "narration_copy",
+                "narration_language",
+                "original_sound_ratio",
+            ],
+        )
+        super().__init__(
+            metadata,
+            required_parameters=["drama_name", "subtitle_content", "narration_copy"],
+        )
+
+        self._system_prompt = (
+            "你是一位懂叙事节奏的短剧剪辑师。你必须严格输出JSON，"
+            "核心任务是把用户审核后的解说文案逐句匹配到最合适的原视频字幕时间戳。"
+        )
+
+    def get_template(self) -> str:
+        return """# 短剧解说文案画面匹配任务
+
+## 目标
+用户已经审核并修改了解说文案。请根据这份文案和原始字幕，生成最终可剪辑 JSON 脚本。
+
+## 剧名
+${drama_name}
+
+## 剧情理解材料
+<plot>
+${plot_analysis}
+</plot>
+
+## 用户审核后的解说文案
+<narration_copy>
+${narration_copy}
+</narration_copy>
+
+## 原始字幕（含视频编号和局部时间戳）
+<subtitles>
+${subtitle_content}
+</subtitles>
+
+## 输出语言
+<narration_language>
+${narration_language}
+</narration_language>
+
+## 用户选择的短剧类型
+<drama_genre>
+${drama_genre}
+</drama_genre>
+
+## 用户选择的原片占比
+<original_sound_ratio>
+${original_sound_ratio}%
+</original_sound_ratio>
+
+## 匹配流程
+1. 先按句号、问号、感叹号、省略号切分解说文案，得到候选解说句。
+2. 逗号只在明显分割两个动作、场景、观点或描述对象时切分；不要切出没有独立意义的碎片。
+3. 不要求每个候选句都单独输出为 OST=0；可以合并、压缩相邻候选句作为剧情桥段，但不能改变用户文案的核心意思。
+4. 为每个解说片段寻找最匹配的原始字幕画面，优先选择能表达该句核心含义的画面。
+5. 使用公式估算所需画面时长：所需秒数 = 解说字数 / 5。匹配画面时长尽量接近，误差优先控制在 ±0.5 秒。
+6. 如果一句解说太长，必须拆成多个 OST=0 片段，分别匹配不同或连续画面。
+7. timestamp 必须使用对应 video_id 内部局部时间戳，不得换算为多个视频拼接后的累计时间。
+8. 同一 video_id 内时间段不得交叉或重叠。
+9. 第一段必须是 OST=0 解说钩子，不能直接播放原片。
+10. OST=1 原声片段的总时长占比要尽量接近用户选择的 ${original_sound_ratio}%。这里按最终 items 的 timestamp 总时长估算，不按片段数量估算。
+11. 不要自行判断或改写短剧类型；画面匹配和 picture 描述要服务用户选择的 ${drama_genre} 叙事重点。
+
+## 原片占比规则
+- 原片占比为 0% 时，不要输出 OST=1，全部使用解说承接。
+- 原片占比在 10%-30% 时，只保留关键对白、反转、情绪爆发或爽点原声。
+- 原片占比在 40%-60% 时，解说负责串联因果，原片负责承载关键场面和对白。
+- 原片占比在 70%-90% 时，以原片对白和表演为主，解说只做开场钩子、转场桥和必要补充。
+- 如果原片占比与“第一段必须 OST=0”冲突，优先保证第一段是 OST=0，然后在后续片段提高 OST=1 时长占比。
+- 选择高原片占比时，可以把用户文案合并成更少的 OST=0 桥段，不要为了逐句使用文案而压低原片占比。
+
+## 字段规则
+- _id：从 1 开始连续递增。
+- video_id：来自字幕分段标题，例如“视频 2”就填 2。
+- video_name：对应视频文件名，必须从字幕分段标题提取。
+- timestamp：格式为 "HH:MM:SS,mmm-HH:MM:SS,mmm"。
+- picture：描述匹配画面中人物、动作、情绪和场景。
+- narration：OST=0 时填写用户文案片段；OST=1 时填写“播放原片+_id”。
+- OST：解说片段填 0，原声片段填 1。
+
+## 输出格式
+只输出严格 JSON：
+
+{
+  "items": [
+    {
+      "_id": 1,
+      "video_id": 1,
+      "video_name": "1.mp4",
+      "timestamp": "00:00:01,000-00:00:06,000",
+      "picture": "主角站在门口，震惊地看着屋内混乱的场面",
+      "narration": "一个刚立功的兵王，回家的第一天就发现家里四百万被亲爹输光。",
+      "OST": 0
+    }
+  ]
+}
+
+现在请基于用户审核后的解说文案生成最终剪辑脚本。"""
--- a/app/services/prompts/short_drama_narration/script_repair.py
+++ b/app/services/prompts/short_drama_narration/script_repair.py
@ -0,0 +1,96 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+"""
+@Project: 短剧解说-脚本修复
+@File   : script_repair.py
+@Description: 短剧解说脚本校验失败后的JSON修复提示词
+"""
+
+from ..base import ParameterizedPrompt, PromptMetadata, ModelType, OutputFormat
+
+
+class ScriptRepairPrompt(ParameterizedPrompt):
+    """短剧解说脚本修复提示词"""
+
+    def __init__(self):
+        metadata = PromptMetadata(
+            name="script_repair",
+            category="short_drama_narration",
+            version="v1.0",
+            description="根据确定性校验错误修复短剧解说脚本JSON，优先修正时间戳、视频来源和格式问题",
+            model_type=ModelType.TEXT,
+            output_format=OutputFormat.JSON,
+            tags=["短剧", "解说脚本", "JSON修复", "时间戳校验", "多视频"],
+            parameters=[
+                "drama_name",
+                "drama_genre",
+                "plot_analysis",
+                "subtitle_content",
+                "invalid_script",
+                "validation_errors",
+                "narration_language",
+            ],
+        )
+        super().__init__(
+            metadata,
+            required_parameters=["drama_name", "subtitle_content", "invalid_script", "validation_errors"],
+        )
+
+        self._system_prompt = (
+            "你是一位短剧解说脚本JSON修复器。你只能根据校验错误修复JSON，"
+            "必须输出严格JSON，不能输出解释、Markdown或代码块。"
+        )
+
+    def get_template(self) -> str:
+        return """# 短剧解说脚本修复任务
+
+## 修复目标
+下面的短剧《${drama_name}》解说脚本未通过剪辑校验。请只根据校验错误和字幕内容修复它，输出一个完整可剪辑的 JSON。
+
+## 剧情理解材料
+<plot>
+${plot_analysis}
+</plot>
+
+## 校验错误
+<validation_errors>
+${validation_errors}
+</validation_errors>
+
+## 当前无效脚本
+<invalid_script>
+${invalid_script}
+</invalid_script>
+
+## 可用字幕窗口
+<subtitles>
+${subtitle_content}
+</subtitles>
+
+## 解说台词目标语言
+<narration_language>
+${narration_language}
+</narration_language>
+
+## 用户选择的短剧类型
+<drama_genre>
+${drama_genre}
+</drama_genre>
+
+## 修复规则
+1. 只输出 JSON，不要任何解释、标题、Markdown 或代码块。
+2. 输出根对象必须是 {"items": [...]}。
+3. 每个 item 必须包含 _id、video_id、video_name、timestamp、picture、narration、OST。
+4. video_id、video_name 和 timestamp 必须来自对应字幕窗口；不得把不同视频的同名时间戳混用。
+5. 同一 video_id 内片段不得交叉或重叠。
+6. OST=1 的 narration 必须是“播放原片+_id”（例如 _id 为 5 时写“播放原片5”）；OST=0 的 narration 必须使用 ${narration_language}。
+7. 禁止连续 3 个或更多 OST=1；必须插入或改写 OST=0 解说片段承接剧情。
+8. 跨 video_id 切换前后不能都是 OST=1；必须至少有一个 OST=0 片段解释场景和剧情为什么切换。
+9. OST=0 narration 要补足因果承接，不要只概括当前画面。
+10. 第一段必须是 OST=0 解说钩子，按“高能反转 + 情绪冲突 + 悬念钩子”写，不要直接播放原片。
+11. OST=0 文案必须匹配画面时长，按“解说字数 / 5 = 所需视频秒数”估算；过密时要缩短文案、延长时间戳或拆成多个片段。
+12. 不要自行改判短剧类型；如需改写 narration，必须按用户选择的 ${drama_genre} 保持表达重点。
+13. 尽量保留原脚本中没有错误的片段；无法修复的片段可以删除，但剩余片段必须重新按 1 开始编号。
+
+请输出修复后的完整 JSON。"""
--- a/app/services/prompts/short_drama_narration/segment_planning.py
+++ b/app/services/prompts/short_drama_narration/segment_planning.py
@ -0,0 +1,104 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+"""
+@Project: 短剧解说-片段规划
+@File   : segment_planning.py
+@Description: 短剧解说脚本片段规划提示词
+"""
+
+from ..base import ParameterizedPrompt, PromptMetadata, ModelType, OutputFormat
+
+
+class SegmentPlanningPrompt(ParameterizedPrompt):
+    """短剧解说片段规划提示词"""
+
+    def __init__(self):
+        metadata = PromptMetadata(
+            name="segment_planning",
+            category="short_drama_narration",
+            version="v1.1",
+            description="基于剧情理解和原始字幕规划可剪辑片段，优先保证叙事连续性、跨视频承接和原声解说节奏",
+            model_type=ModelType.TEXT,
+            output_format=OutputFormat.JSON,
+            tags=["短剧", "解说脚本", "片段规划", "时间戳", "多视频", "原声"],
+            parameters=["drama_name", "drama_genre", "plot_analysis", "subtitle_content", "narration_language"],
+        )
+        super().__init__(metadata, required_parameters=["drama_name", "plot_analysis", "subtitle_content"])
+
+        self._system_prompt = (
+            "你是一位短剧解说剪辑规划师。你的任务是从字幕中选择可剪辑片段，"
+            "必须严格输出JSON，不能写解说文案，不能输出Markdown或额外说明。"
+        )
+
+    def get_template(self) -> str:
+        return """# 短剧解说片段规划任务
+
+## 目标
+为短剧《${drama_name}》规划一组可直接剪辑的视频片段。你只负责选片段和标注用途，不写最终解说台词。
+
+## 剧情理解材料
+<plot>
+${plot_analysis}
+</plot>
+
+## 原始字幕（含视频编号和局部时间戳）
+<subtitles>
+${subtitle_content}
+</subtitles>
+
+## 解说台词目标语言
+<narration_language>
+${narration_language}
+</narration_language>
+
+## 用户选择的短剧类型
+<drama_genre>
+${drama_genre}
+</drama_genre>
+
+## 叙事规划目标
+你不是在挑精彩片段合集，而是在规划一条观众能顺着看懂的短剧解说故事线。必须先想清楚“人物困境 -> 冲突触发 -> 关系变化 -> 新阻力 -> 悬念”的因果链，再选片段。
+
+## 爆款开头钩子规则
+第一段必须是 OST=0 解说开场，不要直接播放原片。开头参考“高能反转 + 情绪冲突 + 悬念钩子”的公式：
+- 先给人物一个强身份或强处境：兵王、单亲妈妈、被赶出家门的女人、被全家看不起的赘婿。
+- 再给一个反差冲突：刚立功就被迫退役、刚回家就发现钱被输光、刚结婚就遇到孩子/婆婆阻挠。
+- 最后抛出悬念：真正的噩梦才开始、他要讨回的不是钱、这场婚姻真正难的不是相爱。
+- 不要照抄示例，要基于字幕事实改写成当前剧情自己的钩子。
+
+## 规划规则
+1. 只能使用原始字幕中真实存在的视频编号、视频文件名和时间范围。
+2. timestamp 必须是对应 video_id 内部的局部时间戳，禁止换算成多个视频拼接后的累计时间。
+3. 同一个 video_id 内的片段不得交叉或重叠；尽量按故事顺序排列。
+4. 每个片段必须推动主线、制造情绪点、承接原声或保留关键对白。
+5. OST=1 表示保留原声，适合关键对白、情绪爆发、身份揭露、反转和爽点；OST=0 表示后续需要配解说。
+6. 原声片段单段优先控制在 3-8 秒；解说片段可以更长，但必须能从字幕范围中定位。
+7. 短剧类型由用户手动选择为 ${drama_genre}，不得自行改判；选片段时优先服务该类型的主要看点。
+8. 禁止连续 3 个或更多 OST=1；每 1-2 个原声片段后必须安排 OST=0 解说片段承接剧情。
+9. 跨 video_id 切换前后必须至少有一个 OST=0 片段作为剧情桥段，解释为什么从上一场转到下一场。
+10. 每个 OST=0 片段必须承担明确叙事功能：开场钩子、人物介绍、因果过渡、冲突升级、关系转折、阻力解释、结尾悬念。
+11. 不要跳过关键因果：例如从求婚直接跳到孩子/婆婆阻挠，中间必须用 OST=0 解释“婚姻真正的难题变成家庭接纳”。
+12. 结尾优先选择能留下后续阻力或新矛盾的片段，不要只停在原声对白堆叠上。
+13. 解说画面必须给足时长：按“解说字数 / 5 = 所需视频秒数”预估，短画面不要承载长解说。
+14. OST=0 片段如果需要讲清多层信息，应选择更长的连续画面，或拆成多个 OST=0 片段分别承接。
+
+## 输出格式
+只输出严格 JSON：
+
+{
+  "segments": [
+    {
+      "_id": 1,
+      "video_id": 1,
+      "video_name": "1.mp4",
+      "timestamp": "00:00:01,000-00:00:05,500",
+      "OST": 0,
+      "story_role": "开场钩子",
+      "intent": "女主被羞辱，制造逆袭期待",
+      "transition": "从灾后恢复现场切入女主处境，引出她为什么敢和领导硬刚"
+    }
+  ]
+}
+
+现在请规划短剧《${drama_name}》的解说片段。"""
--- a/app/services/script_subtitle.py
+++ b/app/services/script_subtitle.py
@ -0,0 +1,421 @@
+import os
+import re
+import unicodedata
+from typing import Iterable, List, Optional, Sequence, Tuple
+
+from loguru import logger
+
+from app.services.short_drama_narration_validation import build_subtitle_index
+from app.services.subtitle_text import read_subtitle_text
+from app.utils import utils
+
+
+DEFAULT_SUBTITLE_OST_TYPES = (0, 2)
+DEFAULT_ORIGINAL_SUBTITLE_OST_TYPES = (1,)
+DEFAULT_MAX_CHARS_PER_SUBTITLE = 12
+SENTENCE_PART_RE = re.compile(r"[^。！？!?；;，,、\n]+[。！？!?；;，,、]?")
+SubtitleEntry = Tuple[float, float, str]
+
+
+def _normalize_text(text: str) -> str:
+    return re.sub(r"\s+", " ", str(text or "")).strip()
+
+
+def _remove_punctuation(text: str) -> str:
+    return "".join(
+        char for char in str(text or "")
+        if not unicodedata.category(char).startswith("P")
+    )
+
+
+def clean_subtitle_text(text: str) -> str:
+    """Normalize subtitle text for burn-in display."""
+    return _normalize_text(_remove_punctuation(text))
+
+
+def split_narration(text: str, max_chars: int = DEFAULT_MAX_CHARS_PER_SUBTITLE) -> List[str]:
+    """Split narration into readable subtitle chunks."""
+    text = _normalize_text(text)
+    if not text:
+        return []
+
+    max_chars = max(1, int(max_chars or DEFAULT_MAX_CHARS_PER_SUBTITLE))
+    parts = [match.group(0).strip() for match in SENTENCE_PART_RE.finditer(text)]
+    if not parts:
+        parts = [text]
+
+    chunks = []
+    current = ""
+
+    def flush_long_part(part: str) -> str:
+        while len(part) > max_chars:
+            chunks.append(part[:max_chars].strip())
+            part = part[max_chars:].strip()
+        return part
+
+    for part in parts:
+        if not part:
+            continue
+
+        if len(part) > max_chars:
+            if current:
+                chunks.append(current.strip())
+                current = ""
+            current = flush_long_part(part)
+            continue
+
+        candidate = f"{current}{part}" if current else part
+        if len(candidate) <= max_chars:
+            current = candidate
+        else:
+            if current:
+                chunks.append(current.strip())
+            current = part
+
+    if current:
+        chunks.append(current.strip())
+
+    return [cleaned for chunk in chunks if (cleaned := clean_subtitle_text(chunk))]
+
+
+def parse_srt_like_time(time_text: str) -> float:
+    time_text = str(time_text or "").strip().replace(",", ".")
+    parts = time_text.split(":")
+    if len(parts) != 3:
+        raise ValueError(f"不支持的时间格式: {time_text}")
+
+    hours = int(parts[0])
+    minutes = int(parts[1])
+    seconds = float(parts[2])
+    return hours * 3600 + minutes * 60 + seconds
+
+
+def parse_time_range(time_range: str) -> Tuple[float, float]:
+    if not time_range or "-" not in str(time_range):
+        raise ValueError(f"不支持的时间范围: {time_range}")
+
+    start_text, end_text = str(time_range).split("-", 1)
+    start = parse_srt_like_time(start_text)
+    end = parse_srt_like_time(end_text)
+    if end <= start:
+        raise ValueError(f"结束时间必须晚于开始时间: {time_range}")
+
+    return start, end
+
+
+def format_srt_time(seconds: float) -> str:
+    milliseconds_total = max(0, int(round(float(seconds) * 1000)))
+    milliseconds = milliseconds_total % 1000
+    total_seconds = milliseconds_total // 1000
+    hours = total_seconds // 3600
+    minutes = (total_seconds % 3600) // 60
+    secs = total_seconds % 60
+    return f"{hours:02d}:{minutes:02d}:{secs:02d},{milliseconds:03d}"
+
+
+def _safe_ost_value(value) -> Optional[int]:
+    try:
+        return int(value)
+    except (TypeError, ValueError):
+        return None
+
+
+def _coerce_positive_int(value) -> Optional[int]:
+    try:
+        number = int(value)
+    except (TypeError, ValueError):
+        return None
+    return number if number > 0 else None
+
+
+def _normalize_paths(paths) -> List[str]:
+    if isinstance(paths, str):
+        paths = [paths]
+    if not paths:
+        return []
+
+    normalized_paths = []
+    seen = set()
+    for item in paths:
+        if not isinstance(item, str):
+            continue
+        item = item.strip()
+        if not item or item in seen:
+            continue
+        normalized_paths.append(item)
+        seen.add(item)
+    return normalized_paths
+
+
+def _resolve_script_video_id(item: dict, video_origin_paths: Sequence[str]) -> int:
+    video_id = _coerce_positive_int(item.get("video_id") or item.get("video_index"))
+    if video_id is not None:
+        return video_id
+
+    video_name = os.path.basename(
+        str(item.get("video_name") or item.get("source_video") or "").strip()
+    )
+    if video_name:
+        for index, video_path in enumerate(video_origin_paths, start=1):
+            if os.path.basename(video_path) == video_name:
+                return index
+
+    return 1
+
+
+def _read_subtitle_file(subtitle_path: str) -> str:
+    try:
+        return read_subtitle_text(subtitle_path).text
+    except Exception as e:
+        logger.warning(f"读取原片字幕失败: {subtitle_path}, {e}")
+        return ""
+
+
+def _build_combined_original_subtitle_content(
+    original_subtitle_paths,
+    video_origin_paths=None,
+) -> str:
+    subtitle_paths = _normalize_paths(original_subtitle_paths)
+    video_paths = _normalize_paths(video_origin_paths)
+    sections = []
+
+    for index, subtitle_path in enumerate(subtitle_paths, start=1):
+        if not os.path.exists(subtitle_path):
+            logger.warning(f"原片字幕文件不存在，跳过: {subtitle_path}")
+            continue
+
+        content = _read_subtitle_file(subtitle_path)
+        if not content:
+            logger.warning(f"原片字幕文件为空，跳过: {subtitle_path}")
+            continue
+
+        video_path = video_paths[index - 1] if index <= len(video_paths) else ""
+        if video_path:
+            header = (
+                f"# 视频 {index}: {os.path.basename(video_path)}\n"
+                f"字幕文件: {os.path.basename(subtitle_path)}"
+            )
+        else:
+            header = f"# 视频 {index}\n字幕文件: {os.path.basename(subtitle_path)}"
+        sections.append(f"{header}\n{content}".strip())
+
+    return "\n\n".join(sections)
+
+
+def _resolve_item_time_range(item: dict, current_time: float) -> Tuple[Optional[Tuple[float, float]], float]:
+    duration = float(item.get("duration", 0.0) or 0.0)
+    if duration > 0:
+        start = current_time
+        end = current_time + duration
+        return (start, end), end
+
+    edited_time_range = item.get("editedTimeRange")
+    if edited_time_range:
+        try:
+            start, end = parse_time_range(edited_time_range)
+            return (start, end), end
+        except ValueError as e:
+            logger.warning(f"解析 editedTimeRange 失败，且没有可用的 duration，无法确定片段时间: {e}")
+
+    return None, current_time
+
+
+def _build_narration_subtitle_entries(
+    list_script: Sequence[dict],
+    include_ost: Iterable[int],
+    max_chars: int,
+) -> List[SubtitleEntry]:
+    include_ost_set = {int(item) for item in include_ost}
+    entries: List[SubtitleEntry] = []
+    current_time = 0.0
+
+    for item in list_script:
+        time_range, current_time = _resolve_item_time_range(item, current_time)
+        if not time_range:
+            continue
+
+        ost = _safe_ost_value(item.get("OST"))
+        if ost not in include_ost_set:
+            continue
+
+        chunks = split_narration(item.get("narration", ""), max_chars=max_chars)
+        if not chunks:
+            continue
+
+        start, end = time_range
+        segment_duration = end - start
+        if segment_duration <= 0:
+            continue
+
+        chunk_duration = segment_duration / len(chunks)
+        for chunk_index, chunk in enumerate(chunks):
+            chunk_start = start + chunk_duration * chunk_index
+            chunk_end = end if chunk_index == len(chunks) - 1 else start + chunk_duration * (chunk_index + 1)
+            entries.append((chunk_start, chunk_end, chunk))
+
+    return entries
+
+
+def _build_original_subtitle_entries(
+    list_script: Sequence[dict],
+    original_subtitle_paths=None,
+    video_origin_paths=None,
+    include_ost: Iterable[int] = DEFAULT_ORIGINAL_SUBTITLE_OST_TYPES,
+) -> List[SubtitleEntry]:
+    original_subtitle_content = _build_combined_original_subtitle_content(
+        original_subtitle_paths,
+        video_origin_paths,
+    )
+    if not original_subtitle_content:
+        return []
+
+    video_paths = _normalize_paths(video_origin_paths)
+    subtitle_index = build_subtitle_index(original_subtitle_content, video_paths)
+    if not subtitle_index:
+        logger.warning("原片字幕索引为空，无法为原声片段生成字幕")
+        return []
+
+    cues_by_video = {}
+    for cue in subtitle_index:
+        cues_by_video.setdefault(cue.video_id, []).append(cue)
+
+    include_ost_set = {int(item) for item in include_ost}
+    entries: List[SubtitleEntry] = []
+    current_time = 0.0
+
+    for item in list_script:
+        time_range, current_time = _resolve_item_time_range(item, current_time)
+        if not time_range:
+            continue
+
+        ost = _safe_ost_value(item.get("OST"))
+        if ost not in include_ost_set:
+            continue
+
+        source_time_range = item.get("sourceTimeRange") or item.get("timestamp")
+        try:
+            source_start, source_end = parse_time_range(source_time_range)
+        except ValueError as e:
+            logger.warning(f"解析原声片段源时间失败，跳过原片字幕: {e}")
+            continue
+
+        target_start, target_end = time_range
+        source_duration = source_end - source_start
+        target_duration = target_end - target_start
+        if source_duration <= 0 or target_duration <= 0:
+            continue
+
+        video_id = _resolve_script_video_id(item, video_paths)
+        video_cues = cues_by_video.get(video_id, [])
+        if not video_cues:
+            logger.warning(f"视频 {video_id} 未找到可用原片字幕，片段 {item.get('_id')} 跳过")
+            continue
+
+        for cue in video_cues:
+            cue_start = cue.start_ms / 1000
+            cue_end = cue.end_ms / 1000
+            overlap_start = max(source_start, cue_start)
+            overlap_end = min(source_end, cue_end)
+            if overlap_end <= overlap_start:
+                continue
+
+            text = clean_subtitle_text(cue.text)
+            if not text:
+                continue
+
+            mapped_start = target_start + (overlap_start - source_start)
+            mapped_end = target_start + (overlap_end - source_start)
+            mapped_start = max(target_start, min(mapped_start, target_end))
+            mapped_end = max(target_start, min(mapped_end, target_end))
+            if mapped_end <= mapped_start:
+                continue
+
+            entries.append((mapped_start, mapped_end, text))
+
+    return entries
+
+
+def _subtitle_entries_to_blocks(entries: Sequence[SubtitleEntry]) -> List[str]:
+    blocks = []
+    sorted_entries = sorted(
+        entries,
+        key=lambda entry: (entry[0], entry[1], entry[2]),
+    )
+
+    for subtitle_index, (start, end, text) in enumerate(sorted_entries, start=1):
+        blocks.append(
+            "\n".join(
+                [
+                    str(subtitle_index),
+                    f"{format_srt_time(start)} --> {format_srt_time(end)}",
+                    text,
+                ]
+            )
+        )
+
+    return blocks
+
+
+def _build_srt_blocks(
+    list_script: Sequence[dict],
+    include_ost: Iterable[int],
+    max_chars: int,
+) -> List[str]:
+    entries = _build_narration_subtitle_entries(
+        list_script,
+        include_ost=include_ost,
+        max_chars=max_chars,
+    )
+    return _subtitle_entries_to_blocks(entries)
+
+
+def create_script_subtitle_file(
+    task_id: str,
+    list_script: Sequence[dict],
+    output_file: Optional[str] = None,
+    include_ost: Optional[Iterable[int]] = None,
+    max_chars: int = DEFAULT_MAX_CHARS_PER_SUBTITLE,
+    original_subtitle_paths=None,
+    video_origin_paths=None,
+    include_original_ost: Optional[Iterable[int]] = None,
+) -> str:
+    """Create a full SRT file from script narration plus original-audio subtitles."""
+    if not list_script:
+        return ""
+
+    if include_ost is None:
+        include_ost = DEFAULT_SUBTITLE_OST_TYPES
+    if include_original_ost is None:
+        include_original_ost = DEFAULT_ORIGINAL_SUBTITLE_OST_TYPES
+
+    entries = _build_narration_subtitle_entries(
+        list_script,
+        include_ost=include_ost,
+        max_chars=max_chars,
+    )
+    entries.extend(
+        _build_original_subtitle_entries(
+            list_script,
+            original_subtitle_paths=original_subtitle_paths,
+            video_origin_paths=video_origin_paths,
+            include_ost=include_original_ost,
+        )
+    )
+
+    blocks = _subtitle_entries_to_blocks(entries)
+    if not blocks:
+        logger.warning("程序化字幕未生成内容")
+        return ""
+
+    if output_file is None:
+        output_file = os.path.join(utils.task_dir(task_id), "script_subtitles.srt")
+
+    output_dir = os.path.dirname(output_file)
+    if output_dir:
+        os.makedirs(output_dir, exist_ok=True)
+    with open(output_file, "w", encoding="utf-8") as f:
+        f.write("\n\n".join(blocks))
+        f.write("\n")
+
+    logger.info(f"程序化字幕生成成功: {output_file}, 共 {len(blocks)} 条")
+    return output_file
--- a/app/services/short_drama_narration_validation.py
+++ b/app/services/short_drama_narration_validation.py
@ -0,0 +1,435 @@
+#!/usr/bin/env python
+# -*- coding: UTF-8 -*-
+
+"""Validation helpers for short drama narration scripts."""
+
+from __future__ import annotations
+
+import os
+import re
+from dataclasses import dataclass
+from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
+
+
+# A single timestamp in ``HH:MM:SS,mmm`` form (comma separator only, whole string).
+TIMESTAMP_RE = re.compile(r"^\d{2}:\d{2}:\d{2},\d{3}$")
+# A script range ``start-end``; either timestamp may use ``,`` or ``.`` before the millis.
+SCRIPT_RANGE_RE = re.compile(
+    r"^(?P<start>\d{2}:\d{2}:\d{2}[,.]\d{3})-(?P<end>\d{2}:\d{2}:\d{2}[,.]\d{3})$"
+)
+# An SRT timing line ``start --> end``; unanchored, so it is searched for within a line.
+SRT_RANGE_RE = re.compile(
+    r"(?P<start>\d{2}:\d{2}:\d{2}[,.]\d{3})\s*-->\s*"
+    r"(?P<end>\d{2}:\d{2}:\d{2}[,.]\d{3})"
+)
+# A per-video section header such as ``# 视频 2: name.mp4``; the name part is optional.
+VIDEO_HEADER_RE = re.compile(r"^#\s*视频\s*(?P<video_id>\d+)(?:\s*[:：]\s*(?P<video_name>.+?))?\s*$")
+# Characters per second used to turn a clip duration into a narration length budget.
+NARRATION_CHARS_PER_SECOND = 5.0
+# Extra seconds added to the clip duration before the length budget is computed.
+NARRATION_DURATION_TOLERANCE_SECONDS = 0.5
+
+
+@dataclass(frozen=True)
+class SubtitleCue:
+    """One subtitle cue extracted from a per-video section of SRT text."""
+
+    # 1-based id of the video section the cue was found in.
+    video_id: int
+    # Name taken from the section header, or derived from the video path list.
+    video_name: str
+    # Cue start and end in milliseconds.
+    start_ms: int
+    end_ms: int
+    # Cue text lines joined with single spaces.
+    text: str
+    # Normalized ``HH:MM:SS,mmm-HH:MM:SS,mmm`` range for the cue.
+    timestamp: str
+
+
+@dataclass(frozen=True)
+class ScriptValidationResult:
+    """Outcome of validating a list of narration script items."""
+
+    # True when no validation error was collected.
+    valid: bool
+    # Human-readable error messages, in the order they were detected.
+    errors: List[str]
+    # The normalized script items that were validated.
+    items: List[Dict[str, Any]]
+
+
+class NarrationScriptValidationError(ValueError):
+    """Raised when a narration script cannot be made safe for clipping.
+
+    ``require_valid_narration_script_items`` raises it with all validation
+    errors joined by newlines as the message.
+    """
+
+
+def timestamp_to_ms(timestamp: str) -> int:
+    """Convert an ``HH:MM:SS,mmm`` (or ``HH:MM:SS.mmm``) timestamp to milliseconds.
+
+    Raises:
+        ValueError: If the stripped value does not match ``TIMESTAMP_RE``
+            after ``.`` has been replaced by ``,``.
+    """
+    normalized = str(timestamp or "").strip().replace(".", ",")
+    if TIMESTAMP_RE.match(normalized) is None:
+        raise ValueError(f"时间戳格式错误: {timestamp}")
+
+    clock, millis = normalized.split(",")
+    hours, minutes, seconds = (int(part) for part in clock.split(":"))
+    return ((hours * 60 + minutes) * 60 + seconds) * 1000 + int(millis)
+
+
+def ms_to_timestamp(ms: int) -> str:
+    """Format a non-negative millisecond count as ``HH:MM:SS,mmm``.
+
+    Raises:
+        ValueError: If ``ms`` is negative.
+    """
+    if ms < 0:
+        raise ValueError("毫秒时间不能为负数")
+
+    # Peel off the smallest unit first, then carry upwards.
+    total_seconds, millis = divmod(ms, 1000)
+    total_minutes, seconds = divmod(total_seconds, 60)
+    hours, minutes = divmod(total_minutes, 60)
+    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis:03d}"
+
+
+def parse_script_timestamp_range(timestamp_range: str) -> Tuple[int, int, str]:
+    """Parse a ``start-end`` script range.
+
+    Returns:
+        ``(start_ms, end_ms, normalized_range)`` where the normalized range
+        is rebuilt with comma millisecond separators.
+
+    Raises:
+        ValueError: If the value does not match ``SCRIPT_RANGE_RE``.
+    """
+    candidate = str(timestamp_range or "").strip().replace(".", ",")
+    parsed = SCRIPT_RANGE_RE.match(candidate)
+    if parsed is None:
+        raise ValueError("时间戳格式应为 'HH:MM:SS,mmm-HH:MM:SS,mmm'")
+
+    start_ms = timestamp_to_ms(parsed.group("start"))
+    end_ms = timestamp_to_ms(parsed.group("end"))
+    canonical = "-".join((ms_to_timestamp(start_ms), ms_to_timestamp(end_ms)))
+    return start_ms, end_ms, canonical
+
+
+def _normalize_paths(paths: Optional[Iterable[str]]) -> List[str]:
+    """Return the non-empty, stripped string entries of ``paths``.
+
+    A bare string is treated as a one-element list; non-string entries and
+    entries that are blank after stripping are dropped.
+    """
+    candidates = [paths] if isinstance(paths, str) else paths
+    if not candidates:
+        return []
+
+    stripped = (entry.strip() for entry in candidates if isinstance(entry, str))
+    return [entry for entry in stripped if entry]
+
+
+def _default_video_name(video_id: int, video_paths: Sequence[str]) -> str:
+    """Return the basename of the ``video_id``-th path (1-based), or ``""`` if out of range."""
+    position = video_id - 1
+    if position < 0 or position >= len(video_paths):
+        return ""
+    return os.path.basename(video_paths[position])
+
+
+def _split_subtitle_sections(
+    subtitle_content: str,
+    video_paths: Sequence[str],
+) -> List[Tuple[int, str, str]]:
+    """Split combined subtitle text into per-video sections.
+
+    A line matching ``VIDEO_HEADER_RE`` starts a new section. Text that
+    appears before the first header is attributed to video 1.
+
+    Returns:
+        A list of ``(video_id, video_name, section_text)`` tuples. The name
+        comes from the header when it has one, otherwise from
+        ``_default_video_name``. At least one tuple is always returned.
+    """
+    sections: List[Tuple[int, str, str]] = []
+    # Until a header is seen, everything belongs to video 1.
+    current_video_id = 1
+    current_video_name = _default_video_name(1, video_paths)
+    current_lines: List[str] = []
+    saw_header = False
+
+    for line in str(subtitle_content or "").splitlines():
+        header_match = VIDEO_HEADER_RE.match(line.strip())
+        if header_match:
+            # Flush the section collected so far. Once a header has been seen,
+            # the previous section is flushed even when it has no lines.
+            if current_lines or saw_header:
+                sections.append((current_video_id, current_video_name, "\n".join(current_lines)))
+                current_lines = []
+
+            saw_header = True
+            current_video_id = int(header_match.group("video_id"))
+            header_video_name = str(header_match.group("video_name") or "").strip()
+            current_video_name = header_video_name or _default_video_name(current_video_id, video_paths)
+            continue
+
+        current_lines.append(line)
+
+    # Flush the trailing section; a trailing header with no lines after it is
+    # only emitted when nothing else has been collected.
+    if current_lines or not sections:
+        sections.append((current_video_id, current_video_name, "\n".join(current_lines)))
+
+    return sections
+
+
+def _extract_cues_from_section(video_id: int, video_name: str, section_text: str) -> List[SubtitleCue]:
+    """Extract every subtitle cue from one video's section text.
+
+    Scans for lines containing an SRT timing range (``SRT_RANGE_RE``); the
+    lines that follow, up to the next blank line, become the cue text.
+    Lines that precede a timing line (such as cue index numbers) are skipped.
+
+    NOTE(review): cue text ends only at a blank line, so input without blank
+    separators between cues would fold the next cue's index and timing lines
+    into the previous cue's text. Confirm that inputs always have separators.
+    """
+    lines = str(section_text or "").splitlines()
+    cues: List[SubtitleCue] = []
+    index = 0
+
+    while index < len(lines):
+        match = SRT_RANGE_RE.search(lines[index])
+        if not match:
+            # Not a timing line: skip it.
+            index += 1
+            continue
+
+        start_ms = timestamp_to_ms(match.group("start"))
+        end_ms = timestamp_to_ms(match.group("end"))
+        # Rebuild the range in the normalized comma-separated form.
+        timestamp = f"{ms_to_timestamp(start_ms)}-{ms_to_timestamp(end_ms)}"
+        index += 1
+
+        # Collect text lines until a blank line or the end of the section.
+        text_lines: List[str] = []
+        while index < len(lines) and lines[index].strip():
+            text_lines.append(lines[index].strip())
+            index += 1
+
+        cues.append(
+            SubtitleCue(
+                video_id=video_id,
+                video_name=video_name,
+                start_ms=start_ms,
+                end_ms=end_ms,
+                text=" ".join(text_lines).strip(),
+                timestamp=timestamp,
+            )
+        )
+        # Step over the blank line that ended the cue text.
+        index += 1
+
+    return cues
+
+
+def build_subtitle_index(subtitle_content: str, video_paths: Optional[Iterable[str]] = None) -> List[SubtitleCue]:
+    """Build a flat, per-video list of subtitle cues from combined SRT text.
+
+    The text is split into per-video sections and the cues of every section
+    are concatenated in section order.
+    """
+    video_sources = _normalize_paths(video_paths)
+    return [
+        cue
+        for video_id, video_name, section_text in _split_subtitle_sections(subtitle_content, video_sources)
+        for cue in _extract_cues_from_section(video_id, video_name, section_text)
+    ]
+
+
+def _coerce_positive_int(value: Any) -> Optional[int]:
+    """Return ``int(value)`` when it converts to a positive integer, else ``None``."""
+    try:
+        number = int(value)
+    except (TypeError, ValueError):
+        return None
+
+    if number <= 0:
+        return None
+    return number
+
+
+def _video_id_by_name(video_name: Any, video_paths: Sequence[str]) -> Optional[int]:
+    """Return the 1-based position of the first path whose basename equals ``video_name``.
+
+    Only the basename of ``video_name`` is compared; ``None`` is returned for
+    an empty name or when no path matches.
+    """
+    wanted = os.path.basename(str(video_name or "").strip())
+    if not wanted:
+        return None
+
+    positions = (
+        position
+        for position, candidate in enumerate(video_paths, start=1)
+        if os.path.basename(candidate) == wanted
+    )
+    return next(positions, None)
+
+
+def normalize_script_video_sources(
+    items: Sequence[Dict[str, Any]],
+    video_paths: Optional[Iterable[str]] = None,
+) -> List[Dict[str, Any]]:
+    """Normalize ``video_id``/``video_name`` on script items without inventing a video_id.
+
+    For each dict item a shallow copy is produced and:
+
+    * ``video_id`` is taken from ``video_id`` or ``video_index`` when it
+      coerces to a positive int;
+    * when ``video_name`` or ``source_video`` matches the basename of one of
+      ``video_paths``, that path's 1-based position overrides the id;
+    * when an id is known it is stored, and ``video_name`` is rewritten to
+      the matching path's basename if the id is within ``video_paths``.
+
+    Items that are not dicts are passed through unchanged, so the caller can
+    report them as validation errors instead of this function raising on them.
+
+    Returns:
+        A new list with one entry per input item, in the same order.
+    """
+    normalized_video_paths = _normalize_paths(video_paths)
+    normalized_items: List[Dict[str, Any]] = []
+
+    for raw_item in items:
+        if not isinstance(raw_item, dict):
+            # dict() would raise on strings or numbers; leave the reporting to validation.
+            normalized_items.append(raw_item)
+            continue
+
+        item = dict(raw_item)
+        video_id = _coerce_positive_int(item.get("video_id") or item.get("video_index"))
+        matched_video_id = _video_id_by_name(
+            item.get("video_name") or item.get("source_video"),
+            normalized_video_paths,
+        )
+        # A name that matches a known path wins over a declared id.
+        if matched_video_id is not None:
+            video_id = matched_video_id
+
+        if video_id is not None:
+            item["video_id"] = video_id
+            if 1 <= video_id <= len(normalized_video_paths):
+                item["video_name"] = os.path.basename(normalized_video_paths[video_id - 1])
+
+        normalized_items.append(item)
+
+    return normalized_items
+
+
+def _cues_for_video(cues: Sequence[SubtitleCue], video_id: int) -> List[SubtitleCue]:
+    """Return the cues that belong to ``video_id``, in their original order."""
+    return list(filter(lambda cue: cue.video_id == video_id, cues))
+
+
+def _range_overlaps_subtitle(cues: Sequence[SubtitleCue], start_ms: int, end_ms: int) -> bool:
+    """Return True when ``[start_ms, end_ms)`` overlaps at least one cue.
+
+    Ranges that merely touch at an endpoint do not count as overlapping.
+    """
+    for cue in cues:
+        if start_ms < cue.end_ms and end_ms > cue.start_ms:
+            return True
+    return False
+
+
+def _range_within_subtitle_bounds(cues: Sequence[SubtitleCue], start_ms: int, end_ms: int) -> bool:
+    """Return True when the range lies between the earliest cue start and the latest cue end.
+
+    An empty cue list has no bounds, so the result is False.
+    """
+    if not cues:
+        return False
+
+    earliest_start = min(cue.start_ms for cue in cues)
+    latest_end = max(cue.end_ms for cue in cues)
+    return earliest_start <= start_ms and end_ms <= latest_end
+
+
+def _item_ost(item: Dict[str, Any]) -> Optional[int]:
+    """Return the item's ``OST`` value as an int, or ``None`` when missing or not convertible."""
+    raw_ost = item.get("OST")
+    try:
+        return int(raw_ost)
+    except (TypeError, ValueError):
+        return None
+
+
+def _item_video_id(item: Dict[str, Any]) -> Optional[int]:
+    """Return the item's ``video_id`` as a positive int, or ``None`` when it is not one."""
+    try:
+        candidate = int(item.get("video_id"))
+    except (TypeError, ValueError):
+        return None
+    return candidate if candidate > 0 else None
+
+
+def count_narration_chars(text: str) -> int:
+    """Count the non-whitespace characters of ``text`` (``None`` or empty counts as 0).
+
+    The count is used for rough matching of narration length to clip duration.
+    """
+    content = str(text or "")
+    whitespace_total = sum(len(run) for run in re.findall(r"\s+", content))
+    return len(content) - whitespace_total
+
+
+def max_narration_chars_for_duration(start_ms: int, end_ms: int) -> int:
+    """Return the narration character budget for a clip, never less than 8.
+
+    The clip duration (floored at 0 seconds) plus
+    ``NARRATION_DURATION_TOLERANCE_SECONDS`` is multiplied by
+    ``NARRATION_CHARS_PER_SECOND`` and truncated to an int.
+    """
+    duration_seconds = (end_ms - start_ms) / 1000
+    if duration_seconds < 0:
+        duration_seconds = 0.0
+
+    budget = int((duration_seconds + NARRATION_DURATION_TOLERANCE_SECONDS) * NARRATION_CHARS_PER_SECOND)
+    return budget if budget > 8 else 8
+
+
+def _validate_story_continuity(items: Sequence[Dict[str, Any]]) -> List[str]:
+    """Validate structural continuity rules that affect viewer comprehension.
+
+    Rules checked, in order, for every dict item:
+
+    * the first item must be narration (``OST == 0``);
+    * no more than two consecutive ``OST == 1`` items;
+    * two adjacent ``OST == 1`` items may not come from different videos.
+
+    Non-dict items are skipped and reset the running state. Items are named
+    in messages by their ``_id``, falling back to their 1-based position.
+
+    Returns:
+        The list of error messages; empty when all rules hold.
+    """
+    errors: List[str] = []
+    consecutive_ost = 0
+    previous_item: Optional[Dict[str, Any]] = None
+    # Display id of the previous item, resolved with the same fallback as the
+    # current item, so messages never show "None" for a missing ``_id``.
+    previous_item_id: Any = None
+
+    for index, item in enumerate(items):
+        if not isinstance(item, dict):
+            consecutive_ost = 0
+            previous_item = None
+            previous_item_id = None
+            continue
+
+        item_id = item.get("_id", index + 1)
+        ost = _item_ost(item)
+        if index == 0 and ost != 0:
+            errors.append(f"片段 {item_id} 必须是 OST=0 解说开场钩子，不能直接播放原片")
+
+        if ost == 1:
+            consecutive_ost += 1
+            if consecutive_ost > 2:
+                errors.append(f"片段 {item_id} 连续原声过多，必须插入 OST=0 解说承接剧情")
+        else:
+            consecutive_ost = 0
+
+        if previous_item is not None:
+            previous_video_id = _item_video_id(previous_item)
+            current_video_id = _item_video_id(item)
+            if (
+                previous_video_id is not None
+                and current_video_id is not None
+                and previous_video_id != current_video_id
+                and _item_ost(previous_item) == 1
+                and ost == 1
+            ):
+                errors.append(
+                    f"片段 {previous_item_id} 到片段 {item_id} 跨视频切换缺少 OST=0 解说桥段"
+                )
+
+        previous_item = item
+        previous_item_id = item_id
+
+    return errors
+
+
+def validate_narration_script_items(
+    items: Any,
+    subtitle_index: Sequence[SubtitleCue],
+    video_paths: Optional[Iterable[str]] = None,
+) -> ScriptValidationResult:
+    """Validate final narration items against subtitle/video source constraints.
+
+    Args:
+        items: The script items; must be a non-empty list.
+        subtitle_index: Subtitle cues used to bound and match timestamps.
+        video_paths: Selected video paths; their 1-based positions are
+            accepted as video ids and their basenames as expected names.
+
+    Returns:
+        A ``ScriptValidationResult`` holding the collected error messages
+        and the normalized items. ``valid`` is True only when no error was
+        collected.
+    """
+    errors: List[str] = []
+    if not isinstance(items, list) or not items:
+        return ScriptValidationResult(False, ["解说脚本 items 必须是非空数组"], [])
+
+    normalized_video_paths = _normalize_paths(video_paths)
+    normalized_items = normalize_script_video_sources(items, normalized_video_paths)
+    # A video id is acceptable when it has subtitles or a selected video path.
+    available_video_ids = {cue.video_id for cue in subtitle_index}
+    if normalized_video_paths:
+        available_video_ids.update(range(1, len(normalized_video_paths) + 1))
+
+    # (start_ms, end_ms, item id) per video, for the overlap check further down.
+    ranges_by_video: Dict[int, List[Tuple[int, int, int]]] = {}
+    seen_ids = set()
+    required_fields = ["_id", "video_id", "video_name", "timestamp", "picture", "narration", "OST"]
+
+    for index, item in enumerate(normalized_items):
+        if not isinstance(item, dict):
+            errors.append(f"第 {index + 1} 个片段必须是对象")
+            continue
+
+        item_id = item.get("_id", index + 1)
+        coerced_item_id = _coerce_positive_int(item_id)
+        if coerced_item_id is None:
+            errors.append(f"第 {index + 1} 个片段缺少有效 _id")
+            # Fall back to the 1-based position so later checks still have an id.
+            coerced_item_id = index + 1
+        elif coerced_item_id in seen_ids:
+            errors.append(f"片段 _id={coerced_item_id} 重复")
+        seen_ids.add(coerced_item_id)
+
+        for field in required_fields:
+            if field not in item:
+                errors.append(f"片段 {item_id} 缺少字段 {field}")
+
+        video_id = _coerce_positive_int(item.get("video_id"))
+        if video_id is None:
+            errors.append(f"片段 {item_id} 缺少有效 video_id")
+            # Without a video id none of the remaining checks can run.
+            continue
+
+        if available_video_ids and video_id not in available_video_ids:
+            errors.append(f"片段 {item_id} 的 video_id={video_id} 不在已选视频范围内")
+
+        expected_video_name = _default_video_name(video_id, normalized_video_paths)
+        if expected_video_name and os.path.basename(str(item.get("video_name") or "")) != expected_video_name:
+            errors.append(f"片段 {item_id} 的 video_name 必须是 {expected_video_name}")
+
+        try:
+            start_ms, end_ms, normalized_timestamp = parse_script_timestamp_range(item.get("timestamp", ""))
+            # Store the normalized form back on the item.
+            item["timestamp"] = normalized_timestamp
+        except ValueError as exc:
+            errors.append(f"片段 {item_id}: {exc}")
+            continue
+
+        if start_ms >= end_ms:
+            errors.append(f"片段 {item_id} 的开始时间必须早于结束时间")
+            continue
+
+        # The range must fall inside the video's subtitle span and hit a cue.
+        video_cues = _cues_for_video(subtitle_index, video_id)
+        if not _range_within_subtitle_bounds(video_cues, start_ms, end_ms):
+            errors.append(f"片段 {item_id} 的时间戳不在视频 {video_id} 的字幕范围内")
+        elif not _range_overlaps_subtitle(video_cues, start_ms, end_ms):
+            errors.append(f"片段 {item_id} 的时间戳没有命中视频 {video_id} 的字幕内容")
+
+        for text_field in ["picture", "narration"]:
+            if not isinstance(item.get(text_field), str) or not item[text_field].strip():
+                errors.append(f"片段 {item_id} 的 {text_field} 不能为空")
+
+        ost = _item_ost(item)
+        # The raw value must be one of 0/1/2; numeric strings are rejected here
+        # even though _item_ost would coerce them.
+        if item.get("OST") not in [0, 1, 2]:
+            errors.append(f"片段 {item_id} 的 OST 必须是 0、1 或 2")
+        if ost == 1 and not str(item.get("narration", "")).startswith("播放原片"):
+            errors.append(f"片段 {item_id} 是原声片段，narration 必须使用“播放原片+序号”")
+        if ost == 0:
+            # Narration must fit the character budget derived from the clip duration.
+            narration_chars = count_narration_chars(item.get("narration", ""))
+            max_chars = max_narration_chars_for_duration(start_ms, end_ms)
+            if narration_chars > max_chars:
+                duration_seconds = (end_ms - start_ms) / 1000
+                errors.append(
+                    f"片段 {item_id} 解说过密：{narration_chars} 字需要约 {narration_chars / NARRATION_CHARS_PER_SECOND:.1f} 秒，"
+                    f"但画面只有 {duration_seconds:.1f} 秒，建议不超过 {max_chars} 字或延长画面"
+                )
+
+        ranges_by_video.setdefault(video_id, []).append((start_ms, end_ms, coerced_item_id))
+
+    # Overlap detection: sort each video's ranges and compare every range with
+    # the one that currently reaches furthest to the right.
+    for video_id, ranges in ranges_by_video.items():
+        sorted_ranges = sorted(ranges, key=lambda item: (item[0], item[1], item[2]))
+        previous_start, previous_end, previous_id = sorted_ranges[0]
+        for start_ms, end_ms, item_id in sorted_ranges[1:]:
+            if start_ms < previous_end:
+                errors.append(f"视频 {video_id} 的片段 {item_id} 与片段 {previous_id} 时间戳重叠")
+            if end_ms > previous_end:
+                previous_start, previous_end, previous_id = start_ms, end_ms, item_id
+
+    errors.extend(_validate_story_continuity(normalized_items))
+
+    return ScriptValidationResult(not errors, errors, normalized_items)
+
+
+def require_valid_narration_script_items(
+    items: Any,
+    subtitle_index: Sequence[SubtitleCue],
+    video_paths: Optional[Iterable[str]] = None,
+) -> List[Dict[str, Any]]:
+    """Validate the script items and return the normalized items.
+
+    Raises:
+        NarrationScriptValidationError: If validation fails; the message is
+            all error messages joined by newlines.
+    """
+    outcome = validate_narration_script_items(items, subtitle_index, video_paths)
+    if outcome.valid:
+        return outcome.items
+    raise NarrationScriptValidationError("\n".join(outcome.errors))
+
+
+def summarize_subtitle_window(
+    subtitle_index: Sequence[SubtitleCue],
+    max_cues_per_video: int = 80,
+) -> str:
+    """Render a compact, per-video subtitle listing for a repair prompt.
+
+    Videos are listed in ascending id order. Each gets a ``# 视频 N`` header
+    (with the name when known), then at most ``max_cues_per_video`` lines of
+    ``timestamp text``, and a final line giving the number of omitted cues
+    when the video has more.
+    """
+    grouped: Dict[int, List[SubtitleCue]] = {}
+    for cue in subtitle_index:
+        if cue.video_id not in grouped:
+            grouped[cue.video_id] = []
+        grouped[cue.video_id].append(cue)
+
+    output: List[str] = []
+    for video_id, all_cues in sorted(grouped.items(), key=lambda pair: pair[0]):
+        shown = all_cues[:max_cues_per_video]
+        video_name = shown[0].video_name if shown else ""
+        if video_name:
+            output.append(f"# 视频 {video_id}: {video_name}")
+        else:
+            output.append(f"# 视频 {video_id}")
+
+        for cue in shown:
+            text = cue.text.replace("\n", " ").strip()
+            output.append(f"{cue.timestamp} {text}")
+
+        omitted = len(all_cues) - max_cues_per_video
+        if omitted > 0:
+            output.append(f"... 已省略 {omitted} 条字幕")
+
+    return "\n".join(output)
--- a/app/services/subtitle_corrector.py
+++ b/app/services/subtitle_corrector.py
@ -0,0 +1,231 @@
+"""LLM-powered SRT subtitle correction."""
+
+from __future__ import annotations
+
+import json
+import os
+import re
+from dataclasses import dataclass
+from typing import Any
+
+from loguru import logger
+
+from app.services.llm.manager import LLMServiceManager
+from app.services.llm.migration_adapter import _run_async_safely
+from app.services.llm.unified_service import UnifiedLLMService
+from app.services.subtitle_text import has_timecodes, normalize_subtitle_text, read_subtitle_text
+from app.utils import utils
+
+
+class SubtitleCorrectionError(RuntimeError):
+    """Raised when subtitle correction cannot produce a valid SRT.
+
+    Used in this module for unparseable input, a missing subtitle file and
+    LLM output that is not a usable correction result.
+    """
+
+
+# A whole SRT timing line: ``start --> end`` with ``,`` or ``.`` before the
+# millis, optionally followed by whitespace and trailing text.
+_TIME_LINE_RE = re.compile(
+    r"^\s*\d{2}:\d{2}:\d{2}[,.]\d{3}\s*-->\s*\d{2}:\d{2}:\d{2}[,.]\d{3}(?:\s+.*)?$"
+)
+# The contents of a Markdown code fence, with or without a ``json`` tag.
+_JSON_BLOCK_RE = re.compile(r"```(?:json)?\s*(.*?)\s*```", re.DOTALL | re.IGNORECASE)
+
+
+@dataclass(frozen=True)
+class SubtitleBlock:
+    """One parsed SRT block."""
+
+    # 1-based position of the block in the parsed file; used as the correction id.
+    order: int
+    # The block's index line as found in the input, or a generated one if absent.
+    index_line: str
+    # The stripped ``start --> end`` timing line.
+    time_line: str
+    # The block's text lines joined with newlines.
+    text: str
+
+
+def _ensure_llm_providers_registered() -> None:
+    """Register all LLM providers unless the service manager reports them registered."""
+    if not LLMServiceManager.is_registered():
+        # Imported here so the providers package is only loaded when needed.
+        from app.services.llm.providers import register_all_providers
+
+        register_all_providers()
+
+
+def parse_srt_blocks(srt_content: str) -> list[SubtitleBlock]:
+    """Parse SRT text into ``SubtitleBlock`` objects.
+
+    Blocks are separated by blank lines. A block may start with an index line
+    followed by a timing line, or directly with a timing line; in the latter
+    case the block's 1-based position is used as its index line.
+
+    Raises:
+        SubtitleCorrectionError: If the content is empty, has no timecodes,
+            contains a block without a timing line, or yields no blocks.
+    """
+    normalized = normalize_subtitle_text(srt_content)
+    if not (normalized and has_timecodes(normalized)):
+        raise SubtitleCorrectionError("字幕内容为空或未检测到有效 SRT 时间轴")
+
+    parsed: list[SubtitleBlock] = []
+    for raw_block in re.split(r"\n\s*\n", normalized):
+        rows = [row.rstrip() for row in raw_block.splitlines() if row.strip()]
+        if not rows:
+            continue
+
+        position = len(parsed) + 1
+        if len(rows) >= 2 and _TIME_LINE_RE.match(rows[1]):
+            # Regular block: index line, timing line, then text.
+            index_line = rows[0].strip()
+            time_line = rows[1].strip()
+            body_rows = rows[2:]
+        elif _TIME_LINE_RE.match(rows[0]):
+            # The index line is missing: generate one from the block position.
+            index_line = str(position)
+            time_line = rows[0].strip()
+            body_rows = rows[1:]
+        else:
+            raise SubtitleCorrectionError(f"无法解析字幕块: {raw_block[:80]}")
+
+        parsed.append(
+            SubtitleBlock(
+                order=position,
+                index_line=index_line,
+                time_line=time_line,
+                text="\n".join(body_rows).strip(),
+            )
+        )
+
+    if not parsed:
+        raise SubtitleCorrectionError("字幕内容为空或未检测到有效字幕块")
+    return parsed
+
+
+def _build_correction_prompt(blocks: list[SubtitleBlock]) -> str:
+    """Build the user prompt asking the LLM to fix ASR errors in the blocks.
+
+    Every block is sent as ``{"id", "time", "text"}`` where ``id`` is the
+    block's ``order``. The prompt asks for a strict JSON reply of the form
+    ``{"items": [{"id": ..., "text": ...}]}``.
+    """
+    payload = [
+        {
+            "id": block.order,
+            "time": block.time_line,
+            "text": block.text,
+        }
+        for block in blocks
+    ]
+    return f"""
+请校准以下 SRT 字幕文本中的明显语音识别错误。字幕可能是中文、英文、日文、韩文或其他语言，也可能包含多语言混合内容。
+
+校准要求：
+1. 先结合全部字幕内容识别原语言和语境，保持原语言输出；多语言混合内容也要保持原有语言混合方式。
+2. 只纠正明显的 ASR 错字、拼写错误、同音或近音误识别、词形误识别、专有名词前后不一致。
+3. 不要润色、扩写、改写句意，不要翻译，不要增删剧情信息。
+4. 不要修改时间轴、序号、条目数量或条目顺序。
+5. 不确定的内容保持原样。
+6. 保留必要的说话人标记、标点和换行。
+
+只输出严格 JSON，不要输出 Markdown 或解释文字。格式必须为：
+{{"items":[{{"id":1,"text":"校准后的字幕文本"}}]}}
+
+待校准字幕条目：
+{json.dumps(payload, ensure_ascii=False, indent=2)}
+""".strip()
+
+
+def _extract_json_text(raw_output: str) -> str:
+    """Pull the JSON portion out of raw LLM output.
+
+    Tried in order: the contents of the first Markdown code fence; the text
+    itself when it already starts with ``{`` or ``[``; the span from the
+    first opening brace/bracket to the last closing one. If none applies the
+    stripped text is returned unchanged.
+    """
+    cleaned = str(raw_output or "").strip()
+
+    fenced = _JSON_BLOCK_RE.search(cleaned)
+    if fenced is not None:
+        return fenced.group(1).strip()
+
+    if cleaned.startswith(("{", "[")):
+        return cleaned
+
+    openers = [pos for pos in (cleaned.find("{"), cleaned.find("[")) if pos >= 0]
+    if not openers:
+        return cleaned
+
+    first = min(openers)
+    last = max(cleaned.rfind("}"), cleaned.rfind("]"))
+    if last > first:
+        return cleaned[first:last + 1]
+    return cleaned
+
+
+def _parse_corrections(raw_output: str, expected_ids: set[int]) -> dict[int, str]:
+    """Parse the LLM reply into a mapping of block id to corrected text.
+
+    Accepted shapes are ``{"items": [...]}``, a bare list of
+    ``{"id", "text"}`` objects, or a flat ``{id: text}`` object. Entries that
+    are not objects, have a non-integer id, or an id outside
+    ``expected_ids`` are ignored.
+
+    Raises:
+        SubtitleCorrectionError: If the reply is not valid JSON, has an
+            unsupported shape, or lacks any of the expected ids.
+    """
+    try:
+        data = json.loads(_extract_json_text(raw_output))
+    except json.JSONDecodeError as exc:
+        raise SubtitleCorrectionError("LLM 未返回有效 JSON 字幕校准结果") from exc
+
+    if isinstance(data, list):
+        items = data
+    elif isinstance(data, dict):
+        if "items" in data:
+            items = data["items"]
+        else:
+            # Flat {id: text} mapping: turn it into the list-of-objects shape.
+            items = [{"id": key, "text": value} for key, value in data.items()]
+    else:
+        raise SubtitleCorrectionError("LLM 字幕校准结果格式无效")
+
+    if not isinstance(items, list):
+        raise SubtitleCorrectionError("LLM 字幕校准结果缺少 items 列表")
+
+    corrections: dict[int, str] = {}
+    for entry in items:
+        if not isinstance(entry, dict):
+            continue
+        try:
+            entry_id = int(entry.get("id"))
+        except (TypeError, ValueError):
+            continue
+        if entry_id not in expected_ids:
+            continue
+        corrections[entry_id] = str(entry.get("text") or "").strip()
+
+    missing_ids = sorted(expected_ids - set(corrections))
+    if missing_ids:
+        raise SubtitleCorrectionError(f"LLM 字幕校准结果缺少字幕条目: {missing_ids[:10]}")
+    return corrections
+
+
+def _render_srt(blocks: list[SubtitleBlock], corrections: dict[int, str]) -> str:
+    """Render the blocks back to SRT text, using corrected text where available.
+
+    A block keeps its original text when its correction is missing or blank.
+    Blocks are separated by one blank line and the result ends with a
+    single newline.
+    """
+
+    def render(block: SubtitleBlock) -> str:
+        corrected_text = corrections.get(block.order, "").strip()
+        if not corrected_text:
+            corrected_text = block.text
+        return f"{block.index_line}\n{block.time_line}\n{corrected_text}"
+
+    return "\n\n".join(map(render, blocks)).rstrip() + "\n"
+
+
+def correct_srt_content(
+    srt_content: str,
+    *,
+    provider: str = "",
+    api_key: str = "",
+    base_url: str = "",
+    temperature: float = 0.1,
+) -> str:
+    """Correct ASR errors in SRT text with an LLM and return the new SRT text.
+
+    The content is parsed into blocks, sent to the LLM in one prompt, and
+    re-rendered with the original index and timing lines.
+
+    Args:
+        srt_content: The SRT text to correct.
+        provider: Forwarded to ``UnifiedLLMService.generate_text``.
+        api_key: Forwarded to ``UnifiedLLMService.generate_text``.
+        base_url: Forwarded as ``api_base``.
+        temperature: Forwarded to ``UnifiedLLMService.generate_text``.
+
+    Raises:
+        SubtitleCorrectionError: If the input cannot be parsed or the LLM
+            reply is not a complete, valid correction result.
+    """
+    blocks = parse_srt_blocks(srt_content)
+    _ensure_llm_providers_registered()
+
+    logger.info(f"开始校准字幕，共 {len(blocks)} 条")
+    prompt = _build_correction_prompt(blocks)
+    # generate_text is run through the project's async-to-sync helper.
+    raw_output = _run_async_safely(
+        UnifiedLLMService.generate_text,
+        prompt=prompt,
+        system_prompt="你是一位专业的多语言字幕校对员，擅长修正 ASR 语音识别造成的明显错字、拼写错误、同音或近音误识别，同时严格保留字幕结构和原语言。",
+        provider=provider,
+        temperature=temperature,
+        response_format="json",
+        api_key=api_key,
+        api_base=base_url,
+    )
+    # Every block must come back in the reply; a missing id raises.
+    corrections = _parse_corrections(raw_output, {block.order for block in blocks})
+    corrected_srt = _render_srt(blocks, corrections)
+    logger.info("字幕校准完成")
+    return corrected_srt
+
+
+def write_srt_file(srt_content: str, subtitle_file: str = "") -> str:
+    """Write SRT text to ``subtitle_file`` as UTF-8 and return the path.
+
+    When no path is given the file is ``subtitle_corrected.srt`` inside
+    ``utils.subtitle_dir()``. Missing parent directories are created.
+    """
+    target = subtitle_file or os.path.join(utils.subtitle_dir(), "subtitle_corrected.srt")
+
+    folder = os.path.dirname(target)
+    if folder:
+        os.makedirs(folder, exist_ok=True)
+
+    with open(target, "w", encoding="utf-8") as handle:
+        handle.write(srt_content)
+    return target
+
+
+def correct_subtitle_file(
+    subtitle_file: str,
+    output_file: str = "",
+    *,
+    provider: str = "",
+    api_key: str = "",
+    base_url: str = "",
+    temperature: float = 0.1,
+) -> str:
+    """Correct a subtitle file with the LLM and write the result.
+
+    Returns:
+        The path returned by ``write_srt_file`` for ``output_file``.
+
+    Raises:
+        SubtitleCorrectionError: If ``subtitle_file`` is empty or is not an
+            existing file, or if the correction itself fails.
+    """
+    source_exists = bool(subtitle_file) and os.path.isfile(subtitle_file)
+    if not source_exists:
+        raise SubtitleCorrectionError(f"字幕文件不存在: {subtitle_file}")
+
+    source_text = read_subtitle_text(subtitle_file).text
+    corrected_srt = correct_srt_content(
+        source_text,
+        provider=provider,
+        api_key=api_key,
+        base_url=base_url,
+        temperature=temperature,
+    )
+    return write_srt_file(corrected_srt, output_file)
--- a/app/services/task.py
+++ b/app/services/task.py
@ -10,11 +10,301 @@ from app.config import config
 from app.config.audio_config import AudioConfig, get_recommended_volumes_for_content
 from app.models import const
 from app.models.schema import VideoClipParams
-from app.services import (voice, audio_merger, subtitle_merger, clip_video, merger_video, update_script, generate_video)
+from app.services import (
+    voice,
+    audio_merger,
+    subtitle_merger,
+    clip_video,
+    merger_video,
+    update_script,
+    generate_video,
+    script_subtitle,
+)
 from app.services import state as sm
 from app.utils import utils


+# Reported as ``step_total`` on every video-generation task update.
+VIDEO_GENERATION_TOTAL_STEPS = 6
+
+
+def _update_video_generation_task(
+    task_id: str,
+    progress: int,
+    message: str,
+    step_current: int = 0,
+    ffmpeg_progress: float | None = None,
+    state: int = const.TASK_STATE_PROCESSING,
+    **kwargs,
+) -> None:
+    """Publish a progress update for a video-generation task.
+
+    ``message``, ``step_current`` and ``step_total`` are always sent; extra
+    keyword arguments are passed through as additional task fields. When
+    given, ``ffmpeg_progress`` is clamped to 0-100 and rounded to one decimal.
+    """
+    task_fields = dict(
+        message=message,
+        step_current=step_current,
+        step_total=VIDEO_GENERATION_TOTAL_STEPS,
+    )
+    task_fields.update(kwargs)
+
+    if ffmpeg_progress is not None:
+        clamped = max(0.0, min(100.0, float(ffmpeg_progress)))
+        task_fields["ffmpeg_progress"] = round(clamped, 1)
+
+    sm.state.update_task(task_id, state=state, progress=progress, **task_fields)
+
+
+def _is_auto_transcription_enabled(params: VideoClipParams) -> bool:
+    """Return True when subtitles are enabled and auto-transcription is switched on.
+
+    A missing ``subtitle_enabled`` counts as enabled; a missing
+    ``subtitle_auto_transcribe_enabled`` counts as off.
+    """
+    if not getattr(params, "subtitle_enabled", True):
+        return False
+    return bool(getattr(params, "subtitle_auto_transcribe_enabled", False))
+
+
+def _get_auto_transcription_backend(params: VideoClipParams) -> str:
+    """Return the configured transcription backend, defaulting to ``"local"``.
+
+    The value is stripped and lower-cased; anything other than ``local``,
+    ``firered`` or ``bailian`` falls back to ``local``.
+    """
+    raw_backend = getattr(params, "subtitle_auto_transcribe_backend", "") or ""
+    normalized = str(raw_backend).strip().lower()
+    return normalized if normalized in {"local", "firered", "bailian"} else "local"
+
+
+def _get_original_subtitle_paths(params: VideoClipParams) -> list[str]:
+    """Collect the original-subtitle paths configured on ``params``.
+
+    Entries of ``original_subtitle_paths`` are stripped, de-duplicated and
+    kept in order; non-string and blank entries are dropped. A non-empty
+    ``original_subtitle_path`` not already present is put first. When nothing
+    is configured, subtitles are looked up by the video file names instead.
+    """
+    configured = getattr(params, "original_subtitle_paths", []) or []
+    if isinstance(configured, str):
+        configured = [configured]
+
+    stripped = (entry.strip() for entry in configured if isinstance(entry, str))
+    # dict.fromkeys removes duplicates while keeping first-seen order.
+    ordered_paths = list(dict.fromkeys(entry for entry in stripped if entry))
+
+    primary_path = str(getattr(params, "original_subtitle_path", "") or "").strip()
+    if primary_path and primary_path not in ordered_paths:
+        ordered_paths.insert(0, primary_path)
+
+    if not ordered_paths:
+        ordered_paths = _find_original_subtitle_paths_for_videos(_get_video_origin_paths(params))
+
+    return ordered_paths
+
+
+def _get_video_origin_paths(params: VideoClipParams) -> list[str]:
+    """Collect the source video paths configured on ``params``.
+
+    Entries of ``video_origin_paths`` are stripped, de-duplicated and kept
+    in order; non-string and blank entries are dropped. A non-empty
+    ``video_origin_path`` not already present is put first.
+    """
+    configured = getattr(params, "video_origin_paths", []) or []
+    if isinstance(configured, str):
+        configured = [configured]
+
+    stripped = (entry.strip() for entry in configured if isinstance(entry, str))
+    # dict.fromkeys removes duplicates while keeping first-seen order.
+    ordered_paths = list(dict.fromkeys(entry for entry in stripped if entry))
+
+    primary_path = str(getattr(params, "video_origin_path", "") or "").strip()
+    if primary_path and primary_path not in ordered_paths:
+        ordered_paths.insert(0, primary_path)
+
+    return ordered_paths
+
+
+def _video_stem_candidates(video_path: str) -> list[str]:
+    """Return the file-name stems under which a video's subtitle may be stored.
+
+    The first candidate is the file name without its extension. If that stem
+    ends with ``_`` followed by 14 digits, the stem without that suffix is
+    added as a second candidate. An empty stem yields an empty list.
+    """
+    filename = path.basename(str(video_path or "").strip())
+    stem, _extension = path.splitext(filename)
+    if not stem:
+        return []
+
+    without_timestamp = re.sub(r"_[0-9]{14}$", "", stem)
+    if without_timestamp and without_timestamp != stem:
+        return [stem, without_timestamp]
+    return [stem]
+
+
+def _find_original_subtitle_paths_for_videos(video_paths: list[str]) -> list[str]:
+    """Find the ``.srt`` file in the subtitle directory that matches each video.
+
+    A subtitle matches a video when its stem equals one of the video's stem
+    candidates or starts with a candidate followed by ``_``. When several
+    match, the most recently modified file is chosen. Each subtitle path is
+    returned at most once, in video order.
+
+    Returns:
+        The matched subtitle paths; empty when the subtitle directory does
+        not exist, holds no ``.srt`` file, or nothing matches.
+    """
+    subtitle_dir = utils.subtitle_dir()
+    if not path.isdir(subtitle_dir):
+        return []
+
+    # The extension check is case-insensitive.
+    subtitle_files = [
+        path.join(subtitle_dir, filename)
+        for filename in os.listdir(subtitle_dir)
+        if filename.lower().endswith(".srt")
+    ]
+    if not subtitle_files:
+        return []
+
+    resolved_paths = []
+    seen = set()
+    for video_path in video_paths:
+        candidates = _video_stem_candidates(video_path)
+        if not candidates:
+            continue
+
+        matches = []
+        for subtitle_path in subtitle_files:
+            subtitle_stem = path.splitext(path.basename(subtitle_path))[0]
+            for candidate in candidates:
+                if subtitle_stem == candidate or subtitle_stem.startswith(f"{candidate}_"):
+                    matches.append(subtitle_path)
+                    # One hit per subtitle file is enough.
+                    break
+
+        if not matches:
+            continue
+
+        # Prefer the most recently modified match.
+        matches.sort(key=lambda item: path.getmtime(item), reverse=True)
+        selected_path = matches[0]
+        if selected_path not in seen:
+            resolved_paths.append(selected_path)
+            seen.add(selected_path)
+
+    if resolved_paths:
+        logger.info(f"未从参数获取原片字幕，已按视频文件名自动匹配: {resolved_paths}")
+    return resolved_paths
+
+
+def _create_programmatic_subtitle_file(
+    task_id: str,
+    list_script: list[dict],
+    params: VideoClipParams,
+) -> str:
+    """Create the script-driven subtitle file for a task.
+
+    Returns:
+        The value returned by ``script_subtitle.create_script_subtitle_file``,
+        or ``""`` without doing anything when subtitles are disabled.
+    """
+    subtitles_enabled = getattr(params, "subtitle_enabled", True)
+    if subtitles_enabled:
+        original_subtitle_paths = _get_original_subtitle_paths(params)
+        logger.info(f"程序化字幕使用原片字幕路径: {original_subtitle_paths or '未提供'}")
+        return script_subtitle.create_script_subtitle_file(
+            task_id=task_id,
+            list_script=list_script,
+            original_subtitle_paths=original_subtitle_paths,
+            video_origin_paths=_get_video_origin_paths(params),
+        )
+    return ""
+
+
+def _build_subtitle_mask_options(params: VideoClipParams, enabled=None) -> dict:
+    """Build the subtitle-mask and subtitle-position options for video merging.
+
+    The mask counts as configured when both ``subtitle_enabled`` (default
+    True) and ``subtitle_mask_enabled`` (default False) are truthy. With
+    ``enabled=None`` that value is used directly; otherwise it is combined
+    with ``enabled`` using ``and``. Every other option is read from the
+    attribute of the same name on ``params``, using the defaults below.
+    """
+    mask_configured = bool(
+        getattr(params, "subtitle_enabled", True)
+        and getattr(params, "subtitle_mask_enabled", False)
+    )
+    if enabled is None:
+        mask_enabled = mask_configured
+    else:
+        mask_enabled = mask_configured and enabled
+
+    # (option name, default). The option name is also the attribute name on params.
+    option_defaults = (
+        ('subtitle_mask_landscape_x_percent', 10.0),
+        ('subtitle_mask_landscape_y_percent', 78.0),
+        ('subtitle_mask_landscape_width_percent', 80.0),
+        ('subtitle_mask_landscape_height_percent', 14.0),
+        ('subtitle_mask_landscape_blur_radius', 18),
+        ('subtitle_mask_landscape_opacity_percent', 82),
+        ('subtitle_mask_portrait_x_percent', 8.0),
+        ('subtitle_mask_portrait_y_percent', 79.0),
+        ('subtitle_mask_portrait_width_percent', 84.0),
+        ('subtitle_mask_portrait_height_percent', 16.0),
+        ('subtitle_mask_portrait_blur_radius', 26),
+        ('subtitle_mask_portrait_opacity_percent', 84),
+        ('subtitle_position_landscape_y_percent', 85.0),
+        ('subtitle_position_portrait_y_percent', 82.0),
+    )
+
+    options = {'subtitle_mask_enabled': mask_enabled}
+    for option_name, default_value in option_defaults:
+        options[option_name] = getattr(params, option_name, default_value)
+    return options
+
+
+def _transcribe_final_video(task_id: str, video_path: str, params: VideoClipParams) -> str:
+    """Transcribe the fully merged video into an SRT file.
+
+    The backend from ``_get_auto_transcription_backend`` selects which
+    ``fun_asr_subtitle`` function is called: ``local``, ``firered``, or
+    anything else (``bailian``), which needs an API key. Settings are read
+    from ``params`` first and from ``config.fun_asr`` as a fallback. The
+    subtitle file requested is ``auto_transcribed_final.srt`` in the task
+    directory.
+
+    Returns:
+        The path of the generated subtitle file.
+
+    Raises:
+        FileNotFoundError: If ``video_path`` is empty or does not exist.
+        ValueError: If the API URL or API key the backend needs is missing.
+        RuntimeError: If the backend does not produce a subtitle file.
+    """
+    # Imported inside the function rather than at module level.
+    from app.services import fun_asr_subtitle
+
+    if not video_path or not path.exists(video_path):
+        raise FileNotFoundError(f"自动转录视频不存在: {video_path}")
+
+    backend = _get_auto_transcription_backend(params)
+    subtitle_file = path.join(utils.task_dir(task_id), "auto_transcribed_final.srt")
+    logger.info(f"开始自动转录最终视频: {video_path}, backend={backend}")
+
+    if backend == "local":
+        # Resolution order: params, then config, then the module's default URL.
+        api_url = str(
+            getattr(params, "subtitle_auto_transcribe_api_url", "")
+            or config.fun_asr.get("api_url", fun_asr_subtitle.LOCAL_FUN_ASR_API_URL)
+        ).strip()
+        if not api_url:
+            raise ValueError("请先输入本地 FunASR-Pack API 地址")
+
+        generated_path = fun_asr_subtitle.create_with_local_fun_asr(
+            local_file=video_path,
+            subtitle_file=subtitle_file,
+            api_url=api_url,
+            hotword=str(getattr(params, "subtitle_auto_transcribe_hotword", "") or "").strip(),
+            enable_spk=bool(getattr(params, "subtitle_auto_transcribe_enable_spk", False)),
+        )
+    elif backend == "firered":
+        api_url = str(
+            getattr(params, "subtitle_auto_transcribe_firered_api_url", "")
+            or config.fun_asr.get("firered_api_url", fun_asr_subtitle.LOCAL_FIRERED_ASR_API_URL)
+        ).strip()
+        if not api_url:
+            raise ValueError("请先输入本地ASR API 地址")
+
+        generated_path = fun_asr_subtitle.create_with_local_firered_asr(
+            local_file=video_path,
+            subtitle_file=subtitle_file,
+            api_url=api_url,
+        )
+    else:
+        # Remaining backend ("bailian"): requires an API key.
+        api_key = str(
+            getattr(params, "subtitle_auto_transcribe_api_key", "")
+            or config.fun_asr.get("api_key", "")
+        ).strip()
+        if not api_key:
+            raise ValueError("请先输入阿里百炼 API Key")
+
+        generated_path = fun_asr_subtitle.create_with_fun_asr(
+            local_file=video_path,
+            subtitle_file=subtitle_file,
+            api_key=api_key,
+        )
+
+    # A backend can return without raising and still not produce a file.
+    if not generated_path or not path.exists(generated_path):
+        raise RuntimeError("自动转录失败：未生成字幕文件")
+
+    logger.info(f"自动转录字幕生成成功: {generated_path}")
+    return generated_path
+
+
+def _merge_auto_transcribed_subtitles(
+    source_video_path: str,
+    output_video_path: str,
+    subtitle_path: str,
+    params: VideoClipParams,
+) -> str:
+    """Merge a subtitle file into an already merged video.
+
+    No extra audio or BGM is passed (both paths are empty) and the original
+    audio is kept at full volume. Font, colour and position come from
+    ``params``; the subtitle-mask options are added with ``enabled=True``.
+
+    Returns:
+        The value returned by ``generate_video.merge_materials``.
+    """
+    render_options = {
+        'voice_volume': 1.0,
+        'bgm_volume': 0.0,
+        'original_audio_volume': 1.0,
+        'keep_original_audio': True,
+        'subtitle_enabled': True,
+        'subtitle_font': params.font_name,
+        'subtitle_font_size': params.font_size,
+        'subtitle_color': params.text_fore_color,
+        'subtitle_bg_color': None,
+        'subtitle_position': params.subtitle_position,
+        'custom_position': params.custom_position,
+        'threads': params.n_threads,
+    }
+    # Mask options go last so they take precedence over the keys above.
+    render_options.update(_build_subtitle_mask_options(params, enabled=True))
+
+    return generate_video.merge_materials(
+        video_path=source_video_path,
+        audio_path="",
+        subtitle_path=subtitle_path,
+        bgm_path="",
+        output_path=output_video_path,
+        options=render_options
+    )
+
+
 def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict = None):
    """
    后台任务（统一视频裁剪处理）- 优化版本
@ -108,6 +398,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
    # 使用新的统一裁剪策略
    video_clip_result = clip_video.clip_video_unified(
        video_origin_path=params.video_origin_path,
+        video_origin_paths=getattr(params, "video_origin_paths", []),
        script_list=list_script,
        tts_results=tts_results
    )
@ -139,7 +430,19 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
            logger.info(f"音频文件合并成功->{merged_audio_path}")

            # 合并字幕文件
-            merged_subtitle_path = subtitle_merger.merge_subtitle_files(new_script_list)
+            merged_subtitle_path = ""
+            if getattr(params, "subtitle_enabled", True):
+                try:
+                    merged_subtitle_path = _create_programmatic_subtitle_file(
+                        task_id,
+                        new_script_list,
+                        params,
+                    )
+                except Exception as e:
+                    logger.warning(f"程序化字幕生成失败，将尝试合并TTS字幕: {e}")
+
+            if not merged_subtitle_path and getattr(params, "subtitle_enabled", True):
+                merged_subtitle_path = subtitle_merger.merge_subtitle_files(new_script_list)
            if merged_subtitle_path:
                logger.info(f"字幕文件合并成功->{merged_subtitle_path}")
            else:
@ -156,6 +459,15 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
        logger.warning("没有需要合并的音频/字幕")
        merged_audio_path = ""
        merged_subtitle_path = ""
+        if getattr(params, "subtitle_enabled", True):
+            try:
+                merged_subtitle_path = _create_programmatic_subtitle_file(
+                    task_id,
+                    new_script_list,
+                    params,
+                )
+            except Exception as e:
+                logger.warning(f"程序化字幕生成失败: {e}")

    """
    5. 合并视频
@ -200,10 +512,19 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
    6. 合并字幕/BGM/配音/视频
    """
    output_video_path = path.join(utils.task_dir(task_id), f"combined.mp4")
-    logger.info(f"\n\n## 6. 最后一步: 合并字幕/BGM/配音/视频 -> {output_video_path}")
+    auto_transcription_enabled = _is_auto_transcription_enabled(params)
+    merge_output_video_path = (
+        path.join(utils.task_dir(task_id), "combined_without_auto_subtitles.mp4")
+        if auto_transcription_enabled
+        else output_video_path
+    )
+    logger.info(f"\n\n## 6. 最后一步: 合并字幕/BGM/配音/视频 -> {merge_output_video_path}")

    # bgm_path = '/Users/apple/Desktop/home/NarratoAI/resource/songs/bgm.mp3'
-    bgm_path = utils.get_bgm_file()
+    bgm_path = utils.get_bgm_file(
+        bgm_type=getattr(params, "bgm_type", "random"),
+        bgm_file=getattr(params, "bgm_file", ""),
+    )

    # 获取优化的音量配置
    optimized_volumes = get_recommended_volumes_for_content('mixed')
@ -232,24 +553,39 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
        'bgm_volume': final_bgm_volume,  # 背景音乐音量（优化后）
        'original_audio_volume': final_original_volume,  # 视频原声音量（优化后）
        'keep_original_audio': True,  # 是否保留原声
-        'subtitle_enabled': params.subtitle_enabled,  # 是否启用字幕 - 修复字幕开关bug
+        'subtitle_enabled': params.subtitle_enabled and not auto_transcription_enabled,
        'subtitle_font': params.font_name,  # 这里使用相对字体路径，会自动在 font_dir() 目录下查找
        'subtitle_font_size': params.font_size,
        'subtitle_color': params.text_fore_color,
        'subtitle_bg_color': None,  # 直接使用None表示透明背景
        'subtitle_position': params.subtitle_position,
        'custom_position': params.custom_position,
-        'threads': params.n_threads
+        'threads': params.n_threads,
+        **_build_subtitle_mask_options(params, enabled=not auto_transcription_enabled),
    }
    generate_video.merge_materials(
        video_path=combined_video_path,
        audio_path=merged_audio_path,
        subtitle_path=merged_subtitle_path,
        bgm_path=bgm_path,
-        output_path=output_video_path,
+        output_path=merge_output_video_path,
        options=options
    )

+    auto_subtitle_path = ""
+    if auto_transcription_enabled:
+        sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=90)
+        logger.info("\n\n## 7. 自动转录最终视频字幕")
+        auto_subtitle_path = _transcribe_final_video(task_id, merge_output_video_path, params)
+        sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=95)
+        logger.info(f"\n\n## 8. 压入自动转录字幕 -> {output_video_path}")
+        _merge_auto_transcribed_subtitles(
+            source_video_path=merge_output_video_path,
+            output_video_path=output_video_path,
+            subtitle_path=auto_subtitle_path,
+            params=params,
+        )
+
    final_video_paths.append(output_video_path)
    combined_video_paths.append(combined_video_path)

@ -259,6 +595,8 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
        "videos": final_video_paths,
        "combined_videos": combined_video_paths
    }
+    if auto_subtitle_path:
+        kwargs["subtitles"] = [auto_subtitle_path]
    sm.state.update_task(task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs)
    return kwargs

@ -277,12 +615,23 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
    global merged_audio_path, merged_subtitle_path

    logger.info(f"\n\n## 开始统一视频处理任务: {task_id}")
-    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=0)
+    _update_video_generation_task(
+        task_id,
+        progress=0,
+        message="正在初始化视频生成任务",
+        step_current=0,
+    )

    """
    1. 加载剪辑脚本
    """
    logger.info("\n\n## 1. 加载视频脚本")
+    _update_video_generation_task(
+        task_id,
+        progress=5,
+        message="正在加载剪辑脚本",
+        step_current=1,
+    )
    video_script_path = path.join(params.video_clip_json_path)

    if path.exists(video_script_path):
@ -308,6 +657,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
    2. 使用 TTS 生成音频素材
    """
    logger.info("\n\n## 2. 根据OST设置生成音频列表")
+    _update_video_generation_task(
+        task_id,
+        progress=10,
+        message="正在生成 TTS 配音",
+        step_current=2,
+    )
    # 只为OST=0 or 2的判断生成音频， OST=0 仅保留解说 OST=2 保留解说和原声
    tts_segments = [
        segment for segment in list_script
@ -324,16 +679,28 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
        voice_pitch=params.voice_pitch,
    )

-    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)
+    _update_video_generation_task(
+        task_id,
+        progress=20,
+        message="TTS 配音生成完成",
+        step_current=2,
+    )

    """
    3. 统一视频裁剪 - 基于OST类型的差异化裁剪策略
    """
    logger.info("\n\n## 3. 统一视频裁剪（基于OST类型）")
+    _update_video_generation_task(
+        task_id,
+        progress=30,
+        message="正在按脚本裁剪视频片段",
+        step_current=3,
+    )

    # 使用新的统一裁剪策略
    video_clip_result = clip_video.clip_video_unified(
        video_origin_path=params.video_origin_path,
+        video_origin_paths=getattr(params, "video_origin_paths", []),
        script_list=list_script,
        tts_results=tts_results
    )
@ -347,12 +714,23 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):

    logger.info(f"统一裁剪完成，处理了 {len(video_clip_result)} 个视频片段")

-    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60)
+    _update_video_generation_task(
+        task_id,
+        progress=60,
+        message="视频片段裁剪完成",
+        step_current=3,
+    )

    """
    4. 合并音频和字幕
    """
    logger.info("\n\n## 4. 合并音频和字幕")
+    _update_video_generation_task(
+        task_id,
+        progress=65,
+        message="正在合并配音和字幕",
+        step_current=4,
+    )
    total_duration = sum([script["duration"] for script in new_script_list])
    if tts_segments:
        try:
@ -364,8 +742,21 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
            )
            logger.info(f"音频文件合并成功->{merged_audio_path}")

-            # 合并字幕文件
-            merged_subtitle_path = subtitle_merger.merge_subtitle_files(new_script_list)
+            # 优先基于脚本文案和成片时间线生成字幕，失败时回退到TTS字幕合并
+            merged_subtitle_path = ""
+            if getattr(params, "subtitle_enabled", True):
+                try:
+                    merged_subtitle_path = _create_programmatic_subtitle_file(
+                        task_id,
+                        new_script_list,
+                        params,
+                    )
+                except Exception as e:
+                    logger.warning(f"程序化字幕生成失败，将尝试合并TTS字幕: {e}")
+
+            if not merged_subtitle_path and getattr(params, "subtitle_enabled", True):
+                merged_subtitle_path = subtitle_merger.merge_subtitle_files(new_script_list)
+
            if merged_subtitle_path:
                logger.info(f"字幕文件合并成功->{merged_subtitle_path}")
            else:
@ -382,6 +773,21 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
        logger.warning("没有需要合并的音频/字幕")
        merged_audio_path = ""
        merged_subtitle_path = ""
+        if getattr(params, "subtitle_enabled", True):
+            try:
+                merged_subtitle_path = _create_programmatic_subtitle_file(
+                    task_id,
+                    new_script_list,
+                    params,
+                )
+            except Exception as e:
+                logger.warning(f"程序化字幕生成失败: {e}")
+    _update_video_generation_task(
+        task_id,
+        progress=70,
+        message="配音和字幕合并完成",
+        step_current=4,
+    )

    """
    5. 合并视频
@ -391,6 +797,12 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):

    combined_video_path = path.join(utils.task_dir(task_id), f"merger.mp4")
    logger.info(f"\n\n## 5. 合并视频: => {combined_video_path}")
+    _update_video_generation_task(
+        task_id,
+        progress=75,
+        message="正在合并视频片段",
+        step_current=5,
+    )

    # 使用统一裁剪后的视频片段
    video_clips = []
@ -410,15 +822,38 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
        video_aspect=params.video_aspect,
        threads=params.n_threads
    )
-    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=80)
+    _update_video_generation_task(
+        task_id,
+        progress=80,
+        message="视频片段合并完成",
+        step_current=5,
+    )

    """
    6. 合并字幕/BGM/配音/视频
    """
    output_video_path = path.join(utils.task_dir(task_id), f"combined.mp4")
-    logger.info(f"\n\n## 6. 最后一步: 合并字幕/BGM/配音/视频 -> {output_video_path}")
+    auto_transcription_enabled = _is_auto_transcription_enabled(params) and not bool(merged_subtitle_path)
+    if _is_auto_transcription_enabled(params) and merged_subtitle_path:
+        logger.info("已生成字幕文件，跳过最终视频自动转录")
+    merge_output_video_path = (
+        path.join(utils.task_dir(task_id), "combined_without_auto_subtitles.mp4")
+        if auto_transcription_enabled
+        else output_video_path
+    )
+    logger.info(f"\n\n## 6. 最后一步: 合并字幕/BGM/配音/视频 -> {merge_output_video_path}")
+    _update_video_generation_task(
+        task_id,
+        progress=85,
+        message="正在合成最终视频",
+        step_current=6,
+        ffmpeg_progress=0,
+    )

-    bgm_path = utils.get_bgm_file()
+    bgm_path = utils.get_bgm_file(
+        bgm_type=getattr(params, "bgm_type", "random"),
+        bgm_file=getattr(params, "bgm_file", ""),
+    )

    # 获取优化的音量配置
    optimized_volumes = get_recommended_volumes_for_content('mixed')
@ -446,24 +881,66 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
        'bgm_volume': final_bgm_volume,
        'original_audio_volume': final_original_volume,
        'keep_original_audio': True,
-        'subtitle_enabled': params.subtitle_enabled,
+        'subtitle_enabled': params.subtitle_enabled and not auto_transcription_enabled,
        'subtitle_font': params.font_name,
        'subtitle_font_size': params.font_size,
        'subtitle_color': params.text_fore_color,
        'subtitle_bg_color': None,
        'subtitle_position': params.subtitle_position,
        'custom_position': params.custom_position,
-        'threads': params.n_threads
+        'threads': params.n_threads,
+        **_build_subtitle_mask_options(params, enabled=not auto_transcription_enabled),
    }
+    final_merge_progress_start = 85
+    final_merge_progress_end = 89 if auto_transcription_enabled else 99
+
+    def update_final_merge_progress(ffmpeg_progress: float):
+        """Translate an ffmpeg percentage into overall task progress and report it.
+
+        The percentage is clamped to 0-100 and scaled into the window between
+        ``final_merge_progress_start`` and ``final_merge_progress_end``; the
+        raw ``ffmpeg_progress`` value is forwarded unchanged.
+        """
+        window = final_merge_progress_end - final_merge_progress_start
+        clamped_percent = max(0.0, min(100.0, float(ffmpeg_progress)))
+        offset = int(round((clamped_percent / 100) * window))
+        _update_video_generation_task(
+            task_id,
+            progress=final_merge_progress_start + offset,
+            message="正在合成最终视频",
+            step_current=6,
+            ffmpeg_progress=ffmpeg_progress,
+        )
+
    generate_video.merge_materials(
        video_path=combined_video_path,
        audio_path=merged_audio_path,
        subtitle_path=merged_subtitle_path,
        bgm_path=bgm_path,
-        output_path=output_video_path,
-        options=options
+        output_path=merge_output_video_path,
+        options=options,
+        progress_callback=update_final_merge_progress,
    )

+    auto_subtitle_path = ""
+    if auto_transcription_enabled:
+        _update_video_generation_task(
+            task_id,
+            progress=90,
+            message="正在自动转录最终视频",
+            step_current=6,
+        )
+        logger.info("\n\n## 7. 自动转录最终视频字幕")
+        auto_subtitle_path = _transcribe_final_video(task_id, merge_output_video_path, params)
+        _update_video_generation_task(
+            task_id,
+            progress=95,
+            message="正在压入自动转录字幕",
+            step_current=6,
+        )
+        logger.info(f"\n\n## 8. 压入自动转录字幕 -> {output_video_path}")
+        _merge_auto_transcribed_subtitles(
+            source_video_path=merge_output_video_path,
+            output_video_path=output_video_path,
+            subtitle_path=auto_subtitle_path,
+            params=params,
+        )
+
    final_video_paths.append(output_video_path)
    combined_video_paths.append(combined_video_path)

@ -473,7 +950,16 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
        "videos": final_video_paths,
        "combined_videos": combined_video_paths
    }
-    sm.state.update_task(task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs)
+    if auto_subtitle_path:
+        kwargs["subtitles"] = [auto_subtitle_path]
+    _update_video_generation_task(
+        task_id,
+        progress=100,
+        message="视频生成完成",
+        step_current=VIDEO_GENERATION_TOTAL_STEPS,
+        state=const.TASK_STATE_COMPLETE,
+        **kwargs
+    )
    return kwargs


--- a/app/services/tavily_search.py
+++ b/app/services/tavily_search.py
@ -0,0 +1,137 @@
+"""Tavily-powered web search helpers for plot analysis."""
+
+from __future__ import annotations
+
+import os
+from typing import Any
+
+import requests
+from loguru import logger
+
+
+TAVILY_API_BASE_URL = "https://api.tavily.com"
+DEFAULT_SEARCH_DEPTH = "basic"
+DEFAULT_MAX_RESULTS = 5
+DEFAULT_TIMEOUT = 20
+
+
+class TavilySearchError(RuntimeError):
+    """Raised when a Tavily search cannot be completed.
+
+    Used by ``search_story_context`` for a blank title, a missing API key,
+    a failed HTTP request, an HTTP status >= 400, and a response body that
+    cannot be decoded as JSON.
+    """
+
+
+def _trim_text(value: Any, max_chars: int) -> str:
+    """Return ``value`` as stripped text, shortened to ``max_chars`` characters.
+
+    Falsy values become an empty string. When the stripped text is longer
+    than ``max_chars`` it is cut, right-stripped again, and suffixed with
+    ``"..."`` (the suffix is not counted against ``max_chars``).
+    """
+    cleaned = str(value if value else "").strip()
+    needs_cut = len(cleaned) > max_chars
+    return cleaned[:max_chars].rstrip() + "..." if needs_cut else cleaned
+
+
+def search_short_drama(
+    short_name: str,
+    api_key: str | None = None,
+    *,
+    search_depth: str = DEFAULT_SEARCH_DEPTH,
+    max_results: int = DEFAULT_MAX_RESULTS,
+    timeout: int = DEFAULT_TIMEOUT,
+) -> dict[str, Any]:
+    """Look up web context for a short drama title through Tavily.
+
+    Delegates to ``search_story_context`` with short-drama specific search
+    keywords and the matching empty-title error message; all other options
+    are forwarded unchanged.
+    """
+    passthrough_options = {
+        "search_depth": search_depth,
+        "max_results": max_results,
+        "timeout": timeout,
+    }
+    return search_story_context(
+        short_name,
+        api_key,
+        search_keywords="短剧 剧情 介绍 人物 结局",
+        empty_name_message="短剧名称不能为空",
+        **passthrough_options,
+    )
+
+
+def search_story_context(
+    title: str,
+    api_key: str | None = None,
+    *,
+    search_keywords: str = "剧情 介绍 人物 结局",
+    empty_name_message: str = "作品名称不能为空",
+    search_depth: str = DEFAULT_SEARCH_DEPTH,
+    max_results: int = DEFAULT_MAX_RESULTS,
+    timeout: int = DEFAULT_TIMEOUT,
+) -> dict[str, Any]:
+    """Search the web for background on a story title with Tavily.
+
+    Args:
+        title: Title of the work; surrounding whitespace is stripped.
+        api_key: Tavily API key. When empty, the ``TAVILY_API_KEY``
+            environment variable is used instead.
+        search_keywords: Keywords appended to the title to build the query.
+        empty_name_message: Error message used when ``title`` is blank.
+        search_depth: Sent as ``search_depth``; a blank value falls back to
+            ``DEFAULT_SEARCH_DEPTH``.
+        max_results: Requested number of results, clamped to 1-10. A falsy
+            value falls back to ``DEFAULT_MAX_RESULTS``.
+        timeout: Timeout forwarded to ``requests.post``.
+
+    Returns:
+        The JSON object decoded from the Tavily ``/search`` response.
+
+    Raises:
+        TavilySearchError: If the title or API key is missing, the request
+            fails, the server answers with HTTP status >= 400, or the body
+            is not a JSON object.
+    """
+    title = str(title or "").strip()
+    if not title:
+        raise TavilySearchError(empty_name_message)
+
+    api_key = (api_key or os.getenv("TAVILY_API_KEY") or "").strip()
+    if not api_key:
+        raise TavilySearchError("Tavily API Key 未配置")
+
+    query = f"{title} {search_keywords}".strip()
+    payload = {
+        "query": query,
+        "search_depth": search_depth or DEFAULT_SEARCH_DEPTH,
+        "topic": "general",
+        "max_results": max(1, min(int(max_results or DEFAULT_MAX_RESULTS), 10)),
+        "include_answer": True,
+        "include_raw_content": False,
+        "include_images": False,
+    }
+
+    try:
+        response = requests.post(
+            f"{TAVILY_API_BASE_URL}/search",
+            headers={
+                "Authorization": f"Bearer {api_key}",
+                "Content-Type": "application/json",
+            },
+            json=payload,
+            timeout=timeout,
+        )
+    except requests.RequestException as exc:
+        raise TavilySearchError(f"Tavily 请求失败: {exc}") from exc
+
+    if response.status_code >= 400:
+        message = _trim_text(response.text, 500)
+        raise TavilySearchError(f"Tavily 请求失败: HTTP {response.status_code} {message}")
+
+    try:
+        data = response.json()
+    except ValueError as exc:
+        raise TavilySearchError("Tavily 返回内容不是有效 JSON") from exc
+
+    # Valid JSON is not necessarily an object (it may be a list, string or
+    # null). Reject it here so callers always get a dict and failures stay
+    # within TavilySearchError instead of surfacing as AttributeError.
+    if not isinstance(data, dict):
+        raise TavilySearchError("Tavily 返回内容不是 JSON 对象")
+
+    # Logging must never break a successful search: only count real lists.
+    results = data.get("results")
+    result_count = len(results) if isinstance(results, list) else 0
+    logger.info(
+        "Tavily 剧情检索完成: query={}, results={}",
+        query,
+        result_count,
+    )
+    return data
+
+def format_search_context(search_data: dict[str, Any], *, max_chars: int = 6000) -> str:
+    """Render a Tavily response as compact text suitable for LLM context.
+
+    The output has a header with the query, an optional answer section
+    (trimmed to 1200 characters) and an optional numbered source list where
+    title, URL and summary are trimmed to 120, 240 and 700 characters. The
+    joined text is finally trimmed to ``max_chars``. Empty input yields
+    an empty string.
+    """
+    if not search_data:
+        return ""
+
+    sections = ["# Tavily 联网检索结果"]
+    sections.append(f"检索 query: {search_data.get('query', '')}")
+
+    summary = _trim_text(search_data.get("answer"), 1200)
+    if summary:
+        sections += ["", "## 综合回答", summary]
+
+    hits = search_data.get("results") or []
+    if hits:
+        sections += ["", "## 搜索来源"]
+        for rank, hit in enumerate(hits, start=1):
+            hit_title = _trim_text(hit.get("title"), 120)
+            hit_url = _trim_text(hit.get("url"), 240)
+            # Fall back to raw_content when the short content field is empty.
+            hit_summary = _trim_text(hit.get("content") or hit.get("raw_content"), 700)
+            sections.append(f"{rank}. 标题: {hit_title}")
+            sections.append(f"   来源: {hit_url}")
+            sections.append(f"   摘要: {hit_summary}")
+
+    joined = "\n".join(sections).strip()
+    return _trim_text(joined, max_chars)
--- a/app/services/test_fun_asr_subtitle_unittest.py
+++ b/app/services/test_fun_asr_subtitle_unittest.py
@ -12,9 +12,11 @@ from app.services import fun_asr_subtitle as fasr


 class FakeResponse:
-    def __init__(self, payload=None, status_code=200):
+    def __init__(self, payload=None, status_code=200, text=None):
        self.payload = payload or {}
        self.status_code = status_code
+        self.text = text
+        self.content = text.encode("utf-8") if isinstance(text, str) else b""

    def json(self):
        return self.payload
@ -375,6 +377,195 @@ class FunAsrServiceTests(unittest.TestCase):
            fasr.download_transcription_result("https://result.example/bad.json", session=MalformedDownloadSession({}))


+class LocalFunAsrServiceTests(unittest.TestCase):
+    """Tests for the local FunASR helpers, driven by fake HTTP sessions."""
+
+    def test_request_local_fun_asr_posts_file_and_options(self):
+        """The request is a POST to ``http://<api_url>/asr`` with file and form options."""
+        # Fake session that records every POST and returns a canned result.
+        class LocalSession:
+            def __init__(self):
+                self.calls = []
+
+            def post(self, url, **kwargs):
+                self.calls.append(("POST", url, kwargs))
+                return FakeResponse({"text": "你好", "srt_file": "/tmp/out.srt"})
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            local_file = Path(tmp_dir) / "audio.wav"
+            local_file.write_bytes(b"audio")
+            session = LocalSession()
+
+            # api_url is given without scheme or path on purpose.
+            result = fasr.request_local_fun_asr(
+                str(local_file),
+                api_url="127.0.0.1:7860",
+                hotword="NarratoAI",
+                enable_spk=True,
+                timeout=123,
+                session=session,
+            )
+
+        self.assertEqual("你好", result["text"])
+        self.assertEqual("POST", session.calls[0][0])
+        self.assertEqual("http://127.0.0.1:7860/asr", session.calls[0][1])
+        self.assertEqual({"hotword": "NarratoAI", "enable_spk": "true"}, session.calls[0][2]["data"])
+        self.assertEqual(123, session.calls[0][2]["timeout"])
+        self.assertIn("file", session.calls[0][2]["files"])
+
+    def test_create_with_local_fun_asr_copies_pack_srt_file(self):
+        """When the response names an existing ``srt_file``, its content ends up in the output."""
+        # Fake session whose response points at an SRT file on local disk.
+        class LocalSession:
+            def __init__(self, srt_file):
+                self.srt_file = srt_file
+                self.calls = []
+
+            def post(self, url, **kwargs):
+                self.calls.append(("POST", url, kwargs))
+                return FakeResponse({"text": "你好", "srt_file": str(self.srt_file)})
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            local_file = Path(tmp_dir) / "audio.wav"
+            local_file.write_bytes(b"audio")
+            pack_srt = Path(tmp_dir) / "pack.srt"
+            pack_srt.write_text("1\n00:00:00,000 --> 00:00:01,000\n你好\n", encoding="utf-8")
+            subtitle_file = Path(tmp_dir) / "out.srt"
+
+            result_path = fasr.create_with_local_fun_asr(
+                str(local_file),
+                subtitle_file=str(subtitle_file),
+                api_url="http://127.0.0.1:7860",
+                session=LocalSession(pack_srt),
+            )
+
+            self.assertEqual(str(subtitle_file), result_path)
+            self.assertEqual(pack_srt.read_text(encoding="utf-8"), subtitle_file.read_text(encoding="utf-8"))
+
+    def test_create_with_local_fun_asr_downloads_relative_srt(self):
+        """A relative ``downloads.srt`` path is fetched from the API host and saved."""
+        # Fake session: POST returns a relative download path, GET returns SRT text.
+        class LocalSession:
+            def __init__(self):
+                self.calls = []
+
+            def post(self, url, **kwargs):
+                self.calls.append(("POST", url, kwargs))
+                return FakeResponse({"text": "你好", "downloads": {"srt": "/download/result.srt"}})
+
+            def get(self, url, **kwargs):
+                self.calls.append(("GET", url, kwargs))
+                return FakeResponse(text="1\n00:00:00,000 --> 00:00:01,000\n你好\n")
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            local_file = Path(tmp_dir) / "audio.wav"
+            local_file.write_bytes(b"audio")
+            subtitle_file = Path(tmp_dir) / "out.srt"
+            session = LocalSession()
+
+            result_path = fasr.create_with_local_fun_asr(
+                str(local_file),
+                subtitle_file=str(subtitle_file),
+                api_url="http://127.0.0.1:7860/asr",
+                session=session,
+            )
+
+            self.assertEqual(str(subtitle_file), result_path)
+            # The second recorded call is the GET; the "/asr" suffix must not leak into its URL.
+            self.assertEqual("http://127.0.0.1:7860/download/result.srt", session.calls[1][1])
+            self.assertIn("你好", subtitle_file.read_text(encoding="utf-8"))
+
+    def test_local_fun_asr_result_to_srt_uses_raw_timestamps(self):
+        """SRT cues are built from the timestamp pairs found under ``raw``."""
+        # NOTE(review): timestamp pairs look like [start_ms, end_ms] per token — confirm against fasr.
+        result = {
+            "raw": [
+                {
+                    "text": "你好，世界。",
+                    "timestamp": [[0, 300], [300, 600], [600, 900], [900, 1200]],
+                }
+            ]
+        }
+
+        srt = fasr.local_fun_asr_result_to_srt(result, max_chars=20)
+
+        self.assertIn("00:00:00,000 --> 00:00:00,600\n你好，", srt)
+        self.assertIn("世界。", srt)
+
+
+class LocalFireRedAsrServiceTests(unittest.TestCase):
+    """Tests for the local FireRedASR helpers, driven by fake HTTP sessions."""
+
+    def test_request_local_firered_asr_posts_file_and_options(self):
+        """The request is a POST to ``http://<api_url>/asr`` with boolean flags sent as strings."""
+        # Fake session that records every POST and returns a canned result.
+        class LocalSession:
+            def __init__(self):
+                self.calls = []
+
+            def post(self, url, **kwargs):
+                self.calls.append(("POST", url, kwargs))
+                return FakeResponse({"text": "你好", "srt_url": "/outputs/out.srt"})
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            local_file = Path(tmp_dir) / "audio.wav"
+            local_file.write_bytes(b"audio")
+            session = LocalSession()
+
+            # api_url is given without scheme or path on purpose.
+            result = fasr.request_local_firered_asr(
+                str(local_file),
+                api_url="127.0.0.1:7867",
+                enable_vad=True,
+                enable_lid=False,
+                enable_punc=True,
+                return_timestamp=True,
+                timeout=456,
+                session=session,
+            )
+
+        self.assertEqual("你好", result["text"])
+        self.assertEqual("POST", session.calls[0][0])
+        self.assertEqual("http://127.0.0.1:7867/asr", session.calls[0][1])
+        self.assertEqual(
+            {
+                "enable_vad": "true",
+                "enable_lid": "false",
+                "enable_punc": "true",
+                "return_timestamp": "true",
+            },
+            session.calls[0][2]["data"],
+        )
+        self.assertEqual(456, session.calls[0][2]["timeout"])
+        self.assertIn("file", session.calls[0][2]["files"])
+
+    def test_create_with_local_firered_asr_downloads_srt_url(self):
+        """A relative ``srt_url`` is fetched from the API host and saved to the subtitle file."""
+        # Fake session: POST returns a relative srt_url, GET returns SRT text.
+        class LocalSession:
+            def __init__(self):
+                self.calls = []
+
+            def post(self, url, **kwargs):
+                self.calls.append(("POST", url, kwargs))
+                return FakeResponse({"text": "你好", "srt_url": "/outputs/result.srt"})
+
+            def get(self, url, **kwargs):
+                self.calls.append(("GET", url, kwargs))
+                return FakeResponse(text="1\n00:00:00,000 --> 00:00:01,000\n你好\n")
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            local_file = Path(tmp_dir) / "audio.wav"
+            local_file.write_bytes(b"audio")
+            subtitle_file = Path(tmp_dir) / "out.srt"
+            session = LocalSession()
+
+            result_path = fasr.create_with_local_firered_asr(
+                str(local_file),
+                subtitle_file=str(subtitle_file),
+                api_url="http://127.0.0.1:7867",
+                session=session,
+            )
+
+            self.assertEqual(str(subtitle_file), result_path)
+            # The second recorded call is the GET for the subtitle download.
+            self.assertEqual("http://127.0.0.1:7867/outputs/result.srt", session.calls[1][1])
+            self.assertIn("你好", subtitle_file.read_text(encoding="utf-8"))
+
+    def test_firered_asr_result_to_srt_uses_sentence_timestamps(self):
+        """Each entry in ``sentences`` becomes one numbered cue using its start_ms/end_ms."""
+        result = {
+            "sentences": [
+                {"text": "你好。", "start_ms": 40, "end_ms": 900},
+                {"text": "欢迎观看。", "start_ms": 900, "end_ms": 2100},
+            ]
+        }
+
+        srt = fasr.firered_asr_result_to_srt(result)
+
+        self.assertIn("1\n00:00:00,040 --> 00:00:00,900\n你好。", srt)
+        self.assertIn("2\n00:00:00,900 --> 00:00:02,100\n欢迎观看。", srt)
+
+
 class FunAsrConfigTests(unittest.TestCase):
    def test_save_config_persists_fun_asr_section(self):
        original_config_file = cfg.config_file
@ -395,6 +586,9 @@ class FunAsrConfigTests(unittest.TestCase):

    def test_config_example_fun_asr_section_parses(self):
        config_data = tomllib.loads(Path("config.example.toml").read_text(encoding="utf-8"))
+        self.assertEqual("local", config_data["fun_asr"]["backend"])
+        self.assertEqual("http://127.0.0.1:7860", config_data["fun_asr"]["api_url"])
+        self.assertEqual("http://127.0.0.1:7867", config_data["fun_asr"]["firered_api_url"])
        self.assertEqual("fun-asr", config_data["fun_asr"]["model"])
        self.assertIn("api_key", config_data["fun_asr"])

--- a/app/services/test_jianying_task_unittest.py
+++ b/app/services/test_jianying_task_unittest.py
@ -0,0 +1,426 @@
+import json
+import tempfile
+import unittest
+from pathlib import Path
+from unittest.mock import patch
+
+from app.models.schema import VideoClipParams
+from app.services import jianying_draft_builder, jianying_task
+
+
+DraftPathPlaceholder = "##_draftpath_placeholder_0E685133-18CE-45ED-8CB8-2904A212EC80_##"  # prefix the draft tests in this file expect on material paths in draft_info.json
+
+
+class JianyingTaskTests(unittest.TestCase):
+    def test_normalize_indextts_uses_valid_param_reference(self):  # a bare path to an existing file gains the "indextts:" prefix
+        with tempfile.NamedTemporaryFile(suffix=".wav") as ref:
+            params = VideoClipParams(tts_engine="indextts", voice_name=ref.name)  # voice_name is the temp file path without any prefix
+
+            jianying_task._normalize_indextts_reference_audio(params)
+
+            self.assertEqual(f"indextts:{ref.name}", params.voice_name)
+
+    def test_normalize_indextts_uses_config_reference_when_param_is_stale(self):  # falls back to config.indextts["reference_audio"]
+        with tempfile.TemporaryDirectory() as temp_dir:
+            ref_path = Path(temp_dir) / "reference.wav"
+            ref_path.write_bytes(b"fake wav")  # placeholder bytes, not real audio
+            params = VideoClipParams(tts_engine="indextts", voice_name="zh-CN-YunjianNeural")  # a voice id, not an audio path
+
+            with patch.dict(jianying_task.config.indextts, {"reference_audio": str(ref_path)}, clear=False):  # patch.dict restores the config on exit
+                jianying_task._normalize_indextts_reference_audio(params)
+
+            self.assertEqual(f"indextts:{ref_path}", params.voice_name)
+
+    def test_normalize_indextts2_uses_valid_param_reference(self):  # an already prefixed, existing reference is left as it is
+        with tempfile.NamedTemporaryFile(suffix=".wav") as ref:
+            params = VideoClipParams(tts_engine="indextts2", voice_name=f"indextts2:{ref.name}")
+
+            jianying_task._normalize_indextts_reference_audio(params)
+
+            self.assertEqual("indextts2", params.tts_engine)  # the engine must not be rewritten
+            self.assertEqual(f"indextts2:{ref.name}", params.voice_name)
+
+    def test_normalize_indextts2_uses_config_reference_when_param_is_stale(self):  # falls back to config.indextts2["reference_audio"]
+        with tempfile.TemporaryDirectory() as temp_dir:
+            ref_path = Path(temp_dir) / "reference.wav"
+            ref_path.write_bytes(b"fake wav")  # placeholder bytes, not real audio
+            params = VideoClipParams(tts_engine="indextts2", voice_name="zh-CN-YunjianNeural")  # a voice id, not an audio path
+
+            with patch.dict(jianying_task.config.indextts2, {"reference_audio": str(ref_path)}, clear=False):
+                jianying_task._normalize_indextts_reference_audio(params)
+
+            self.assertEqual(f"indextts2:{ref_path}", params.voice_name)  # result carries the indextts2 prefix, not the indextts one
+
+    def test_normalize_omnivoice_clone_uses_valid_param_reference(self):  # voice_clone mode keeps an existing, prefixed reference
+        with tempfile.NamedTemporaryFile(suffix=".wav") as ref:
+            params = VideoClipParams(tts_engine="omnivoice", voice_name=f"omnivoice:{ref.name}")
+
+            with patch.dict(jianying_task.config.omnivoice, {"mode": "voice_clone"}, clear=False):  # only "mode" is overridden; other keys stay as configured
+                jianying_task._normalize_indextts_reference_audio(params)
+
+            self.assertEqual(f"omnivoice:{ref.name}", params.voice_name)
+
+    def test_normalize_omnivoice_auto_does_not_require_reference(self):  # mode "auto" with an empty reference must not raise
+        params = VideoClipParams(tts_engine="omnivoice", voice_name="omnivoice:auto")
+
+        with patch.dict(jianying_task.config.omnivoice, {"mode": "auto", "reference_audio": ""}, clear=False):
+            jianying_task._normalize_indextts_reference_audio(params)
+
+        self.assertEqual("omnivoice:auto", params.voice_name)  # voice_name is left untouched
+
+    def test_normalize_indextts_requires_existing_reference_audio(self):  # neither the param nor the config names a reference -> ValueError
+        params = VideoClipParams(tts_engine="indextts", voice_name="zh-CN-YunjianNeural")
+
+        with patch.dict(jianying_task.config.indextts, {"reference_audio": ""}, clear=False):
+            with self.assertRaisesRegex(ValueError, "IndexTTS-1.5 参考音频不存在"):  # regex search: the "." in "1.5" matches any character
+                jianying_task._normalize_indextts_reference_audio(params)
+
+    def test_floor_duration_to_milliseconds(self):  # expects truncation to three decimals, not rounding
+        self.assertAlmostEqual(6.997, jianying_task._floor_duration_to_milliseconds(6.997333))
+        self.assertAlmostEqual(7.0, jianying_task._floor_duration_to_milliseconds(7.000999))  # rounding to three decimals would give 7.001
+
+    def test_clamp_duration_to_media_uses_actual_media_duration(self):  # a request longer than the probed media is cut down to it
+        duration_cache = {}
+
+        with patch.object(jianying_task, "get_media_duration_ffprobe", return_value=4.2809):  # probe mocked: the media is 4.2809 s long
+            duration = jianying_task._clamp_duration_to_media(
+                requested_duration=4.31,
+                media_file="/tmp/clip.mp4",
+                duration_cache=duration_cache,
+                media_label="视频素材",
+            )
+
+        self.assertAlmostEqual(4.28, duration)  # consistent with 4.2809 floored to whole milliseconds
+
+    def test_clamp_duration_to_media_respects_source_start_time(self):  # only the media left after source_start_time may be used
+        duration_cache = {}
+
+        with patch.object(jianying_task, "get_media_duration_ffprobe", return_value=10.0):  # probe mocked: the media is 10.0 s long
+            duration = jianying_task._clamp_duration_to_media(
+                requested_duration=4.0,
+                media_file="/tmp/original.mp4",
+                duration_cache=duration_cache,
+                media_label="原始视频素材",
+                source_start_time=8.5,
+            )
+
+        self.assertAlmostEqual(1.5, duration)  # 10.0 - 8.5 = 1.5 s remain, less than the requested 4.0 s
+
+    def test_format_seconds_for_trange_uses_millisecond_precision(self):  # 4.28 is rendered with three decimals and an "s" suffix
+        self.assertEqual("4.280s", jianying_task._format_seconds_for_trange(4.28))
+
+    def test_write_plaintext_jianying_draft_creates_root_package(self):  # checks the files and key JSON fields written for one narrated clip
+        with tempfile.TemporaryDirectory() as temp_dir:
+            root_path = Path(temp_dir) / "drafts"
+            output_dir = Path(temp_dir) / "task"
+            root_path.mkdir()
+            output_dir.mkdir()
+            video_path = output_dir / "clip:01.mp4"  # ":" in the name; the copied asset is expected to be named clip_01.mp4
+            audio_path = output_dir / "audio_00_00_00,000-00_00_04,310.mp3"
+            video_path.write_bytes(b"fake video")
+            audio_path.write_bytes(b"fake audio")
+
+            params = VideoClipParams(
+                video_origin_path=str(video_path),
+                original_volume=0.4,
+                tts_volume=0.9,
+            )
+            script = [
+                {
+                    "OST": 0,
+                    "start_time": 0.0,
+                    "duration": 4.31,
+                    "timestamp": "00:00:00,000-00:00:04,310",
+                    "video": str(video_path),
+                    "audio": str(audio_path),
+                }
+            ]
+
+            def fake_duration(file_path):  # 4.2809 s for the source video path, 5.0 s for any other path
+                return 4.2809 if file_path == str(video_path) else 5.0
+
+            with (  # both ffprobe helpers are replaced, so no real media is needed
+                patch.object(jianying_draft_builder, "_get_media_duration_ffprobe", side_effect=fake_duration),
+                patch.object(
+                    jianying_draft_builder,
+                    "_get_video_metadata_ffprobe",
+                    return_value=(4_280_000, 720, 1280),
+                ),
+            ):
+                draft_path, draft_name = jianying_draft_builder.write_plaintext_jianying_draft(
+                    str(root_path),
+                    "NarratoAI_test",
+                    script,
+                    params,
+                    str(output_dir),
+                )
+
+            draft_dir = Path(draft_path)
+            self.assertEqual("NarratoAI_test", draft_name)
+            self.assertTrue((draft_dir / "draft_info.json").exists())
+            self.assertTrue((draft_dir / "template-2.tmp").exists())
+            self.assertTrue((draft_dir / "template.tmp").exists())
+            self.assertTrue((draft_dir / "draft_cover.jpg").exists())
+            self.assertFalse((draft_dir / "draft_content_legacy.json").exists())  # these two files must not be written
+            self.assertFalse((draft_dir / "Timelines" / "project.json").exists())
+            self.assertTrue((draft_dir / "assets" / "video" / "clip_01.mp4").exists())  # media is copied below the draft's assets directory
+            self.assertTrue((draft_dir / "assets" / "audio" / audio_path.name).exists())
+
+            draft_info = json.loads((draft_dir / "draft_info.json").read_text(encoding="utf-8"))
+            self.assertEqual("169.0.0", draft_info["new_version"])
+            self.assertEqual("NarratoAI_test", draft_info["name"])
+            self.assertEqual(54, len(draft_info["materials"]))  # number of keys in the materials mapping
+            self.assertEqual(
+                f"{DraftPathPlaceholder}/assets/video/clip_01.mp4",
+                draft_info["materials"]["videos"][0]["path"],
+            )
+            self.assertEqual(
+                f"{DraftPathPlaceholder}/assets/audio/{audio_path.name}",
+                draft_info["materials"]["audios"][0]["path"],
+            )
+            self.assertEqual(4_280_000, draft_info["tracks"][0]["segments"][0]["source_timerange"]["duration"])  # presumably microseconds (4.28 s) - confirm against the builder
+            self.assertEqual(4_280_000, draft_info["tracks"][1]["segments"][0]["source_timerange"]["duration"])  # second track gets the same duration although the script asked for 4.31 s
+
+            attachment_editing = json.loads((draft_dir / "attachment_editing.json").read_text(encoding="utf-8"))
+            self.assertEqual("1.0.0", attachment_editing["editing_draft"]["version"])
+            self.assertFalse(attachment_editing["editing_draft"]["is_use_audio_separation"])
+
+            empty_template = json.loads((draft_dir / "template.tmp").read_text(encoding="utf-8"))
+            self.assertEqual("75.0.0", empty_template["new_version"])
+            self.assertEqual([], empty_template["tracks"])  # template.tmp carries no tracks
+
+            root_meta = json.loads((root_path / "root_meta_info.json").read_text(encoding="utf-8"))  # written in the drafts root, not inside the draft directory
+            self.assertEqual("NarratoAI_test", root_meta["all_draft_store"][0]["draft_name"])
+            self.assertEqual(str(draft_dir / "draft_info.json"), root_meta["all_draft_store"][0]["draft_json_file"])
+
+    def test_write_plaintext_jianying_draft_uses_source_timerange_and_writes_subtitles(self):  # segment must point into the source video; the SRT becomes a text track
+        with tempfile.TemporaryDirectory() as temp_dir:
+            root_path = Path(temp_dir) / "drafts"
+            output_dir = Path(temp_dir) / "task"
+            root_path.mkdir()
+            output_dir.mkdir()
+            video_path = output_dir / "source.mp4"
+            audio_path = output_dir / "audio_00_00_02,000-00_00_04,000.mp3"
+            subtitle_path = output_dir / "script_subtitles.srt"
+            video_path.write_bytes(b"fake source video")
+            audio_path.write_bytes(b"fake audio")
+            subtitle_path.write_text(  # a single cue lasting 1.5 s
+                "1\n00:00:00,000 --> 00:00:01,500\n测试字幕\n",
+                encoding="utf-8",
+            )
+
+            params = VideoClipParams(
+                video_origin_path=str(video_path),
+                original_volume=0.4,
+                tts_volume=0.9,
+                subtitle_enabled=True,
+                font_size=60,
+                text_fore_color="#FFFFFF",
+            )
+            script = [
+                {
+                    "OST": 0,
+                    "start_time": 2.0,
+                    "source_start_time": 2.0,
+                    "duration": 3.0,
+                    "timestamp": "00:00:02,000-00:00:05,000",
+                    "video": str(video_path),
+                    "audio": str(audio_path),
+                    "use_source_timerange": True,
+                }
+            ]
+
+            def fake_duration(file_path):  # 10.0 s for the source video path, 3.0 s for any other path
+                return 10.0 if file_path == str(video_path) else 3.0
+
+            with (  # both ffprobe helpers are replaced, so no real media is needed
+                patch.object(jianying_draft_builder, "_get_media_duration_ffprobe", side_effect=fake_duration),
+                patch.object(
+                    jianying_draft_builder,
+                    "_get_video_metadata_ffprobe",
+                    return_value=(10_000_000, 1920, 1080),
+                ),
+            ):
+                draft_path, _ = jianying_draft_builder.write_plaintext_jianying_draft(
+                    str(root_path),
+                    "NarratoAI_source",
+                    script,
+                    params,
+                    str(output_dir),
+                    subtitle_path=str(subtitle_path),
+                )
+
+            draft_info = json.loads((Path(draft_path) / "draft_info.json").read_text(encoding="utf-8"))
+            self.assertEqual(1, len(draft_info["materials"]["videos"]))
+            self.assertEqual(1, len(draft_info["materials"]["texts"]))  # one text material for the single SRT cue
+            self.assertIn("测试字幕", draft_info["materials"]["texts"][0]["content"])
+
+            video_segment = draft_info["tracks"][0]["segments"][0]
+            self.assertEqual(2_000_000, video_segment["source_timerange"]["start"])  # source_start_time 2.0, presumably in microseconds
+            self.assertEqual(3_000_000, video_segment["source_timerange"]["duration"])
+            self.assertEqual(0.0, video_segment["volume"])  # the item is OST 0; its video segment is expected to be silent
+
+            text_tracks = [track for track in draft_info["tracks"] if track["type"] == "text"]
+            self.assertEqual(1, len(text_tracks))
+            self.assertEqual(1, len(text_tracks[0]["segments"]))
+            self.assertEqual(1_500_000, text_tracks[0]["segments"][0]["target_timerange"]["duration"])  # matches the 1.5 s cue written above
+
+    def test_build_jianying_draft_script_references_original_video(self):  # draft items must point at the original source videos
+        with tempfile.TemporaryDirectory() as temp_dir:
+            video_one = Path(temp_dir) / "one.mp4"
+            video_two = Path(temp_dir) / "two.mp4"
+            audio_path = Path(temp_dir) / "audio.mp3"
+            video_one.write_bytes(b"one")
+            video_two.write_bytes(b"two")
+            audio_path.write_bytes(b"audio")
+
+            params = VideoClipParams(
+                video_origin_path=str(video_one),
+                video_origin_paths=[str(video_one), str(video_two)],
+            )
+            script = [
+                {
+                    "_id": 9,
+                    "video_id": 2,  # expected to select the second entry of video_origin_paths
+                    "timestamp": "00:00:05,000-00:00:07,000",
+                    "narration": "解说",
+                    "OST": 0,
+                }
+            ]
+            tts_results = [
+                {
+                    "_id": 9,  # same _id as the script item above
+                    "timestamp": "00:00:05,000-00:00:07,000",
+                    "audio_file": str(audio_path),
+                    "subtitle_file": "",
+                    "duration": 1.25,
+                }
+            ]
+
+            draft_script = jianying_task._build_jianying_draft_script(script, params, tts_results)
+
+            self.assertEqual(str(video_two), draft_script[0]["video"])
+            self.assertEqual(str(audio_path), draft_script[0]["audio"])
+            self.assertEqual(5.0, draft_script[0]["source_start_time"])  # start of the item's timestamp
+            self.assertEqual(1.25, draft_script[0]["duration"])  # the TTS duration, not the 2 s timestamp span
+            self.assertTrue(draft_script[0]["use_source_timerange"])
+
+    def test_get_original_subtitle_paths_falls_back_to_matching_video_name(self):  # params name no subtitle file; one is looked up in subtitle_dir
+        with tempfile.TemporaryDirectory() as temp_dir:
+            temp_path = Path(temp_dir)
+            video_path = temp_path / "episode_20260608010240.mp4"
+            older_subtitle = temp_path / "episode_fun_asr_20260608000100.srt"  # the two subtitles differ only in the timestamp part of the name
+            newer_subtitle = temp_path / "episode_fun_asr_20260608010100.srt"
+            video_path.write_bytes(b"video")
+            older_subtitle.write_text("old", encoding="utf-8")
+            newer_subtitle.write_text("new", encoding="utf-8")
+
+            params = VideoClipParams(video_origin_path=str(video_path))
+
+            with patch.object(jianying_task.utils, "subtitle_dir", return_value=str(temp_path)):  # point the subtitle directory at the temp dir
+                subtitle_paths = jianying_task._get_original_subtitle_paths(params)
+
+            self.assertEqual([str(newer_subtitle)], subtitle_paths)  # only newer_subtitle is expected back
+
+    def test_create_jianying_subtitle_file_includes_original_audio_subtitles(self):  # an OST 1 segment takes its subtitle text from the original video's SRT
+        with tempfile.TemporaryDirectory() as temp_dir:
+            temp_path = Path(temp_dir)
+            task_dir = temp_path / "task"
+            task_dir.mkdir()
+            video_path = temp_path / "episode.mp4"
+            subtitle_path = temp_path / "episode.srt"  # same stem as the video
+            video_path.write_bytes(b"video")
+            subtitle_path.write_text(  # cue lies inside the 5-7 s range used by the script item below
+                "1\n00:00:05,000 --> 00:00:06,500\n原片对白\n",
+                encoding="utf-8",
+            )
+
+            params = VideoClipParams(video_origin_path=str(video_path), subtitle_enabled=True)
+            draft_script = jianying_task._build_jianying_draft_script(
+                [
+                    {
+                        "_id": 1,
+                        "timestamp": "00:00:05,000-00:00:07,000",
+                        "narration": "播放原片1",
+                        "OST": 1,
+                    }
+                ],
+                params,
+                [],  # no TTS results are supplied for this OST 1 item
+            )
+
+            with (
+                patch.object(jianying_task.utils, "subtitle_dir", return_value=str(temp_path)),
+                patch.object(jianying_task.utils, "task_dir", return_value=str(task_dir)),
+            ):
+                output_path = jianying_task._create_jianying_subtitle_file(
+                    "task-id",
+                    draft_script,
+                    params,
+                )
+
+            self.assertTrue(output_path)  # a non-empty path must be returned
+            self.assertIn("原片对白", Path(output_path).read_text(encoding="utf-8"))
+
+    def test_start_export_jianying_draft_does_not_clip_video(self):  # export must hand source-range items to the draft writer without cutting clips
+        with tempfile.TemporaryDirectory() as temp_dir:
+            root_path = Path(temp_dir) / "drafts"
+            task_dir = Path(temp_dir) / "task"
+            root_path.mkdir()
+            task_dir.mkdir()
+            video_path = Path(temp_dir) / "source.mp4"
+            audio_path = task_dir / "audio.mp3"
+            script_path = Path(temp_dir) / "script.json"
+            subtitle_path = task_dir / "script_subtitles.srt"  # never written; only returned by the patched subtitle creator
+            video_path.write_bytes(b"video")
+            audio_path.write_bytes(b"audio")
+            script_path.write_text(
+                json.dumps([
+                    {
+                        "_id": 1,
+                        "timestamp": "00:00:01,000-00:00:03,000",
+                        "narration": "测试解说",
+                        "OST": 0,
+                    }
+                ], ensure_ascii=False),
+                encoding="utf-8",
+            )
+
+            params = VideoClipParams(
+                video_clip_json_path=str(script_path),
+                video_origin_path=str(video_path),
+                tts_engine="edge_tts",
+                voice_name="zh-CN-YunjianNeural",
+                subtitle_enabled=True,
+                draft_name="NarratoAI_no_clip",
+            )
+            tts_results = [
+                {
+                    "_id": 1,
+                    "timestamp": "00:00:01,000-00:00:03,000",
+                    "audio_file": str(audio_path),
+                    "subtitle_file": "",
+                    "duration": 1.5,
+                }
+            ]
+
+            with (  # TTS, subtitle creation, draft writing and clipping are all replaced by mocks
+                patch.dict(jianying_task.config.ui, {"jianying_draft_path": str(root_path)}, clear=False),
+                patch.object(jianying_task.utils, "task_dir", return_value=str(task_dir)),
+                patch.object(jianying_task.voice, "tts_multiple", return_value=tts_results),
+                patch.object(jianying_task, "_create_jianying_subtitle_file", return_value=str(subtitle_path)),
+                patch.object(jianying_task, "write_plaintext_jianying_draft", return_value=(str(root_path / "draft"), "NarratoAI_no_clip")) as write_draft,
+                patch.object(jianying_task.clip_video, "clip_video_unified") as clip_video_unified,
+            ):
+                result = jianying_task.start_export_jianying_draft("task-id", params)
+
+            clip_video_unified.assert_not_called()
+            write_kwargs = write_draft.call_args.kwargs  # the checks below require these values to be passed as keyword arguments
+            self.assertTrue(write_kwargs["new_script_list"][0]["use_source_timerange"])
+            self.assertEqual(str(audio_path), write_kwargs["new_script_list"][0]["audio"])
+            self.assertEqual(str(subtitle_path), write_kwargs["subtitle_path"])
+            self.assertEqual(str(subtitle_path), result["subtitles"][0])  # the result also reports the subtitle path
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/app/services/test_merger_video_concat_unittest.py
+++ b/app/services/test_merger_video_concat_unittest.py
@ -0,0 +1,120 @@
+import subprocess
+import unittest
+from unittest import mock
+
+from app.services import merger_video
+
+
+class MergerVideoConcatTests(unittest.TestCase):
+    def test_can_concat_video_copy_when_signatures_match(self):  # equal stream signatures for both inputs -> copy is allowed
+        signature = {
+            "codec_name": "h264",
+            "profile": "High",
+            "width": 1080,
+            "height": 1920,
+            "pix_fmt": "yuv420p",
+            "r_frame_rate": "30/1",
+            "avg_frame_rate": "30/1",
+            "time_base": "1/15360",
+            "sample_aspect_ratio": "1:1",
+        }
+
+        with mock.patch.object(
+            merger_video,
+            "_get_video_stream_signature",
+            side_effect=[signature, dict(signature)],  # successive calls return equal but distinct dicts
+        ):
+            self.assertTrue(merger_video._can_concat_video_copy(["1.mp4", "2.mp4"]))
+
+    def test_can_concat_video_copy_rejects_mismatched_signature(self):  # one differing signature field -> copy is refused
+        base_signature = {
+            "codec_name": "h264",
+            "profile": "High",
+            "width": 1080,
+            "height": 1920,
+            "pix_fmt": "yuv420p",
+            "r_frame_rate": "30/1",
+            "avg_frame_rate": "30/1",
+            "time_base": "1/15360",
+            "sample_aspect_ratio": "1:1",
+        }
+        mismatch_signature = dict(base_signature, r_frame_rate="24000/1001")  # differs from base_signature only in r_frame_rate
+
+        with mock.patch.object(
+            merger_video,
+            "_get_video_stream_signature",
+            side_effect=[base_signature, mismatch_signature],
+        ):
+            self.assertFalse(merger_video._can_concat_video_copy(["1.mp4", "2.mp4"]))
+
+    def test_concat_video_streams_prefers_copy_when_compatible(self):  # compatible inputs and a passing duration check -> "-c:v copy"
+        completed = subprocess.CompletedProcess(args=["ffmpeg"], returncode=0)
+
+        with (
+            mock.patch.object(merger_video, "_can_concat_video_copy", return_value=True),
+            mock.patch.object(merger_video, "_concat_duration_matches", return_value=True),
+            mock.patch.object(merger_video.subprocess, "run", return_value=completed) as run_mock,  # no real ffmpeg process is started
+        ):
+            merger_video._concat_video_streams(
+                ["1.mp4", "2.mp4"],
+                "concat.txt",
+                "video_concat.mp4",
+                threads=4,
+            )
+
+        cmd = run_mock.call_args.args[0]  # argv of the most recent subprocess.run call
+        self.assertEqual("copy", cmd[cmd.index("-c:v") + 1])
+        self.assertNotIn("libx264", cmd)
+
+    def test_concat_video_streams_falls_back_when_copy_duration_mismatches(self):  # copy run succeeds but the duration check fails -> re-encode
+        completed = subprocess.CompletedProcess(args=["ffmpeg"], returncode=0)
+
+        with (
+            mock.patch.object(merger_video, "_can_concat_video_copy", return_value=True),
+            mock.patch.object(merger_video, "_concat_duration_matches", return_value=False),
+            mock.patch.object(merger_video.os.path, "exists", return_value=False),  # presumably keeps the fallback from touching a real output file - confirm
+            mock.patch.object(merger_video.subprocess, "run", return_value=completed) as run_mock,
+        ):
+            merger_video._concat_video_streams(
+                ["1.mp4", "2.mp4"],
+                "concat.txt",
+                "video_concat.mp4",
+                threads=6,
+            )
+
+        self.assertEqual(2, run_mock.call_count)  # one copy attempt plus one fallback run
+        fallback_cmd = run_mock.call_args_list[1].args[0]  # argv of the second run
+        self.assertEqual("libx264", fallback_cmd[fallback_cmd.index("-c:v") + 1])
+        self.assertEqual("6", fallback_cmd[fallback_cmd.index("-threads") + 1])  # threads=6 appears as the string "6"
+
+    def test_concat_video_streams_falls_back_to_reencode_when_copy_fails(self):  # the copy run raises -> a second run re-encodes with libx264
+        copy_error = subprocess.CalledProcessError(
+            returncode=1,
+            cmd=["ffmpeg"],
+            stderr=b"copy failed",
+        )
+        completed = subprocess.CompletedProcess(args=["ffmpeg"], returncode=0)
+
+        with (
+            mock.patch.object(merger_video, "_can_concat_video_copy", return_value=True),
+            mock.patch.object(
+                merger_video.subprocess,
+                "run",
+                side_effect=[copy_error, completed],  # first call raises, second call succeeds
+            ) as run_mock,
+        ):
+            merger_video._concat_video_streams(
+                ["1.mp4", "2.mp4"],
+                "concat.txt",
+                "video_concat.mp4",
+                threads=8,
+            )
+
+        self.assertEqual(2, run_mock.call_count)
+        fallback_cmd = run_mock.call_args_list[1].args[0]  # argv of the second run
+        self.assertEqual("libx264", fallback_cmd[fallback_cmd.index("-c:v") + 1])
+        self.assertEqual("8", fallback_cmd[fallback_cmd.index("-threads") + 1])  # threads=8 appears as the string "8"
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/app/services/test_multi_video_script_sources_unittest.py
+++ b/app/services/test_multi_video_script_sources_unittest.py
@ -0,0 +1,101 @@
+import json
+import os
+import tempfile
+import unittest
+from unittest import mock
+
+from app.services import clip_video
+from app.utils import check_script
+
+
+class TestMultiVideoScriptSources(unittest.TestCase):
+    def test_clip_command_uses_input_fast_seek(self):  # "-ss" must precede "-i", and the length is given with "-t" rather than "-to"
+        encoder_config = clip_video.get_safe_encoder_config(None)
+
+        cmd = clip_video._build_ffmpeg_command_with_audio_control(
+            input_path="/tmp/input.mp4",
+            output_path="/tmp/output.mp4",
+            start_time="00:27:32.240",
+            end_time="00:27:38.240",
+            encoder_config=encoder_config,
+            hwaccel_args=[],
+            remove_audio=False,
+        )
+
+        self.assertLess(cmd.index("-ss"), cmd.index("-i"))  # the seek option comes before the input
+        self.assertEqual("6", cmd[cmd.index("-t") + 1])  # 00:27:38.240 - 00:27:32.240 = 6 s, expected exactly as "6"
+        self.assertNotIn("-to", cmd)
+
+    def test_check_format_accepts_optional_video_source_fields(self):  # video_id / video_name may sit next to the other item fields
+        script = [
+            {
+                "_id": 1,
+                "video_id": 2,
+                "video_name": "2.mp4",
+                "timestamp": "00:00:00,000-00:00:03,000",
+                "picture": "画面",
+                "narration": "解说",
+                "OST": 0,
+            }
+        ]
+
+        result = check_script.check_format(json.dumps(script, ensure_ascii=False))  # check_format is given the script as a JSON string
+
+        self.assertTrue(result["success"])
+
+    def test_clip_video_unified_resolves_source_by_video_id_and_name(self):  # each item's source video is chosen via video_id or video_name
+        with tempfile.TemporaryDirectory() as temp_dir:
+            video_1 = os.path.join(temp_dir, "1.mp4")
+            video_2 = os.path.join(temp_dir, "2.mp4")
+            for video_path in [video_1, video_2]:
+                with open(video_path, "wb") as file:
+                    file.write(b"video")
+
+            output_dir = os.path.join(temp_dir, "clips")  # not created here; presumably clip_video_unified creates it - confirm
+            used_sources = []
+
+            def fake_process(source_video_path, script_item, output_dir_arg, *_args):  # records the source it was given and writes a dummy clip
+                used_sources.append(source_video_path)
+                output_path = os.path.join(output_dir_arg, f"{script_item['_id']}.mp4")
+                with open(output_path, "wb") as file:
+                    file.write(b"clip")
+                return output_path
+
+            script_list = [
+                {
+                    "_id": 1,
+                    "video_id": 2,  # item identified by video_id only
+                    "timestamp": "00:00:00,000-00:00:03,000",
+                    "picture": "视频2画面",
+                    "narration": "播放原片1",
+                    "OST": 1,
+                },
+                {
+                    "_id": 2,
+                    "video_name": "1.mp4",  # item identified by video_name only
+                    "timestamp": "00:00:03,000-00:00:06,000",
+                    "picture": "视频1画面",
+                    "narration": "播放原片2",
+                    "OST": 1,
+                },
+            ]
+
+            with (
+                mock.patch.object(clip_video, "check_hardware_acceleration", return_value=None),
+                mock.patch.object(clip_video, "_process_original_audio_segment", side_effect=fake_process),
+            ):
+                result = clip_video.clip_video_unified(
+                    video_origin_path=video_1,
+                    video_origin_paths=[video_1, video_2],
+                    script_list=script_list,
+                    tts_results=[],
+                    output_dir=output_dir,
+                    task_id="multi-video-test",
+                )
+
+            self.assertEqual([video_2, video_1], used_sources)  # item 1 -> 2.mp4 (video_id 2), item 2 -> 1.mp4 (video_name)
+            self.assertEqual({1, 2}, set(result.keys()))  # the result is keyed by the items' _id values
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/app/services/test_script_subtitle_unittest.py
+++ b/app/services/test_script_subtitle_unittest.py
@ -0,0 +1,192 @@
+import tempfile
+import unittest
+from pathlib import Path
+
+from app.services import script_subtitle
+
+
+class ScriptSubtitleTests(unittest.TestCase):
+    def test_split_narration_prefers_punctuation_boundaries(self):  # chunks are cut at punctuation marks
+        chunks = script_subtitle.split_narration(
+            "她终于意识到，这场婚姻不是爱情，而是一场交易。",
+            max_chars=12,
+        )
+
+        self.assertEqual(
+            ["她终于意识到", "这场婚姻不是爱情", "而是一场交易"],  # the punctuation marks themselves are not part of the chunks
+            chunks,
+        )
+
+    def test_time_range_parsing_supports_milliseconds(self):  # "HH:MM:SS,mmm-HH:MM:SS,mmm" -> (start, end) in seconds
+        start, end = script_subtitle.parse_time_range("00:00:01,500-00:00:03,250")
+
+        self.assertAlmostEqual(1.5, start)
+        self.assertAlmostEqual(3.25, end)
+
+    def test_create_script_subtitle_file_skips_original_audio_segments(self):  # OST 1 narration is left out; OST 0 and OST 2 narration is kept
+        list_script = [
+            {
+                "_id": 1,
+                "OST": 0,
+                "narration": "第一句解说。第二句解说。",
+                "editedTimeRange": "00:00:00-00:00:04",
+                "duration": 4,
+            },
+            {
+                "_id": 2,
+                "OST": 1,
+                "narration": "这句是原声，不应该默认生成。",
+                "editedTimeRange": "00:00:04-00:00:08",
+                "duration": 4,
+            },
+            {
+                "_id": 3,
+                "OST": 2,
+                "narration": "混合片段也保留解说字幕。",
+                "editedTimeRange": "00:00:08-00:00:12",
+                "duration": 4,
+            },
+        ]
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            output_file = Path(temp_dir) / "script_subtitles.srt"
+            result = script_subtitle.create_script_subtitle_file(
+                task_id="test",
+                list_script=list_script,
+                output_file=str(output_file),
+                max_chars=16,
+            )
+
+            self.assertEqual(str(output_file), result)  # the output path is returned
+            content = output_file.read_text(encoding="utf-8")  # read before the temporary directory is removed
+
+        self.assertIn("00:00:00,000 -->", content)
+        self.assertIn("第一句解说", content)
+        self.assertIn("混合片段也保留解说字幕", content)
+        self.assertNotIn("这句是原声", content)
+        self.assertNotIn("。", content)  # neither punctuation mark may appear anywhere in the file
+        self.assertNotIn("，", content)
+
+    def test_create_script_subtitle_file_uses_duration_when_edited_range_missing(self):  # timing comes from "duration" when editedTimeRange is absent
+        list_script = [
+            {
+                "_id": 1,
+                "OST": 0,
+                "narration": "没有 editedTimeRange 时使用 duration。",
+                "duration": 3,
+            }
+        ]
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            output_file = Path(temp_dir) / "script_subtitles.srt"
+            script_subtitle.create_script_subtitle_file(
+                task_id="test",
+                list_script=list_script,
+                output_file=str(output_file),
+            )
+            content = output_file.read_text(encoding="utf-8")  # read before the temporary directory is removed
+
+        self.assertIn("00:00:00,000 -->", content)  # some cue starts at 0 s
+        self.assertIn("--> 00:00:03,000", content)  # some cue ends at the 3 s duration
+
+    def test_create_script_subtitle_file_includes_original_audio_subtitles(self):  # source SRT cues inside an OST 1 range are moved onto the edited timeline
+        list_script = [
+            {
+                "_id": 1,
+                "OST": 0,
+                "narration": "前情解说。",
+                "editedTimeRange": "00:00:00-00:00:02",
+                "duration": 2,
+            },
+            {
+                "_id": 2,
+                "video_id": 1,
+                "video_name": "source.mp4",
+                "OST": 1,
+                "narration": "播放原片2",
+                "timestamp": "00:00:10,000-00:00:14,000",
+                "sourceTimeRange": "00:00:10,000-00:00:14,000",  # source 10-14 s ...
+                "editedTimeRange": "00:00:02-00:00:06",  # ... is placed at 2-6 s of the edit, i.e. 8 s earlier
+                "duration": 4,
+            },
+        ]
+        original_srt = """1
+00:00:09,000 --> 00:00:11,000
+开头会被裁掉一秒。
+
+2
+00:00:11,500 --> 00:00:13,000
+这句原声对白应该出现！
+
+3
+00:00:13,500 --> 00:00:15,000
+结尾只保留半秒。
+"""
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            subtitle_file = Path(temp_dir) / "source.srt"
+            subtitle_file.write_text(original_srt, encoding="utf-8")
+            output_file = Path(temp_dir) / "script_subtitles.srt"
+            script_subtitle.create_script_subtitle_file(
+                task_id="test",
+                list_script=list_script,
+                output_file=str(output_file),
+                original_subtitle_paths=[str(subtitle_file)],
+                video_origin_paths=["source.mp4"],
+                max_chars=16,
+            )
+            content = output_file.read_text(encoding="utf-8")  # read before the temporary directory is removed
+
+        self.assertIn("前情解说", content)
+        self.assertIn("开头会被裁掉一秒", content)
+        self.assertIn("这句原声对白应该出现", content)
+        self.assertIn("结尾只保留半秒", content)
+        self.assertIn("00:00:02,000 --> 00:00:03,000", content)  # source 10.0-11.0 s: cue 1 clipped at the start of the range
+        self.assertIn("00:00:03,500 --> 00:00:05,000", content)  # source 11.5-13.0 s: cue 2 lies fully inside the range
+        self.assertIn("00:00:05,500 --> 00:00:06,000", content)  # source 13.5-14.0 s: cue 3 clipped at the end of the range
+        self.assertNotIn("播放原片2", content)  # the OST 1 item's own narration text must not become a cue
+
+    def test_create_script_subtitle_file_uses_matching_video_id_for_original_subtitles(self):  # an item with video_id 2 must use the second subtitle file
+        list_script = [
+            {
+                "_id": 1,
+                "video_id": 2,
+                "video_name": "second.mp4",
+                "OST": 1,
+                "narration": "播放原片1",
+                "timestamp": "00:00:01,000-00:00:03,000",
+                "sourceTimeRange": "00:00:01,000-00:00:03,000",
+                "editedTimeRange": "00:00:00-00:00:02",
+                "duration": 2,
+            },
+        ]
+        first_srt = """1
+00:00:01,000 --> 00:00:03,000
+第一个视频的字幕不应该出现。
+"""
+        second_srt = """1
+00:00:01,000 --> 00:00:03,000
+第二个视频的字幕应该出现。
+"""
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            first_file = Path(temp_dir) / "first.srt"
+            second_file = Path(temp_dir) / "second.srt"
+            output_file = Path(temp_dir) / "script_subtitles.srt"
+            first_file.write_text(first_srt, encoding="utf-8")  # both files hold a cue with the same time range, so only the source choice tells them apart
+            second_file.write_text(second_srt, encoding="utf-8")
+            script_subtitle.create_script_subtitle_file(
+                task_id="test",
+                list_script=list_script,
+                output_file=str(output_file),
+                original_subtitle_paths=[str(first_file), str(second_file)],  # listed in the same order as video_origin_paths
+                video_origin_paths=["first.mp4", "second.mp4"],
+            )
+            content = output_file.read_text(encoding="utf-8")
+
+        self.assertIn("第二个视频的字幕应该出现", content)
+        self.assertNotIn("第一个视频的字幕不应该出现", content)
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/app/services/test_short_drama_narration_validation_unittest.py
+++ b/app/services/test_short_drama_narration_validation_unittest.py
@ -0,0 +1,290 @@
+import unittest
+
+from app.services.short_drama_narration_validation import (
+    build_subtitle_index,
+    normalize_script_video_sources,
+    validate_narration_script_items,
+)
+
+
+# Fixture: merged subtitle text for two videos. Each section starts with a
+# "# 视频 N: <file name>" header and a "字幕文件:" line, followed by two SRT cues.
+# setUp feeds this text to build_subtitle_index.
+SUBTITLE_CONTENT = """# 视频 1: first.mp4
+字幕文件: first.srt
+1
+00:00:01,000 --> 00:00:04,000
+女主被众人误会。
+
+2
+00:00:04,000 --> 00:00:08,000
+男主冷眼看着她。
+
+# 视频 2: second.mp4
+字幕文件: second.srt
+1
+00:00:02,000 --> 00:00:05,000
+女主终于拿出证据。
+
+2
+00:00:05,000 --> 00:00:09,000
+众人震惊，反派慌了。
+"""
+
+
+class ShortDramaNarrationValidationTests(unittest.TestCase):
+    """Tests for subtitle indexing, video-source normalization and script validation."""
+
+    def setUp(self):
+        """Build the subtitle index from the two-video fixture before every test."""
+        self.video_paths = ["/tmp/first.mp4", "/tmp/second.mp4"]
+        self.subtitle_index = build_subtitle_index(SUBTITLE_CONTENT, self.video_paths)
+
+    def test_build_subtitle_index_preserves_multi_video_sources(self):
+        """The index holds all four cues and keeps each cue's video id, name and timestamp."""
+        self.assertEqual(4, len(self.subtitle_index))
+        self.assertEqual({1, 2}, {cue.video_id for cue in self.subtitle_index})
+        self.assertEqual("first.mp4", self.subtitle_index[0].video_name)
+        self.assertEqual("second.mp4", self.subtitle_index[2].video_name)
+        self.assertEqual("00:00:02,000-00:00:05,000", self.subtitle_index[2].timestamp)
+
+    def test_valid_script_passes_and_normalizes_video_name(self):
+        """A valid script passes, and an item without ``video_id`` gets it filled in."""
+        items = [
+            {
+                "_id": 1,
+                "video_id": 1,
+                # Name deliberately disagrees with video_id=1.
+                "video_name": "wrong-name.mp4",
+                "timestamp": "00:00:01,000-00:00:04,000",
+                "picture": "女主被误会",
+                "narration": "她被当众误会。",
+                "OST": 0,
+            },
+            {
+                # No video_id here: only the file name identifies the source video.
+                "_id": 2,
+                "video_name": "second.mp4",
+                "timestamp": "00:00:02,000-00:00:05,000",
+                "picture": "女主拿出证据",
+                "narration": "播放原片2",
+                "OST": 1,
+            },
+        ]
+
+        normalized = normalize_script_video_sources(items, self.video_paths)
+        result = validate_narration_script_items(normalized, self.subtitle_index, self.video_paths)
+
+        self.assertTrue(result.valid, result.errors)
+        self.assertEqual(2, result.items[1]["video_id"])
+        self.assertEqual("second.mp4", result.items[1]["video_name"])
+
+    def test_invalid_timestamp_and_overlap_fail(self):
+        """Overlapping ranges and an unparsable timestamp are both reported as errors."""
+        items = [
+            {
+                "_id": 1,
+                "video_id": 1,
+                "video_name": "first.mp4",
+                "timestamp": "00:00:01,000-00:00:05,000",
+                "picture": "画面",
+                "narration": "解说",
+                "OST": 0,
+            },
+            {
+                # Starts at 4.5 s, before the previous item ends at 5 s.
+                "_id": 2,
+                "video_id": 1,
+                "video_name": "first.mp4",
+                "timestamp": "00:00:04,500-00:00:08,000",
+                "picture": "画面",
+                "narration": "解说",
+                "OST": 0,
+            },
+            {
+                "_id": 3,
+                "video_id": 1,
+                "video_name": "first.mp4",
+                "timestamp": "bad",
+                "picture": "画面",
+                "narration": "解说",
+                "OST": 0,
+            },
+        ]
+
+        result = validate_narration_script_items(items, self.subtitle_index, self.video_paths)
+
+        self.assertFalse(result.valid)
+        self.assertTrue(any("重叠" in error for error in result.errors))
+        self.assertTrue(any("时间戳格式" in error for error in result.errors))
+
+    def test_invalid_video_id_does_not_default_to_first_video(self):
+        """An unknown ``video_id`` is rejected and named in the error message."""
+        items = [
+            {
+                "_id": 1,
+                "video_id": 99,
+                "video_name": "missing.mp4",
+                "timestamp": "00:00:01,000-00:00:04,000",
+                "picture": "画面",
+                "narration": "解说",
+                "OST": 0,
+            }
+        ]
+
+        result = validate_narration_script_items(items, self.subtitle_index, self.video_paths)
+
+        self.assertFalse(result.valid)
+        self.assertTrue(any("video_id=99" in error for error in result.errors))
+
+    def test_out_of_range_timestamp_fails(self):
+        """A range outside the second video's cues (which span 2 s to 9 s) is rejected."""
+        items = [
+            {
+                "_id": 1,
+                "video_id": 2,
+                "video_name": "second.mp4",
+                "timestamp": "00:00:20,000-00:00:25,000",
+                "picture": "画面",
+                "narration": "解说",
+                "OST": 0,
+            }
+        ]
+
+        result = validate_narration_script_items(items, self.subtitle_index, self.video_paths)
+
+        self.assertFalse(result.valid)
+        self.assertTrue(any("不在视频 2 的字幕范围内" in error for error in result.errors))
+
+    def test_three_consecutive_original_audio_segments_fail(self):
+        """Three ``OST=1`` segments in a row after the opening narration are rejected."""
+        items = [
+            {
+                "_id": 1,
+                "video_id": 1,
+                "video_name": "first.mp4",
+                "timestamp": "00:00:01,000-00:00:04,000",
+                "picture": "女主被误会",
+                "narration": "她被当众误会。",
+                "OST": 0,
+            },
+            {
+                "_id": 2,
+                "video_id": 1,
+                "video_name": "first.mp4",
+                "timestamp": "00:00:04,000-00:00:05,000",
+                "picture": "男主看着她",
+                "narration": "播放原片2",
+                "OST": 1,
+            },
+            {
+                "_id": 3,
+                "video_id": 1,
+                "video_name": "first.mp4",
+                "timestamp": "00:00:05,000-00:00:06,000",
+                "picture": "男主看着她",
+                "narration": "播放原片3",
+                "OST": 1,
+            },
+            {
+                "_id": 4,
+                "video_id": 1,
+                "video_name": "first.mp4",
+                "timestamp": "00:00:06,000-00:00:08,000",
+                "picture": "男主继续观察",
+                "narration": "播放原片4",
+                "OST": 1,
+            },
+        ]
+
+        result = validate_narration_script_items(items, self.subtitle_index, self.video_paths)
+
+        self.assertFalse(result.valid)
+        self.assertTrue(any("连续原声过多" in error for error in result.errors))
+
+    def test_cross_video_original_audio_requires_narration_bridge(self):
+        """Going from ``OST=1`` in video 1 straight to ``OST=1`` in video 2 is rejected."""
+        items = [
+            {
+                "_id": 1,
+                "video_id": 1,
+                "video_name": "first.mp4",
+                "timestamp": "00:00:01,000-00:00:04,000",
+                "picture": "女主被误会",
+                "narration": "她被当众误会。",
+                "OST": 0,
+            },
+            {
+                "_id": 2,
+                "video_id": 1,
+                "video_name": "first.mp4",
+                "timestamp": "00:00:04,000-00:00:08,000",
+                "picture": "男主看着她",
+                "narration": "播放原片2",
+                "OST": 1,
+            },
+            {
+                "_id": 3,
+                "video_id": 2,
+                "video_name": "second.mp4",
+                "timestamp": "00:00:02,000-00:00:05,000",
+                "picture": "女主拿出证据",
+                "narration": "播放原片3",
+                "OST": 1,
+            },
+        ]
+
+        result = validate_narration_script_items(items, self.subtitle_index, self.video_paths)
+
+        self.assertFalse(result.valid)
+        self.assertTrue(any("跨视频切换缺少 OST=0 解说桥段" in error for error in result.errors))
+
+    def test_cross_video_switch_with_narration_bridge_passes(self):
+        """Switching videos right after an ``OST=0`` narration segment is accepted."""
+        items = [
+            {
+                "_id": 1,
+                "video_id": 1,
+                "video_name": "first.mp4",
+                "timestamp": "00:00:01,000-00:00:04,000",
+                "picture": "女主被误会",
+                "narration": "她被当众误会。",
+                "OST": 0,
+            },
+            {
+                "_id": 2,
+                "video_id": 2,
+                "video_name": "second.mp4",
+                "timestamp": "00:00:02,000-00:00:05,000",
+                "picture": "女主拿出证据",
+                "narration": "播放原片2",
+                "OST": 1,
+            },
+        ]
+
+        result = validate_narration_script_items(items, self.subtitle_index, self.video_paths)
+
+        self.assertTrue(result.valid, result.errors)
+
+    def test_first_segment_must_be_narration_hook(self):
+        """A script whose first segment is original audio (``OST=1``) is rejected."""
+        items = [
+            {
+                "_id": 1,
+                "video_id": 1,
+                "video_name": "first.mp4",
+                "timestamp": "00:00:01,000-00:00:04,000",
+                "picture": "女主被误会",
+                "narration": "播放原片1",
+                "OST": 1,
+            }
+        ]
+
+        result = validate_narration_script_items(items, self.subtitle_index, self.video_paths)
+
+        self.assertFalse(result.valid)
+        self.assertTrue(any("解说开场钩子" in error for error in result.errors))
+
+    def test_dense_narration_fails_when_video_duration_is_too_short(self):
+        """A long narration squeezed into a three-second range is rejected as too dense."""
+        items = [
+            {
+                "_id": 1,
+                "video_id": 1,
+                "video_name": "first.mp4",
+                "timestamp": "00:00:01,000-00:00:04,000",
+                "picture": "女主被误会",
+                "narration": "她明明什么都没做却被所有人推到风口浪尖只能独自承受委屈",
+                "OST": 0,
+            }
+        ]
+
+        result = validate_narration_script_items(items, self.subtitle_index, self.video_paths)
+
+        self.assertFalse(result.valid)
+        self.assertTrue(any("解说过密" in error for error in result.errors))
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/app/services/test_subtitle_corrector_unittest.py
+++ b/app/services/test_subtitle_corrector_unittest.py
@ -0,0 +1,100 @@
+import json
+import tempfile
+import unittest
+from pathlib import Path
+from unittest import mock
+
+from app.services import subtitle_corrector as corrector
+
+
+# Fixture: two-cue SRT whose text contains the wrong words "顾是" and "北精";
+# the mocked LLM replies in the tests below replace them with "故事" and "北京".
+SAMPLE_SRT = """1
+00:00:01,000 --> 00:00:03,000
+今天我们来看张三的顾是
+
+2
+00:00:04,000 --> 00:00:06,000
+他来到北精找李四
+"""
+
+
+class SubtitleCorrectorTests(unittest.TestCase):
+    """Tests for the subtitle corrector with the LLM call replaced by a canned reply."""
+
+    def test_correct_srt_content_preserves_timecodes_and_rebuilds_text(self):
+        """Corrected text replaces the cue text, timecodes survive, and the LLM call gets the expected kwargs."""
+        llm_output = {
+            "items": [
+                {"id": 1, "text": "今天我们来看张三的故事"},
+                {"id": 2, "text": "他来到北京找李四"},
+            ]
+        }
+        canned_reply = json.dumps(llm_output, ensure_ascii=False)
+
+        with mock.patch("app.services.subtitle_corrector._ensure_llm_providers_registered"):
+            with mock.patch(
+                "app.services.subtitle_corrector._run_async_safely",
+                return_value=canned_reply,
+            ) as run_llm:
+                corrected = corrector.correct_srt_content(
+                    SAMPLE_SRT,
+                    provider="openai",
+                    api_key="sk-test",
+                    base_url="https://llm.example/v1",
+                )
+
+        self.assertIn("00:00:01,000 --> 00:00:03,000", corrected)
+        self.assertIn("今天我们来看张三的故事", corrected)
+        self.assertIn("他来到北京找李四", corrected)
+        self.assertNotIn("顾是", corrected)
+
+        # Inspect what was forwarded to the (mocked) LLM runner.
+        call_kwargs = run_llm.call_args.kwargs
+        self.assertEqual("openai", call_kwargs["provider"])
+        self.assertEqual("sk-test", call_kwargs["api_key"])
+        self.assertEqual("https://llm.example/v1", call_kwargs["api_base"])
+        self.assertEqual("json", call_kwargs["response_format"])
+        self.assertIn("多语言字幕校对员", call_kwargs["system_prompt"])
+        self.assertIn("保持原语言", call_kwargs["prompt"])
+
+    def test_correct_srt_content_rejects_missing_items(self):
+        """A reply that covers only one of the two cues raises SubtitleCorrectionError."""
+        llm_output = {"items": [{"id": 1, "text": "今天我们来看张三的故事"}]}
+        canned_reply = json.dumps(llm_output, ensure_ascii=False)
+
+        with mock.patch("app.services.subtitle_corrector._ensure_llm_providers_registered"):
+            with mock.patch(
+                "app.services.subtitle_corrector._run_async_safely",
+                return_value=canned_reply,
+            ):
+                with self.assertRaises(corrector.SubtitleCorrectionError):
+                    corrector.correct_srt_content(SAMPLE_SRT, provider="openai")
+
+    def test_correct_subtitle_file_writes_corrected_srt(self):
+        """The file-level entry point writes the corrected SRT and returns the output path."""
+        llm_output = {
+            "items": [
+                {"id": 1, "text": "今天我们来看张三的故事"},
+                {"id": 2, "text": "他来到北京找李四"},
+            ]
+        }
+        canned_reply = json.dumps(llm_output, ensure_ascii=False)
+
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            workdir = Path(tmp_dir)
+            input_file = workdir / "input.srt"
+            output_file = workdir / "output.srt"
+            input_file.write_text(SAMPLE_SRT, encoding="utf-8")
+
+            with mock.patch("app.services.subtitle_corrector._ensure_llm_providers_registered"):
+                with mock.patch(
+                    "app.services.subtitle_corrector._run_async_safely",
+                    return_value=canned_reply,
+                ):
+                    result_path = corrector.correct_subtitle_file(
+                        str(input_file),
+                        str(output_file),
+                        provider="openai",
+                    )
+
+            # Read back while the temporary directory still exists.
+            self.assertEqual(str(output_file), result_path)
+            self.assertIn("北京", output_file.read_text(encoding="utf-8"))
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/app/services/test_task_subtitle_resolution_unittest.py
+++ b/app/services/test_task_subtitle_resolution_unittest.py
@ -0,0 +1,46 @@
+import tempfile
+import time
+import unittest
+from pathlib import Path
+
+from app.models.schema import VideoClipParams
+from app.services import task
+
+
+class TaskSubtitleResolutionTests(unittest.TestCase):
+    """Tests for how ``task._get_original_subtitle_paths`` picks subtitle files."""
+
+    def test_get_original_subtitle_paths_falls_back_to_matching_video_name(self):
+        """Without explicit subtitle paths, the newer SRT matching the video name is returned."""
+        original_subtitle_dir = task.utils.subtitle_dir
+
+        with tempfile.TemporaryDirectory() as temp_dir:
+            temp_path = Path(temp_dir)
+            older = temp_path / "01_1080p_fun_asr.srt"
+            newer = temp_path / "01_1080p_fun_asr_20260608010240.srt"
+            unrelated = temp_path / "other_fun_asr.srt"
+            older.write_text("older", encoding="utf-8")
+            unrelated.write_text("other", encoding="utf-8")
+            # Space the writes apart so `newer` is written measurably later than `older`.
+            # NOTE(review): presumably the lookup prefers the most recently modified
+            # match - confirm against task._get_original_subtitle_paths.
+            time.sleep(0.01)
+            newer.write_text("newer", encoding="utf-8")
+
+            # Install the monkeypatch inside the try block: if anything after it
+            # raises (including building the params), the finally clause still
+            # restores the real function, so the patch cannot leak into other tests.
+            try:
+                task.utils.subtitle_dir = lambda: str(temp_path)
+                params = VideoClipParams(
+                    video_origin_path="/tmp/01_1080p_20260608113314.mp4",
+                )
+                subtitle_paths = task._get_original_subtitle_paths(params)
+            finally:
+                task.utils.subtitle_dir = original_subtitle_dir
+
+        self.assertEqual([str(newer)], subtitle_paths)
+
+    def test_get_original_subtitle_paths_keeps_explicit_params(self):
+        """Explicitly supplied ``original_subtitle_paths`` are returned unchanged."""
+        params = VideoClipParams(
+            video_origin_path="/tmp/01_1080p_20260608113314.mp4",
+            original_subtitle_paths=["/tmp/provided.srt"],
+        )
+
+        self.assertEqual(["/tmp/provided.srt"], task._get_original_subtitle_paths(params))
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/app/services/voice.py
+++ b/app/services/voice.py
@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import os
 import re
 import json
@ -6,6 +8,7 @@ import edge_tts
 import asyncio
 import requests
 import uuid
+from functools import lru_cache
 from loguru import logger
 from typing import List, Union, Tuple
 from datetime import datetime
@ -20,6 +23,7 @@ except ImportError:
    MOVIEPY_AVAILABLE = False
    logger.warning("moviepy 未安装，将使用估算方法计算音频时长")
 import time
+from urllib.parse import urljoin

 from app.config import config
 from app.utils import utils
@ -282,7 +286,7 @@ Gender: Male
 Name: en-AU-NatashaNeural
 Gender: Female

-Name: en-AU-WilliamNeural
+Name: en-AU-WilliamMultilingualNeural
 Gender: Male

 Name: en-CA-ClaraNeural
@ -369,21 +373,33 @@ Gender: Female
 Name: en-US-AndrewNeural
 Gender: Male

+Name: en-US-AndrewMultilingualNeural
+Gender: Male
+
 Name: en-US-AriaNeural
 Gender: Female

 Name: en-US-AvaNeural
 Gender: Female

+Name: en-US-AvaMultilingualNeural
+Gender: Female
+
 Name: en-US-BrianNeural
 Gender: Male

+Name: en-US-BrianMultilingualNeural
+Gender: Male
+
 Name: en-US-ChristopherNeural
 Gender: Male

 Name: en-US-EmmaNeural
 Gender: Female

+Name: en-US-EmmaMultilingualNeural
+Gender: Female
+
 Name: en-US-EricNeural
 Gender: Male

@ -666,12 +682,24 @@ Gender: Male
 Name: it-IT-ElsaNeural
 Gender: Female

-Name: it-IT-GiuseppeNeural
+Name: it-IT-GiuseppeMultilingualNeural
 Gender: Male

 Name: it-IT-IsabellaNeural
 Gender: Female

+Name: iu-Cans-CA-SiqiniqNeural
+Gender: Female
+
+Name: iu-Cans-CA-TaqqiqNeural
+Gender: Male
+
+Name: iu-Latn-CA-SiqiniqNeural
+Gender: Female
+
+Name: iu-Latn-CA-TaqqiqNeural
+Gender: Male
+
 Name: ja-JP-KeitaNeural
 Gender: Male

@ -708,7 +736,7 @@ Gender: Male
 Name: kn-IN-SapnaNeural
 Gender: Female

-Name: ko-KR-HyunsuNeural
+Name: ko-KR-HyunsuMultilingualNeural
 Gender: Male

 Name: ko-KR-InJoonNeural
@ -822,7 +850,7 @@ Gender: Male
 Name: pt-BR-FranciscaNeural
 Gender: Female

-Name: pt-BR-ThalitaNeural
+Name: pt-BR-ThalitaMultilingualNeural
 Gender: Female

 Name: pt-PT-DuarteNeural
@ -1238,6 +1266,8 @@ def doubaotts_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.
 def tts(
    text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str, tts_engine: str
 ) -> Union[SubMaker, None]:
+    tts_engine = config.normalize_tts_engine_name(tts_engine)
+    voice_name = config.normalize_indextts_voice_prefix(voice_name)
    logger.info(f"使用 TTS 引擎: '{tts_engine}', 语音: '{voice_name}'")

    if tts_engine == "tencent_tts":
@ -1263,9 +1293,17 @@ def tts(
        logger.info("分发到 Edge TTS")
        return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file)
    
-    if tts_engine == "indextts2":
-        logger.info("分发到 IndexTTS2")
-        return indextts2_tts(text, voice_name, voice_file, speed=voice_rate)
+    if tts_engine == config.INDEXTTS_ENGINE:
+        logger.info("分发到 IndexTTS-1.5")
+        return indextts_tts(text, voice_name, voice_file, speed=voice_rate)
+
+    if tts_engine == config.INDEXTTS2_ENGINE:
+        logger.info("分发到 IndexTTS-2")
+        return indextts2_tts(text, voice_name, voice_file)
+
+    if tts_engine == config.OMNIVOICE_ENGINE:
+        logger.info("分发到 OmniVoice")
+        return omnivoice_tts(text, voice_name, voice_file, speed=voice_rate)
    
    if tts_engine == "doubaotts":
        logger.info("分发到豆包语音 TTS")
@ -1306,6 +1344,52 @@ def get_edge_tts_proxy() -> str | None:
    return proxy_url or None


+def _run_async_safely(coro_func, *args, **kwargs):
+    """Run an async callable to completion from synchronous code.
+
+    Args:
+        coro_func: Callable that returns a coroutine when called with
+            ``*args`` and ``**kwargs``.
+        *args: Positional arguments forwarded to ``coro_func``.
+        **kwargs: Keyword arguments forwarded to ``coro_func``.
+
+    Returns:
+        Whatever the coroutine returns.
+
+    Raises:
+        Any exception raised by the coroutine is propagated unchanged.
+    """
+    def run_in_new_loop():
+        loop = asyncio.new_event_loop()
+        asyncio.set_event_loop(loop)
+        try:
+            return loop.run_until_complete(coro_func(*args, **kwargs))
+        finally:
+            loop.close()
+            asyncio.set_event_loop(None)
+
+    try:
+        asyncio.get_running_loop()
+    except RuntimeError:
+        loop_is_running = False
+    else:
+        loop_is_running = True
+
+    if not loop_is_running:
+        # Run outside the except block so a failure in the coroutine is not
+        # chained to the unrelated "no running event loop" RuntimeError.
+        return run_in_new_loop()
+
+    # A loop is already running in this thread and cannot be re-entered, so the
+    # coroutine gets its own loop on a short-lived worker thread.
+    import concurrent.futures
+    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
+        return executor.submit(run_in_new_loop).result()
+
+
+@lru_cache(maxsize=8)
+def _get_all_edge_voices_cached(proxy: str | None) -> list[str]:
+    """Fetch the Edge TTS voice catalogue and return sorted ``"<ShortName>-<Gender>"`` labels.
+
+    Results are memoised per ``proxy`` value. Entries whose short name or
+    gender is empty after stripping are left out.
+    """
+    async def _fetch_catalogue():
+        return await edge_tts.list_voices(proxy=proxy)
+
+    name_gender_pairs = (
+        (entry.get("ShortName", "").strip(), entry.get("Gender", "").strip())
+        for entry in _run_async_safely(_fetch_catalogue)
+    )
+    return sorted(
+        f"{short_name}-{gender}"
+        for short_name, gender in name_gender_pairs
+        if short_name and gender
+    )
+
+
+def get_all_edge_voices() -> list[str]:
+    """Return all voices currently offered by Edge TTS.
+
+    The online catalogue is read through ``_get_all_edge_voices_cached`` using
+    the proxy from ``get_edge_tts_proxy()``. If that fails for any reason, a
+    warning is logged and the built-in list from
+    ``get_all_azure_voices(filter_locals=[])`` is used instead, without the
+    entries containing ``"-V2"``.
+
+    Returns:
+        A new list on every call, safe for the caller to modify.
+    """
+    try:
+        # Copy the result: the helper is memoised with lru_cache, so handing out
+        # its list object would let a caller's in-place edit corrupt later calls.
+        return list(_get_all_edge_voices_cached(get_edge_tts_proxy()))
+    except Exception as e:
+        logger.warning(f"获取 Edge TTS 在线音色列表失败，使用内置音色列表: {e}")
+        return [v for v in get_all_azure_voices(filter_locals=[]) if "-V2" not in v]
+
+
 def azure_tts_v1(
    text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str
 ) -> Union[SubMaker, None]:
@ -1701,15 +1785,21 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
    :param tts_engine: TTS 引擎
    :return: 生成的音频文件列表
    """
-    voice_name = parse_voice_name(voice_name)
+    tts_engine = config.normalize_tts_engine_name(tts_engine)
+    voice_name = config.normalize_indextts_voice_prefix(parse_voice_name(voice_name))
    output_dir = utils.task_dir(task_id)
    tts_results = []
+    audio_extension = ".wav" if tts_engine in (
+        config.INDEXTTS_ENGINE,
+        config.INDEXTTS2_ENGINE,
+        config.OMNIVOICE_ENGINE,
+    ) else ".mp3"

    for item in list_script:
        if item['OST'] != 1:
            # 将时间戳中的冒号替换为下划线
            timestamp = item['timestamp'].replace(':', '_')
-            audio_file = os.path.join(output_dir, f"audio_{timestamp}.mp3")
+            audio_file = os.path.join(output_dir, f"audio_{timestamp}{audio_extension}")
            subtitle_file = os.path.join(output_dir, f"subtitle_{timestamp}.srt")

            text = item['narration']
@ -1729,8 +1819,13 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
                             f"或者使用其他 tts 引擎")
                continue
            else:
-                # SoulVoice、Qwen3、IndexTTS2、豆包语音 引擎不生成字幕文件
-                if is_soulvoice_voice(voice_name) or is_qwen_engine(tts_engine) or tts_engine == "indextts2" or tts_engine == "doubaotts":
+                # SoulVoice、Qwen3、IndexTTS、OmniVoice、豆包语音 引擎不生成精确字幕文件
+                if (
+                    is_soulvoice_voice(voice_name)
+                    or is_qwen_engine(tts_engine)
+                    or tts_engine in (config.INDEXTTS_ENGINE, config.INDEXTTS2_ENGINE, config.OMNIVOICE_ENGINE)
+                    or tts_engine == "doubaotts"
+                ):
                    # 获取实际音频文件的时长
                    duration = get_audio_duration_from_file(audio_file)
                    if duration <= 0:
@ -2148,24 +2243,47 @@ def parse_soulvoice_voice(voice_name: str) -> str:
    return voice_name


-def parse_indextts2_voice(voice_name: str) -> str:
+def parse_indextts_voice(voice_name: str) -> str:
    """
-    解析 IndexTTS2 语音名称
-    支持格式：indextts2:reference_audio_path
+    解析 IndexTTS-1.5 语音名称
+    支持格式：indextts:reference_audio_path
    返回参考音频文件路径
    """
-    if voice_name.startswith("indextts2:"):
-        return voice_name[10:]  # 移除 "indextts2:" 前缀
+    voice_name = config.normalize_indextts_voice_prefix(voice_name)
+    if voice_name.startswith(config.INDEXTTS_VOICE_PREFIX):
+        return voice_name[len(config.INDEXTTS_VOICE_PREFIX):]
    return voice_name


-def indextts2_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]:
+def parse_indextts2_voice(voice_name: str) -> str:
    """
-    使用 IndexTTS2 API 进行零样本语音克隆
+    解析 IndexTTS-2 语音名称
+    支持格式：indextts2:reference_audio_path
+    返回参考音频文件路径
+    """
+    if isinstance(voice_name, str) and voice_name.startswith(config.INDEXTTS2_VOICE_PREFIX):
+        return voice_name[len(config.INDEXTTS2_VOICE_PREFIX):]
+    return voice_name
+
+
+def parse_omnivoice_voice(voice_name: str) -> str:
+    """
+    Parse an OmniVoice voice name.
+    Supported format: omnivoice:reference_audio_path
+    Returns the reference audio path or mode name; input without the
+    prefix (or a non-string value) is returned unchanged.
+    """
+    prefix = config.OMNIVOICE_VOICE_PREFIX
+    if not isinstance(voice_name, str):
+        return voice_name
+    if not voice_name.startswith(prefix):
+        return voice_name
+    return voice_name[len(prefix):]
+
+
+def indextts_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]:
+    """
+    使用 IndexTTS-1.5 API 进行零样本语音克隆

    Args:
        text: 要转换的文本
-        voice_name: 参考音频路径（格式：indextts2:path/to/audio.wav）
+        voice_name: 参考音频文件（格式：indextts:path/to/audio.wav）
        voice_file: 输出音频文件路径
        speed: 语音速度（此引擎暂不支持速度调节）

@ -2173,20 +2291,20 @@ def indextts2_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.
        SubMaker: 包含时间戳信息的字幕制作器，失败时返回 None
    """
    # 获取配置
-    api_url = config.indextts2.get("api_url", "http://192.168.3.6:8081/tts")
-    infer_mode = config.indextts2.get("infer_mode", "普通推理")
-    temperature = config.indextts2.get("temperature", 1.0)
-    top_p = config.indextts2.get("top_p", 0.8)
-    top_k = config.indextts2.get("top_k", 30)
-    do_sample = config.indextts2.get("do_sample", True)
-    num_beams = config.indextts2.get("num_beams", 3)
-    repetition_penalty = config.indextts2.get("repetition_penalty", 10.0)
+    api_url = config.indextts.get("api_url", "http://192.168.3.6:8081/tts")
+    infer_mode = config.indextts.get("infer_mode", "普通推理")
+    temperature = config.indextts.get("temperature", 1.0)
+    top_p = config.indextts.get("top_p", 0.8)
+    top_k = config.indextts.get("top_k", 30)
+    do_sample = config.indextts.get("do_sample", True)
+    num_beams = config.indextts.get("num_beams", 3)
+    repetition_penalty = config.indextts.get("repetition_penalty", 10.0)

-    # 解析参考音频路径
-    reference_audio_path = parse_indextts2_voice(voice_name)
+    # 解析参考音频文件
+    reference_audio_path = parse_indextts_voice(voice_name)
    
    if not reference_audio_path or not os.path.exists(reference_audio_path):
-        logger.error(f"IndexTTS2 参考音频文件不存在: {reference_audio_path}")
+        logger.error(f"IndexTTS-1.5 参考音频文件不存在: {reference_audio_path}")
        return None

    # 准备请求数据
@ -2208,7 +2326,7 @@ def indextts2_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.
    # 重试机制
    for attempt in range(3):
        try:
-            logger.info(f"第 {attempt + 1} 次调用 IndexTTS2 API")
+            logger.info(f"第 {attempt + 1} 次调用 IndexTTS-1.5 API")

            # 设置代理
            proxies = {}
@ -2224,7 +2342,7 @@ def indextts2_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.
                files=files,
                data=data,
                proxies=proxies,
-                timeout=120  # IndexTTS2 推理可能需要较长时间
+                timeout=120  # IndexTTS-1.5 推理可能需要较长时间
            )

            if response.status_code == 200:
@ -2232,9 +2350,9 @@ def indextts2_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.
                with open(voice_file, 'wb') as f:
                    f.write(response.content)

-                logger.info(f"IndexTTS2 成功生成音频: {voice_file}, 大小: {len(response.content)} 字节")
+                logger.info(f"IndexTTS-1.5 成功生成音频: {voice_file}, 大小: {len(response.content)} 字节")

-                # IndexTTS2 不支持精确字幕生成，返回简单的 SubMaker 对象
+                # IndexTTS-1.5 不支持精确字幕生成，返回简单的 SubMaker 对象
                sub_maker = new_sub_maker()
                # 估算音频时长（基于文本长度）
                estimated_duration_ms = max(1000, int(len(text) * 200))
@ -2243,14 +2361,14 @@ def indextts2_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.
                return sub_maker

            else:
-                logger.error(f"IndexTTS2 API 调用失败: {response.status_code} - {response.text}")
+                logger.error(f"IndexTTS-1.5 API 调用失败: {response.status_code} - {response.text}")

        except requests.exceptions.Timeout:
-            logger.error(f"IndexTTS2 API 调用超时 (尝试 {attempt + 1}/3)")
+            logger.error(f"IndexTTS-1.5 API 调用超时 (尝试 {attempt + 1}/3)")
        except requests.exceptions.RequestException as e:
-            logger.error(f"IndexTTS2 API 网络错误: {str(e)} (尝试 {attempt + 1}/3)")
+            logger.error(f"IndexTTS-1.5 API 网络错误: {str(e)} (尝试 {attempt + 1}/3)")
        except Exception as e:
-            logger.error(f"IndexTTS2 TTS 处理错误: {str(e)} (尝试 {attempt + 1}/3)")
+            logger.error(f"IndexTTS-1.5 TTS 处理错误: {str(e)} (尝试 {attempt + 1}/3)")
        finally:
            # 确保关闭文件
            try:
@ -2267,5 +2385,270 @@ def indextts2_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.
                except:
                    pass

-    logger.error("IndexTTS2 TTS 生成失败，已达到最大重试次数")
+    logger.error("IndexTTS-1.5 TTS 生成失败，已达到最大重试次数")
+    return None
+
+
+def _normalize_indextts2_api_url(api_url: str) -> str:
+    """
+    Return the IndexTTS-2 endpoint URL, guaranteed to end with ``/tts``.
+
+    Args:
+        api_url: Configured service URL; may be empty, None, padded with
+            whitespace, or given with or without the ``/tts`` suffix.
+
+    Returns:
+        The URL without trailing slashes and with exactly one ``/tts`` suffix.
+        Empty or whitespace-only input yields the built-in default URL.
+    """
+    default_url = "http://192.168.3.6:7863/tts"
+    # Strip before falling back so whitespace-only input does not become "/tts".
+    normalized = ((api_url or "").strip() or default_url).rstrip("/")
+    # Trailing slashes are already removed, so "http://host/tts/" is recognised
+    # here instead of being turned into "http://host/tts/tts".
+    if normalized.endswith("/tts"):
+        return normalized
+    return f"{normalized}/tts"
+
+
+def _get_configured_proxies() -> dict:
+    """
+    Build the ``proxies`` mapping for ``requests`` from ``config.proxy``.
+
+    Returns:
+        An empty dict when no HTTP proxy is configured; otherwise a dict with
+        ``http`` and ``https`` entries, where ``https`` falls back to the HTTP
+        proxy if the ``https`` key is absent from the configuration.
+    """
+    http_proxy = config.proxy.get("http")
+    if http_proxy:
+        return {
+            "http": http_proxy,
+            "https": config.proxy.get("https", http_proxy),
+        }
+    return {}
+
+
+def _download_indextts2_audio(response: requests.Response, api_url: str, voice_file: str, proxies: dict) -> bool:
+    """
+    Save the audio produced by an IndexTTS-2 call to ``voice_file``.
+
+    A non-JSON response body is written to disk as-is. A JSON response is
+    expected to carry a ``downloads.wav`` link (resolved against ``api_url``),
+    which is fetched with a second request and then written to disk.
+
+    Returns:
+        True when the written file is non-empty; False when the JSON has no
+        download link, the download does not return HTTP 200, or the file
+        ends up empty.
+    """
+    is_json = "application/json" in response.headers.get("content-type", "").lower()
+    if is_json:
+        payload = response.json()
+        download_map = payload.get("downloads") if isinstance(payload, dict) else {}
+        wav_link = download_map.get("wav") if isinstance(download_map, dict) else ""
+        if not wav_link:
+            logger.error(f"IndexTTS-2 API 响应中没有音频下载地址: {payload}")
+            return False
+
+        fetched = requests.get(urljoin(api_url, wav_link), proxies=proxies, timeout=120)
+        if fetched.status_code != 200:
+            logger.error(f"IndexTTS-2 音频下载失败: {fetched.status_code} - {fetched.text}")
+            return False
+        audio_source = fetched
+    else:
+        audio_source = response
+
+    with open(voice_file, "wb") as out_file:
+        out_file.write(audio_source.content)
+    return os.path.getsize(voice_file) > 0
+
+
+def indextts2_tts(text: str, voice_name: str, voice_file: str) -> Union[SubMaker, None]:
+    """
+    Synthesize speech through the IndexTTS-2 API using zero-shot voice cloning.
+
+    The request is a multipart form POST to ``/tts``, compatible with the
+    IndexTTS2-Pack interface. Up to three attempts are made, with a two-second
+    pause between attempts.
+
+    Args:
+        text: Text to synthesize; surrounding whitespace is stripped before sending.
+        voice_name: Voice identifier handed to ``parse_indextts2_voice`` to obtain
+            the reference audio path.
+        voice_file: Path the generated audio is written to.
+
+    Returns:
+        A SubMaker holding a single subtitle event covering the whole text, or
+        None when a required audio file is missing or every attempt fails.
+    """
+    api_url = _normalize_indextts2_api_url(config.indextts2.get("api_url", "http://192.168.3.6:7863/tts"))
+    reference_audio_path = parse_indextts2_voice(voice_name)
+
+    # Without a speaker reference there is nothing to clone from.
+    if not reference_audio_path or not os.path.exists(reference_audio_path):
+        logger.error(f"IndexTTS-2 参考音频文件不存在: {reference_audio_path}")
+        return None
+
+    emotion_mode = config.indextts2.get("emotion_mode", "speaker")
+    emotion_audio_path = config.indextts2.get("emotion_audio", "")
+    # Form fields sent with every attempt; each value comes from config.indextts2
+    # with the default shown here.
+    data = {
+        "text": text.strip(),
+        "emotion_mode": emotion_mode,
+        "emotion_alpha": config.indextts2.get("emotion_alpha", 0.65),
+        "emotion_text": config.indextts2.get("emotion_text", ""),
+        # Sent as the lowercase strings "true"/"false".
+        "use_random": str(bool(config.indextts2.get("use_random", False))).lower(),
+        "max_text_tokens_per_segment": config.indextts2.get("max_text_tokens_per_segment", 120),
+        "vec_happy": config.indextts2.get("vec_happy", 0.0),
+        "vec_angry": config.indextts2.get("vec_angry", 0.0),
+        "vec_sad": config.indextts2.get("vec_sad", 0.0),
+        "vec_afraid": config.indextts2.get("vec_afraid", 0.0),
+        "vec_disgusted": config.indextts2.get("vec_disgusted", 0.0),
+        "vec_melancholic": config.indextts2.get("vec_melancholic", 0.0),
+        "vec_surprised": config.indextts2.get("vec_surprised", 0.0),
+        "vec_calm": config.indextts2.get("vec_calm", 0.8),
+        "temperature": config.indextts2.get("temperature", 0.8),
+        "top_p": config.indextts2.get("top_p", 0.8),
+        "top_k": config.indextts2.get("top_k", 30),
+        "num_beams": config.indextts2.get("num_beams", 3),
+        "repetition_penalty": config.indextts2.get("repetition_penalty", 10.0),
+        "max_mel_tokens": config.indextts2.get("max_mel_tokens", 1500),
+    }
+
+    proxies = _get_configured_proxies()
+    for attempt in range(3):
+        # File handles are reopened on every attempt and closed in ``finally``.
+        files = {}
+        try:
+            files["speaker_audio"] = open(reference_audio_path, "rb")
+            if emotion_mode == "audio":
+                # A missing emotion reference is not retried: return immediately.
+                if not emotion_audio_path or not os.path.exists(emotion_audio_path):
+                    logger.error(f"IndexTTS-2 情感参考音频文件不存在: {emotion_audio_path}")
+                    return None
+                files["emotion_audio"] = open(emotion_audio_path, "rb")
+
+            logger.info(f"第 {attempt + 1} 次调用 IndexTTS-2 API: {api_url}")
+            response = requests.post(
+                api_url,
+                files=files,
+                data=data,
+                proxies=proxies,
+                timeout=180,
+            )
+
+            if response.status_code == 200 and _download_indextts2_audio(response, api_url, voice_file, proxies):
+                logger.info(f"IndexTTS-2 成功生成音频: {voice_file}, 大小: {os.path.getsize(voice_file)} 字节")
+                sub_maker = new_sub_maker()
+                duration = get_audio_duration_from_file(voice_file)
+                # Fall back to an estimate of 200 ms per character (at least 1 s)
+                # when the measured duration is not positive.
+                duration_ms = int(duration * 1000) if duration > 0 else max(1000, int(len(text) * 200))
+                # NOTE(review): the factor 10000 presumably converts milliseconds to
+                # the 100-nanosecond units add_subtitle_event expects - confirm.
+                add_subtitle_event(sub_maker, 0, duration_ms * 10000, text)
+                return sub_maker
+
+            # Reached for non-200 responses and for 200 responses whose audio
+            # could not be saved.
+            logger.error(f"IndexTTS-2 API 调用失败: {response.status_code} - {response.text}")
+        except requests.exceptions.Timeout:
+            logger.error(f"IndexTTS-2 API 调用超时 (尝试 {attempt + 1}/3)")
+        except requests.exceptions.RequestException as e:
+            logger.error(f"IndexTTS-2 API 网络错误: {str(e)} (尝试 {attempt + 1}/3)")
+        except Exception as e:
+            logger.error(f"IndexTTS-2 TTS 处理错误: {str(e)} (尝试 {attempt + 1}/3)")
+        finally:
+            # Best-effort close of every handle opened for this attempt.
+            for file_obj in files.values():
+                try:
+                    file_obj.close()
+                except Exception:
+                    pass
+
+        # Pause before the next attempt; no pause after the final one.
+        if attempt < 2:
+            time.sleep(2)
+
+    logger.error("IndexTTS-2 TTS 生成失败，已达到最大重试次数")
+    return None
+
+
+def _normalize_omnivoice_api_url(api_url: str) -> str:
+    """
+    Return the OmniVoice endpoint URL, guaranteed to end with ``/tts``.
+
+    Args:
+        api_url: Configured service URL; may be empty, None, padded with
+            whitespace, point at ``/tts/json``, or omit the ``/tts`` suffix.
+
+    Returns:
+        The URL without trailing slashes and with exactly one ``/tts`` suffix.
+        A ``/tts/json`` URL is mapped to its ``/tts`` sibling. Empty or
+        whitespace-only input yields the built-in default URL.
+    """
+    default_url = "http://127.0.0.1:7866/tts"
+    # Strip before falling back so whitespace-only input does not become "/tts".
+    normalized = ((api_url or "").strip() or default_url).rstrip("/")
+    # Trailing slashes are already gone, so ".../tts/json/" is handled the same
+    # way as ".../tts/json" instead of becoming ".../tts/json/tts".
+    if normalized.endswith("/tts/json"):
+        normalized = normalized[: -len("/json")]
+    if normalized.endswith("/tts"):
+        return normalized
+    return f"{normalized}/tts"
+
+
+def _download_omnivoice_audio(response: requests.Response, api_url: str, voice_file: str, proxies: dict) -> bool:
+    """
+    Save the audio produced by an OmniVoice call to ``voice_file``.
+
+    A non-JSON response body is written to disk as-is. A JSON response is
+    expected to carry an ``audio_url`` (resolved against ``api_url``), which
+    is fetched with a second request and then written to disk.
+
+    Returns:
+        True when the written file is non-empty; False when the JSON has no
+        audio URL, the download does not return HTTP 200, or the file ends
+        up empty.
+    """
+    is_json = "application/json" in response.headers.get("content-type", "").lower()
+    if is_json:
+        payload = response.json()
+        relative_url = payload.get("audio_url") if isinstance(payload, dict) else ""
+        if not relative_url:
+            logger.error(f"OmniVoice API 响应中没有音频下载地址: {payload}")
+            return False
+
+        fetched = requests.get(urljoin(api_url, relative_url), proxies=proxies, timeout=180)
+        if fetched.status_code != 200:
+            logger.error(f"OmniVoice 音频下载失败: {fetched.status_code} - {fetched.text}")
+            return False
+        audio_source = fetched
+    else:
+        audio_source = response
+
+    with open(voice_file, "wb") as out_file:
+        out_file.write(audio_source.content)
+    return os.path.getsize(voice_file) > 0
+
+
+def _optional_omnivoice_generation_data(voice_speed: float) -> dict:
+    """
+    Collect the optional generation fields for an OmniVoice request.
+
+    ``speed`` is always present: a truthy ``voice_speed`` wins, otherwise the
+    configured ``speed`` (default 1.0) is used. ``num_step``, ``guidance_scale``
+    and ``duration`` are copied only when configured with a value other than
+    None or an empty string. The boolean switches are included only when their
+    key exists in the configuration and are sent as "true"/"false".
+    """
+    settings = getattr(config, "omnivoice", {}) or {}
+    payload = {}
+    payload["speed"] = voice_speed or settings.get("speed", 1.0)
+
+    for field_name in ("num_step", "guidance_scale", "duration"):
+        configured = settings.get(field_name)
+        if configured in (None, ""):
+            continue
+        payload[field_name] = configured
+
+    for switch in ("denoise", "postprocess_output", "preprocess_prompt"):
+        if switch not in settings:
+            continue
+        payload[switch] = str(bool(settings.get(switch))).lower()
+
+    return payload
+
+
+def omnivoice_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]:
+    """
+    Synthesize speech through the OmniVoice-Pack FastAPI service.
+
+    Three modes are supported: automatic voice, instruction-designed voice
+    (``voice_design``) and reference-audio cloning (``voice_clone``). Up to
+    three attempts are made, with a two-second pause between attempts.
+
+    Args:
+        text: Text to synthesize; surrounding whitespace is stripped before sending.
+        voice_name: Voice identifier handed to ``parse_omnivoice_voice``; when it
+            resolves to an existing file, cloning mode is used.
+        voice_file: Path the generated audio is written to.
+        speed: Speed value forwarded to ``_optional_omnivoice_generation_data``.
+
+    Returns:
+        A SubMaker holding a single subtitle event covering the whole text, or
+        None when cloning has no usable reference audio or every attempt fails.
+    """
+    omnivoice_config = getattr(config, "omnivoice", {}) or {}
+    api_url = _normalize_omnivoice_api_url(omnivoice_config.get("api_url", "http://127.0.0.1:7866/tts"))
+    mode = omnivoice_config.get("mode", "auto")
+    language = (omnivoice_config.get("language", "zh") or "").strip()
+    instruct = (omnivoice_config.get("instruct", "") or "").strip()
+    ref_text = (omnivoice_config.get("ref_text", "") or "").strip()
+    parsed_voice = parse_omnivoice_voice(voice_name)
+    # A voice name that resolves to an existing file overrides the configured
+    # mode and switches to cloning.
+    if mode != "voice_clone" and parsed_voice and os.path.isfile(parsed_voice):
+        mode = "voice_clone"
+
+    reference_audio_path = ""
+    if mode == "voice_clone":
+        # Prefer the file named by the voice; otherwise fall back to the
+        # configured reference audio.
+        candidate = parsed_voice
+        if candidate and os.path.isfile(candidate):
+            reference_audio_path = candidate
+        else:
+            reference_audio_path = parse_omnivoice_voice(omnivoice_config.get("reference_audio", "") or "")
+
+        if not reference_audio_path or not os.path.exists(reference_audio_path):
+            logger.error(f"OmniVoice 参考音频文件不存在: {reference_audio_path}")
+            return None
+    elif mode != "voice_design":
+        # The instruction text is only meaningful in voice_design mode.
+        instruct = ""
+
+    data = {
+        "text": text.strip(),
+        "language": language,
+        **_optional_omnivoice_generation_data(speed),
+    }
+    if mode == "voice_design" and instruct:
+        data["instruct"] = instruct
+    if mode == "voice_clone" and ref_text:
+        data["ref_text"] = ref_text
+
+    proxies = _get_configured_proxies()
+    for attempt in range(3):
+        # File handles are reopened on every attempt and closed in ``finally``.
+        files = {}
+        try:
+            if reference_audio_path:
+                files["ref_audio"] = open(reference_audio_path, "rb")
+
+            logger.info(f"第 {attempt + 1} 次调用 OmniVoice API: {api_url}, mode={mode}")
+            response = requests.post(
+                api_url,
+                # Pass None rather than an empty dict when no file is attached.
+                files=files or None,
+                data=data,
+                proxies=proxies,
+                timeout=240,
+            )
+
+            if response.status_code == 200 and _download_omnivoice_audio(response, api_url, voice_file, proxies):
+                logger.info(f"OmniVoice 成功生成音频: {voice_file}, 大小: {os.path.getsize(voice_file)} 字节")
+                sub_maker = new_sub_maker()
+                duration = get_audio_duration_from_file(voice_file)
+                # Fall back to an estimate of 200 ms per character (at least 1 s)
+                # when the measured duration is not positive.
+                duration_ms = int(duration * 1000) if duration > 0 else max(1000, int(len(text) * 200))
+                # NOTE(review): the factor 10000 presumably converts milliseconds to
+                # the 100-nanosecond units add_subtitle_event expects - confirm.
+                add_subtitle_event(sub_maker, 0, duration_ms * 10000, text)
+                return sub_maker
+
+            # Reached for non-200 responses and for 200 responses whose audio
+            # could not be saved.
+            logger.error(f"OmniVoice API 调用失败: {response.status_code} - {response.text}")
+        except requests.exceptions.Timeout:
+            logger.error(f"OmniVoice API 调用超时 (尝试 {attempt + 1}/3)")
+        except requests.exceptions.RequestException as e:
+            logger.error(f"OmniVoice API 网络错误: {str(e)} (尝试 {attempt + 1}/3)")
+        except Exception as e:
+            logger.error(f"OmniVoice TTS 处理错误: {str(e)} (尝试 {attempt + 1}/3)")
+        finally:
+            # Best-effort close of every handle opened for this attempt.
+            for file_obj in files.values():
+                try:
+                    file_obj.close()
+                except Exception:
+                    pass
+
+        # Pause before the next attempt; no pause after the final one.
+        if attempt < 2:
+            time.sleep(2)
+
+    logger.error("OmniVoice TTS 生成失败，已达到最大重试次数")
    return None
--- a/app/utils/check_script.py
+++ b/app/utils/check_script.py
@ -57,6 +57,23 @@ def check_format(script_content: str) -> Dict[str, Any]:
                    'details': f'当前值: {clip["_id"]} (类型: {type(clip["_id"]).__name__})'
                }

+            # 验证可选视频来源字段。旧脚本可以不包含，新脚本用于多视频定位。
+            if 'video_id' in clip and clip['video_id'] not in ("", None):
+                if not isinstance(clip['video_id'], int) or clip['video_id'] <= 0:
+                    return {
+                        'success': False,
+                        'message': f'第{i+1}个片段的video_id必须是正整数',
+                        'details': f'当前值: {clip["video_id"]} (类型: {type(clip["video_id"]).__name__})'
+                    }
+
+            if 'video_name' in clip and clip['video_name'] not in ("", None):
+                if not isinstance(clip['video_name'], str):
+                    return {
+                        'success': False,
+                        'message': f'第{i+1}个片段的video_name必须是字符串',
+                        'details': f'当前值: {clip["video_name"]} (类型: {type(clip["video_name"]).__name__})'
+                    }
+
            # 验证 timestamp 字段格式
            timestamp_pattern = r'^\d{2}:\d{2}:\d{2},\d{3}-\d{2}:\d{2}:\d{2},\d{3}$'
            if not isinstance(clip['timestamp'], str) or not re.match(timestamp_pattern, clip['timestamp']):
--- a/app/utils/ffmpeg_detector.py
+++ b/app/utils/ffmpeg_detector.py
@ -0,0 +1,493 @@
+"""FFmpeg engine discovery and capability diagnostics."""
+
+from __future__ import annotations
+
+import os
+import platform
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+from dataclasses import asdict, dataclass
+from pathlib import Path
+from typing import Any
+
+from loguru import logger
+
+
+# Executable file names; the ``.exe`` suffix is used when ``os.name`` is "nt".
+_FFMPEG_EXE = "ffmpeg.exe" if os.name == "nt" else "ffmpeg"
+_FFPROBE_EXE = "ffprobe.exe" if os.name == "nt" else "ffprobe"
+# Rank of each discovery source label, used as a sort key when discovered
+# engines are ordered: lower numbers sort first. Labels missing from this
+# table are ranked 99 by the sort key.
+_SOURCE_PRIORITY = {
+    "Configured": 0,
+    "NarratoAI packaged runtime": 1,
+    "Integrated runtime": 2,
+    "System PATH": 3,
+    "Homebrew": 4,
+    "Python environment": 5,
+    "Python executable folder": 6,
+    "IMAGEIO_FFMPEG_EXE": 7,
+    "imageio-ffmpeg": 8,
+    "System": 9,
+}
+
+
+@dataclass(frozen=True)
+class FFmpegEngine:
+    """Immutable record describing one FFmpeg executable found during discovery."""
+
+    # Path of the ffmpeg executable.
+    path: str
+    # Label naming where the executable was found.
+    source: str
+    # Path of the ffprobe paired with this ffmpeg.
+    ffprobe_path: str
+    # Availability flag reported for this executable.
+    available: bool
+    # Version banner line; may be empty.
+    version_line: str
+
+    @property
+    def label(self) -> str:
+        """Build a one-line, human-readable summary of this engine."""
+        short_version = self.version_line.replace("ffmpeg version", "").strip()
+        if not short_version:
+            short_version = "unknown version"
+        state = "OK" if self.available else "Unavailable"
+        return f"{self.source} - {short_version} - {self.path} ({state})"
+
+    def to_dict(self) -> dict[str, Any]:
+        """Return the fields as a plain dict with the computed ``label`` appended."""
+        return {**asdict(self), "label": self.label}
+
+
+def _run_command(args: list[str], timeout: int = 10) -> subprocess.CompletedProcess[str]:
+    """
+    Run *args* as a subprocess and return the completed process.
+
+    stdout and stderr are captured as text. A non-zero exit status does not
+    raise; ``subprocess.TimeoutExpired`` propagates when *timeout* seconds
+    elapse.
+    """
+    run_options = {
+        "capture_output": True,  # same as stdout=PIPE, stderr=PIPE
+        "text": True,
+        "check": False,
+        "timeout": timeout,
+    }
+    return subprocess.run(args, **run_options)
+
+
+def _first_line(text: str) -> str:
+    """Return the first non-blank line of *text*, stripped, or "" if there is none."""
+    stripped_lines = (raw.strip() for raw in (text or "").splitlines())
+    return next((candidate for candidate in stripped_lines if candidate), "")
+
+
+def _is_executable(path: str) -> bool:
+    """
+    Report whether *path* names a regular file that can be executed.
+
+    When ``os.name`` is "nt" only the existence of the file is checked;
+    elsewhere the execute permission is required as well.
+    """
+    if not path or not os.path.isfile(path):
+        return False
+    return os.name == "nt" or os.access(path, os.X_OK)
+
+
+def _normalize_path(path: str) -> str:
+    """Return *path* with ``~`` expanded and resolved to an absolute path string."""
+    expanded = Path(path).expanduser()
+    return str(expanded.resolve())
+
+
+def _ffmpeg_version_line(ffmpeg_path: str) -> tuple[bool, str]:
+    """
+    Run ``<ffmpeg_path> -version`` and report the outcome.
+
+    Returns:
+        ``(succeeded, first_line)`` where ``succeeded`` is True only when the
+        command exits with status 0 and ``first_line`` is the first non-blank
+        line of stdout (or of stderr when stdout is empty). ``(False, "")`` is
+        returned when the path is not executable or the command cannot be run.
+    """
+    if _is_executable(ffmpeg_path):
+        try:
+            completed = _run_command([ffmpeg_path, "-version"], timeout=8)
+        except Exception as exc:
+            logger.debug(f"FFmpeg version check failed for {ffmpeg_path}: {exc}")
+        else:
+            banner = completed.stdout or completed.stderr
+            return completed.returncode == 0, _first_line(banner)
+    return False, ""
+
+
+def _paired_ffprobe_path(ffmpeg_path: str) -> str:
+    """
+    Locate the ffprobe executable that goes with *ffmpeg_path*.
+
+    An executable ffprobe in the same directory is preferred. Otherwise the
+    search covers that directory followed by the entries of ``PATH``.
+
+    Returns:
+        The normalized ffprobe path, or "" when none is found.
+    """
+    ffmpeg_file = Path(ffmpeg_path)
+    neighbour = str(ffmpeg_file.with_name(_FFPROBE_EXE))
+    if _is_executable(neighbour):
+        return _normalize_path(neighbour)
+
+    search_path = os.pathsep.join((str(ffmpeg_file.parent), os.environ.get("PATH", "")))
+    located = shutil.which(_FFPROBE_EXE, path=search_path)
+    if located:
+        return _normalize_path(located)
+    return ""
+
+
+def _candidate_paths(root_dir: str = "", include_system: bool = True) -> list[tuple[str, str]]:
+    """
+    List ``(source label, path)`` pairs where an FFmpeg executable may live.
+
+    The paths are not checked for existence. Order is: bundled runtime
+    locations under *root_dir* (or the current directory) and its parent, the
+    Python environment, the ``IMAGEIO_FFMPEG_EXE`` variable, then, when
+    *include_system* is true, the ``PATH`` hit and fixed system locations,
+    and finally the imageio-ffmpeg binary when that package can be imported.
+    """
+    base_dir = Path(root_dir).expanduser().resolve() if root_dir else Path.cwd().resolve()
+    parent_dir = base_dir.parent
+    runtime_ffmpeg = Path("runtime") / "python" / "bin" / _FFMPEG_EXE
+    packaged_dir = parent_dir / "NarratoAI-Pack" / "dist" / "NarratoAI-macos-arm64"
+
+    found: list[tuple[str, str]] = [
+        ("Integrated runtime", str(base_dir / runtime_ffmpeg)),
+        ("Integrated runtime", str(parent_dir / runtime_ffmpeg)),
+        ("NarratoAI packaged runtime", str(packaged_dir / runtime_ffmpeg)),
+        ("Python environment", str(Path(sys.prefix) / "bin" / _FFMPEG_EXE)),
+        ("Python executable folder", str(Path(sys.executable).with_name(_FFMPEG_EXE))),
+    ]
+
+    from_env = os.environ.get("IMAGEIO_FFMPEG_EXE", "")
+    if from_env:
+        found.append(("IMAGEIO_FFMPEG_EXE", from_env))
+
+    if include_system:
+        on_path = shutil.which(_FFMPEG_EXE)
+        if on_path:
+            found.append(("System PATH", on_path))
+        found += [
+            ("Homebrew", f"/opt/homebrew/bin/{_FFMPEG_EXE}"),
+            ("Homebrew", f"/usr/local/bin/{_FFMPEG_EXE}"),
+            ("System", f"/usr/bin/{_FFMPEG_EXE}"),
+        ]
+
+    # imageio-ffmpeg is optional; any failure to import or query it is only logged.
+    try:
+        import imageio_ffmpeg
+
+        found.append(("imageio-ffmpeg", imageio_ffmpeg.get_ffmpeg_exe()))
+    except Exception as exc:
+        logger.debug(f"imageio-ffmpeg discovery skipped: {exc}")
+
+    return found
+
+
+def discover_ffmpeg_engines(
+    configured_path: str = "",
+    root_dir: str = "",
+    include_system: bool = True,
+) -> list[dict[str, Any]]:
+    """
+    Find FFmpeg executables and describe each one as a dict.
+
+    Candidates come from *configured_path* (if given) followed by
+    ``_candidate_paths``. Each distinct path is probed once. Engines that fail
+    the version check are dropped unless they come from the configuration or
+    the ``IMAGEIO_FFMPEG_EXE`` variable. The result is ordered with available
+    engines first, then by source priority, then by path.
+    """
+    always_listed = {"Configured", "IMAGEIO_FFMPEG_EXE"}
+
+    pending: list[tuple[str, str]] = [("Configured", configured_path)] if configured_path else []
+    pending += _candidate_paths(root_dir=root_dir, include_system=include_system)
+
+    found: list[FFmpegEngine] = []
+    visited: set[str] = set()
+    for origin, raw in pending:
+        if not raw:
+            continue
+        try:
+            resolved = _normalize_path(raw)
+        except Exception:
+            resolved = str(Path(raw).expanduser())
+
+        # The first source that yields a given path keeps it.
+        dedup_key = os.path.normcase(resolved)
+        if dedup_key in visited:
+            continue
+        visited.add(dedup_key)
+
+        usable, banner = _ffmpeg_version_line(resolved)
+        if usable or origin in always_listed:
+            found.append(
+                FFmpegEngine(
+                    path=resolved,
+                    source=origin,
+                    ffprobe_path=_paired_ffprobe_path(resolved),
+                    available=usable,
+                    version_line=banner,
+                )
+            )
+
+    def _rank(engine: FFmpegEngine) -> tuple[bool, int, str]:
+        return (not engine.available, _SOURCE_PRIORITY.get(engine.source, 99), engine.path)
+
+    return [engine.to_dict() for engine in sorted(found, key=_rank)]
+
+
+def _parse_hwaccels(output: str) -> list[str]:
+    """
+    Extract hardware acceleration method names from ``ffmpeg -hwaccels`` output.
+
+    Blank lines and the "Hardware acceleration ..." heading are ignored; only
+    lines made solely of lowercase letters, digits and underscores (after
+    lowercasing) are kept. The result is de-duplicated and sorted.
+    """
+    lowered = (row.strip().lower() for row in output.splitlines())
+    names = {
+        entry
+        for entry in lowered
+        if entry
+        and not entry.startswith("hardware acceleration")
+        and re.fullmatch(r"[a-z0-9_]+", entry)
+    }
+    return sorted(names)
+
+
+def _parse_ffmpeg_table_names(output: str) -> set[str]:
+    """
+    Collect the names listed in an FFmpeg capability table.
+
+    A row counts when it starts with a flags column of at least two
+    uppercase letters or dots, followed by whitespace and a name; the name is
+    returned lowercased.
+    """
+    row_pattern = re.compile(r"\s*[A-Z.]{2,}\s+([A-Za-z0-9_]+)\b")
+    hits = (row_pattern.match(row) for row in output.splitlines())
+    return {hit.group(1).lower() for hit in hits if hit}
+
+
+def _run_optional(args: list[str], timeout: int = 15, max_output_chars: int = 1200) -> tuple[bool, str]:
+    """
+    Run a command without ever raising and report ``(succeeded, output)``.
+
+    ``output`` is stderr followed by stdout (empty streams skipped), joined by
+    a newline. When *max_output_chars* is positive only that many trailing
+    characters are kept. A timeout or any other failure to run the command
+    yields ``False`` with a short explanation as the output.
+    """
+    try:
+        completed = _run_command(args, timeout=timeout)
+    except subprocess.TimeoutExpired:
+        return False, "Command timed out"
+    except Exception as exc:
+        return False, str(exc)
+
+    streams = [stream for stream in (completed.stderr, completed.stdout) if stream]
+    combined = "\n".join(streams)
+    if max_output_chars > 0:
+        # Keep the tail of the output.
+        combined = combined[-max_output_chars:]
+    return completed.returncode == 0, combined
+
+
+def _hardware_candidates() -> list[tuple[str, str, list[str]]]:
+    """
+    List the hardware encoders to try on the current platform.
+
+    Each entry is ``(acceleration type, encoder name, ffmpeg encoder args)``.
+    macOS and Windows have dedicated lists; every other platform gets the
+    nvenc / qsv / vaapi list.
+    """
+    nvenc = ("nvenc", "h264_nvenc", ["-c:v", "h264_nvenc", "-preset", "fast"])
+    per_platform = {
+        "darwin": [
+            ("videotoolbox", "h264_videotoolbox", ["-c:v", "h264_videotoolbox", "-q:v", "65"]),
+        ],
+        "windows": [
+            nvenc,
+            ("qsv", "h264_qsv", ["-c:v", "h264_qsv", "-preset", "fast"]),
+            ("amf", "h264_amf", ["-c:v", "h264_amf"]),
+        ],
+    }
+    fallback = [
+        nvenc,
+        ("qsv", "h264_qsv", ["-vf", "format=nv12", "-c:v", "h264_qsv"]),
+        ("vaapi", "h264_vaapi", ["-vf", "format=nv12,hwupload", "-c:v", "h264_vaapi"]),
+    ]
+    return per_platform.get(platform.system().lower(), fallback)
+
+
+def _detect_hardware_encoding(ffmpeg_path: str, encoders: set[str]) -> dict[str, Any]:
+    """
+    Find the first hardware encoder that really works with this FFmpeg.
+
+    Each platform candidate is skipped when it is absent from *encoders*;
+    otherwise a tiny synthetic clip is encoded to the null muxer. The search
+    stops at the first candidate that succeeds.
+
+    Returns:
+        A dict with ``available``, ``type``, ``encoder``, ``message`` and
+        ``tested`` (one entry per candidate examined so far).
+    """
+    probe_head = [
+        ffmpeg_path,
+        "-y",
+        "-hide_banner",
+        "-loglevel",
+        "error",
+        "-f",
+        "lavfi",
+        "-i",
+        "testsrc=duration=0.5:size=128x72:rate=15",
+        "-frames:v",
+        "5",
+    ]
+    probe_tail = ["-pix_fmt", "yuv420p", "-f", "null", "-"]
+
+    attempts: list[dict[str, Any]] = []
+    for accel_type, encoder, encoder_args in _hardware_candidates():
+        if encoder.lower() in encoders:
+            passed, detail = _run_optional(probe_head + list(encoder_args) + probe_tail, timeout=18)
+            if passed:
+                detail = "Hardware encode test passed"
+        else:
+            passed, detail = False, "Encoder not listed by this FFmpeg build"
+
+        attempts.append(
+            {
+                "type": accel_type,
+                "encoder": encoder,
+                "available": passed,
+                "message": detail,
+            }
+        )
+        if passed:
+            return {
+                "available": True,
+                "type": accel_type,
+                "encoder": encoder,
+                "message": "Hardware encode test passed",
+                "tested": attempts,
+            }
+
+    return {
+        "available": False,
+        "type": None,
+        "encoder": None,
+        "message": "No hardware encoder passed the runtime test",
+        "tested": attempts,
+    }
+
+
+def _escape_filter_path(path: str) -> str:
+    """Backslash-escape backslashes, colons and single quotes in *path* for use in a filter argument."""
+    escapes = str.maketrans({"\\": "\\\\", ":": "\\:", "'": "\\'"})
+    return path.translate(escapes)
+
+
+def _test_subtitle_burn(ffmpeg_path: str, filters: set[str]) -> dict[str, Any]:
+    """
+    Check whether this FFmpeg build can burn text into video frames.
+
+    The ``subtitles`` filter is tried first with a temporary SRT file; if it is
+    not listed or the run fails, ``drawtext`` is tried as a fallback.
+
+    Args:
+        ffmpeg_path: FFmpeg executable to run.
+        filters: Lowercase filter names reported by this FFmpeg build.
+
+    Returns:
+        A dict with ``available``, ``method`` ("subtitles", "drawtext" or None),
+        ``message`` and ``filters`` (presence flags for the four filters checked).
+    """
+    filter_status = {
+        "subtitles": "subtitles" in filters,
+        "ass": "ass" in filters,
+        "drawtext": "drawtext" in filters,
+        "overlay": "overlay" in filters,
+    }
+
+    if filter_status["subtitles"]:
+        # The SRT file only needs to exist while ffmpeg runs.
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            srt_path = Path(tmp_dir) / "subtitle_test.srt"
+            srt_path.write_text(
+                "1\n00:00:00,000 --> 00:00:00,800\nNarratoAI FFmpeg subtitle test\n",
+                encoding="utf-8",
+            )
+            # Render one frame of a generated black clip and discard the output.
+            ok, message = _run_optional(
+                [
+                    ffmpeg_path,
+                    "-y",
+                    "-hide_banner",
+                    "-loglevel",
+                    "error",
+                    "-f",
+                    "lavfi",
+                    "-i",
+                    "color=black:size=320x180:duration=1",
+                    "-vf",
+                    f"subtitles={_escape_filter_path(str(srt_path))}",
+                    "-frames:v",
+                    "1",
+                    "-f",
+                    "null",
+                    "-",
+                ],
+                timeout=18,
+            )
+            if ok:
+                return {
+                    "available": True,
+                    "method": "subtitles",
+                    "message": "SRT subtitle burn-in test passed",
+                    "filters": filter_status,
+                }
+            # Keep the failure output for the combined message at the end.
+            subtitles_error = message
+    else:
+        subtitles_error = "subtitles filter is not listed by this FFmpeg build"
+
+    if filter_status["drawtext"]:
+        # Fallback: draw a fixed string on one frame of the same black clip.
+        ok, message = _run_optional(
+            [
+                ffmpeg_path,
+                "-y",
+                "-hide_banner",
+                "-loglevel",
+                "error",
+                "-f",
+                "lavfi",
+                "-i",
+                "color=black:size=320x180:duration=1",
+                "-vf",
+                "drawtext=text=NarratoAI:x=10:y=10:fontsize=18:fontcolor=white",
+                "-frames:v",
+                "1",
+                "-f",
+                "null",
+                "-",
+            ],
+            timeout=18,
+        )
+        if ok:
+            return {
+                "available": True,
+                "method": "drawtext",
+                "message": "drawtext burn-in fallback test passed",
+                "filters": filter_status,
+            }
+        drawtext_error = message
+    else:
+        drawtext_error = "drawtext filter is not listed by this FFmpeg build"
+
+    # Neither method worked: report both reasons.
+    return {
+        "available": False,
+        "method": None,
+        "message": f"{subtitles_error}\n{drawtext_error}".strip(),
+        "filters": filter_status,
+    }
+
+
+def validate_ffmpeg_engine(ffmpeg_path: str) -> dict[str, Any]:
+    """
+    Run runtime checks for a selected FFmpeg engine.
+
+    Args:
+        ffmpeg_path: Path of the FFmpeg executable to examine; it is normalized
+            before use.
+
+    Returns:
+        A report dict covering the ffmpeg and ffprobe version checks, the listed
+        hardware acceleration methods, the hardware-encode and subtitle burn-in
+        test results, whether libx264 or libopenh264 is listed, and a list of
+        error messages. When the version check fails, the report is returned
+        early with only that error recorded.
+    """
+
+    path = _normalize_path(ffmpeg_path)
+    # Start from an all-negative report and fill it in as checks succeed.
+    report: dict[str, Any] = {
+        "path": path,
+        "ffmpeg_available": False,
+        "version_line": "",
+        "ffprobe_path": "",
+        "ffprobe_available": False,
+        "ffprobe_version_line": "",
+        "hwaccels": [],
+        "hardware_acceleration": {
+            "available": False,
+            "type": None,
+            "encoder": None,
+            "message": "",
+            "tested": [],
+        },
+        "subtitle_burn": {
+            "available": False,
+            "method": None,
+            "message": "",
+            "filters": {},
+        },
+        "software_encoder_available": False,
+        "errors": [],
+    }
+
+    available, version_line = _ffmpeg_version_line(path)
+    report["ffmpeg_available"] = available
+    report["version_line"] = version_line
+    if not available:
+        # Nothing else can be checked without a working executable.
+        report["errors"].append("FFmpeg is not executable or failed to run -version")
+        return report
+
+    ffprobe_path = _paired_ffprobe_path(path)
+    report["ffprobe_path"] = ffprobe_path
+    if ffprobe_path:
+        # The same ``-version`` probe is reused for ffprobe.
+        probe_available, probe_version = _ffmpeg_version_line(ffprobe_path)
+        report["ffprobe_available"] = probe_available
+        report["ffprobe_version_line"] = probe_version
+
+    # max_output_chars=0 disables truncation so the full listing can be parsed.
+    ok, hwaccel_output = _run_optional(
+        [path, "-hide_banner", "-hwaccels"],
+        timeout=10,
+        max_output_chars=0,
+    )
+    if ok:
+        report["hwaccels"] = _parse_hwaccels(hwaccel_output)
+    else:
+        report["errors"].append(f"Failed to list hardware acceleration methods: {hwaccel_output}")
+
+    ok, encoders_output = _run_optional(
+        [path, "-hide_banner", "-encoders"],
+        timeout=10,
+        max_output_chars=0,
+    )
+    encoders = _parse_ffmpeg_table_names(encoders_output) if ok else set()
+    report["software_encoder_available"] = "libx264" in encoders or "libopenh264" in encoders
+    if not ok:
+        report["errors"].append(f"Failed to list encoders: {encoders_output}")
+
+    ok, filters_output = _run_optional(
+        [path, "-hide_banner", "-filters"],
+        timeout=10,
+        max_output_chars=0,
+    )
+    filters = _parse_ffmpeg_table_names(filters_output) if ok else set()
+    if not ok:
+        report["errors"].append(f"Failed to list filters: {filters_output}")
+
+    # Both tests still run with empty sets when a listing failed above.
+    report["hardware_acceleration"] = _detect_hardware_encoding(path, encoders)
+    report["subtitle_burn"] = _test_subtitle_burn(path, filters)
+    return report
--- a/app/utils/test_ffmpeg_detector_unittest.py
+++ b/app/utils/test_ffmpeg_detector_unittest.py
@ -0,0 +1,76 @@
+import os
+import tempfile
+import unittest
+from pathlib import Path
+
+from app.utils import ffmpeg_detector
+
+
+class FFmpegDetectorTests(unittest.TestCase):
+    """Tests for ffmpeg_detector that use shell scripts as stand-in binaries."""
+
+    def _write_fake_binary(self, path: Path, first_line: str) -> None:
+        """
+        Write an executable shell script at *path* that imitates FFmpeg.
+
+        The script prints *first_line* for ``-version`` as its first argument,
+        prints canned listings when the second argument is ``-hwaccels``,
+        ``-encoders`` or ``-filters``, and exits 0 for every other invocation.
+        """
+        path.write_text(
+            "#!/bin/sh\n"
+            "if [ \"$1\" = \"-version\" ]; then\n"
+            f"  echo \"{first_line}\"\n"
+            "  exit 0\n"
+            "fi\n"
+            "if [ \"$2\" = \"-hwaccels\" ]; then\n"
+            "  echo \"Hardware acceleration methods:\"\n"
+            "  echo \"videotoolbox\"\n"
+            "  exit 0\n"
+            "fi\n"
+            "if [ \"$2\" = \"-encoders\" ]; then\n"
+            "  echo \" V....D h264_videotoolbox Apple VideoToolbox H.264\"\n"
+            "  echo \" V....D h264_nvenc NVIDIA NVENC H.264\"\n"
+            "  echo \" V....D h264_qsv Intel QSV H.264\"\n"
+            "  echo \" V....D libx264 libx264 H.264\"\n"
+            "  exit 0\n"
+            "fi\n"
+            "if [ \"$2\" = \"-filters\" ]; then\n"
+            "  echo \" ... subtitles V->V Render text subtitles\"\n"
+            "  echo \" ... drawtext V->V Draw text\"\n"
+            "  echo \" ... overlay VV->V Overlay video\"\n"
+            "  exit 0\n"
+            "fi\n"
+            "exit 0\n",
+            encoding="utf-8",
+        )
+        # Mark the script executable.
+        path.chmod(0o755)
+
+    @unittest.skipIf(os.name == "nt", "shell fake binaries are POSIX-only")
+    def test_discover_includes_configured_path(self):
+        """A configured fake ffmpeg is listed first, paired with its sibling ffprobe."""
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            ffmpeg_path = Path(tmp_dir) / "ffmpeg"
+            ffprobe_path = Path(tmp_dir) / "ffprobe"
+            self._write_fake_binary(ffmpeg_path, "ffmpeg version fake-1.0")
+            self._write_fake_binary(ffprobe_path, "ffprobe version fake-1.0")
+
+            engines = ffmpeg_detector.discover_ffmpeg_engines(
+                configured_path=str(ffmpeg_path),
+                root_dir=tmp_dir,
+                include_system=False,
+            )
+
+            self.assertEqual(engines[0]["path"], str(ffmpeg_path.resolve()))
+            self.assertEqual(engines[0]["ffprobe_path"], str(ffprobe_path.resolve()))
+            self.assertTrue(engines[0]["available"])
+
+    @unittest.skipIf(os.name == "nt", "shell fake binaries are POSIX-only")
+    def test_validate_reports_hardware_and_subtitle_support(self):
+        """Validation of the fake binary reports hardware encoding and subtitle burn-in."""
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            ffmpeg_path = Path(tmp_dir) / "ffmpeg"
+            ffprobe_path = Path(tmp_dir) / "ffprobe"
+            self._write_fake_binary(ffmpeg_path, "ffmpeg version fake-1.0")
+            self._write_fake_binary(ffprobe_path, "ffprobe version fake-1.0")
+
+            report = ffmpeg_detector.validate_ffmpeg_engine(str(ffmpeg_path))
+
+            self.assertTrue(report["ffmpeg_available"])
+            self.assertTrue(report["ffprobe_available"])
+            self.assertTrue(report["hardware_acceleration"]["available"])
+            self.assertTrue(report["subtitle_burn"]["available"])
+            self.assertEqual(report["subtitle_burn"]["method"], "subtitles")
+
+
+# Run the tests when this module is executed directly as a script.
+if __name__ == "__main__":
+    unittest.main()
--- a/config.example.toml
+++ b/config.example.toml
@ -25,6 +25,10 @@
    vision_openai_model_name = "Qwen/Qwen3.5-122B-A10B"
    vision_openai_api_key = ""  # 填入对应 provider 的 API key
    vision_openai_base_url = "https://api.siliconflow.cn/v1"  # 可选：自定义 API base URL（官方 OpenAI 可留空）
+    vision_openai_temperature = 1.0
+    vision_openai_top_p = 0.95
+    vision_openai_max_tokens = 65536
+    vision_openai_thinking_level = "auto"  # auto/off/low/medium/high

    # ===== 文本模型配置 =====
    text_llm_provider = "openai"
@ -40,6 +44,16 @@
    text_openai_model_name = "Pro/zai-org/GLM-5"
    text_openai_api_key = ""  # 填入对应 provider 的 API key
    text_openai_base_url = "https://api.siliconflow.cn/v1"  # 可选：自定义 API base URL（官方 OpenAI 可留空）
+    text_openai_temperature = 1.0
+    text_openai_top_p = 0.95
+    text_openai_max_tokens = 65536
+    text_openai_thinking_level = "auto"  # auto/off/low/medium/high
+
+    # ===== Tavily 联网搜索配置 =====
+    # 用于短剧剧情理解前，按短剧名称检索公开剧情/人物/分集信息
+    tavily_api_key = ""  # 获取地址：https://app.tavily.com
+    tavily_search_depth = "basic"  # basic / advanced / fast / ultra-fast
+    tavily_max_results = 5

    # ===== API Keys 参考 =====
    # 主流 LLM Providers API Key 获取地址：
@ -61,6 +75,10 @@
    # WebUI 界面是否显示配置项
    hide_config = true

+    # FFmpeg 引擎路径（可选）
+    # 为空时使用系统 PATH；也可以在系统设置中通过下拉框选择整合包或本机 ffmpeg。
+    ffmpeg_path = ""
+
    # 官方 OpenAI 默认端点（可选）：
    # text_openai_base_url = "https://api.openai.com/v1"

@ -95,24 +113,32 @@
    model_name = "qwen3-tts-flash"

 [fun_asr]
-    # 阿里百炼 Fun-ASR 字幕转录配置
-    # 访问 https://bailian.console.aliyun.com/?tab=model#/api-key 获取你的 API 密钥
+    # Fun-ASR 字幕转录配置
+    # backend = "local" 使用本地 FunASR-Pack API；backend = "firered" 使用本地 FireRedASR2-AED-Pack API；backend = "bailian" 使用阿里百炼在线 fun-asr
+    auto_transcribe_enabled = false
+    backend = "local"
+    api_url = "http://127.0.0.1:7860"
+    firered_api_url = "http://127.0.0.1:7867"
+    hotword = ""
+    enable_spk = false
+    # 使用阿里百炼在线 fun-asr 时，访问 https://bailian.console.aliyun.com/?tab=model#/api-key 获取 API Key
    api_key = ""
    model = "fun-asr"
-    
-[indextts2]
-    # IndexTTS2 语音克隆配置
+
+[indextts]
+    # IndexTTS-1.5 语音克隆配置
    # 这是一个开源的零样本语音克隆项目，需要自行部署
    # 项目地址：https://github.com/index-tts/index-tts
    # 默认 API 地址（本地部署）
    api_url = "http://127.0.0.1:8081/tts"
-    
-    # 默认参考音频路径（可选）
+
+    # 默认参考音频（可选）
+    reference_audio_source = "resource"
    # reference_audio = "/path/to/reference_audio.wav"
-    
+
    # 推理模式：普通推理 / 快速推理
    infer_mode = "普通推理"
-    
+
    # 高级参数
    temperature = 1.0
    top_p = 0.8
@ -120,6 +146,66 @@
    do_sample = true
    num_beams = 3
    repetition_penalty = 10.0
+
+[indextts2]
+    # IndexTTS-2 voice-cloning configuration
+    # Targets the IndexTTS2-Pack FastAPI endpoint: POST /tts
+    # Default API address (local deployment); change the host if the service runs on another machine
+    api_url = "http://127.0.0.1:7863/tts"
+
+    # 默认参考音频（可选），音色列表复用 IndexTTS-1.5 的资源目录
+    reference_audio_source = "resource"
+    # reference_audio = "/path/to/reference_audio.wav"
+
+    # 情感控制：speaker / audio / vector / text
+    emotion_mode = "speaker"
+    emotion_audio = ""
+    emotion_alpha = 0.65
+    emotion_text = ""
+    use_random = false
+    max_text_tokens_per_segment = 120
+
+    # 8 维情感向量，顺序：happy, angry, sad, afraid, disgusted, melancholic, surprised, calm
+    vec_happy = 0.0
+    vec_angry = 0.0
+    vec_sad = 0.0
+    vec_afraid = 0.0
+    vec_disgusted = 0.0
+    vec_melancholic = 0.0
+    vec_surprised = 0.0
+    vec_calm = 0.8
+
+    # 高级生成参数
+    temperature = 0.8
+    top_p = 0.8
+    top_k = 30
+    num_beams = 3
+    repetition_penalty = 10.0
+    max_mel_tokens = 1500
+
+[omnivoice]
+    # OmniVoice-Pack 语音合成配置
+    # 支持 OmniVoice-Pack FastAPI 接口：POST /tts
+    api_url = "http://127.0.0.1:7866/tts"
+    language = "zh"
+
+    # 生成模式：auto / voice_design / voice_clone
+    mode = "auto"
+    instruct = ""
+
+    # voice_clone 模式下使用，音色列表复用 IndexTTS-1.5 的资源目录
+    reference_audio_source = "resource"
+    reference_audio = ""
+    ref_text = ""
+
+    # 高级生成参数
+    num_step = 32
+    guidance_scale = 2.0
+    speed = 1.0
+    duration = ""
+    denoise = true
+    postprocess_output = true
+    preprocess_prompt = true
+
 [doubaotts]
    # 豆包语音 TTS 配置
    # 申请流程：
@ -138,8 +224,8 @@
    silence_duration = 0.125

 [ui]
-    # TTS引擎选择 (edge_tts, azure_speech, soulvoice, tencent_tts, tts_qwen, doubaotts)
-    tts_engine = "edge_tts"
+    # TTS引擎选择 (indextts, indextts2, omnivoice, edge_tts, qwen3_tts, tencent_tts, doubaotts, azure_speech)
+    tts_engine = "indextts"

    # Edge TTS 配置
    edge_voice_name = "zh-CN-XiaoyiNeural-Female"
@ -157,6 +243,23 @@
    doubaotts_voice_type = "BV700_V2_streaming"
    doubaotts_rate = 1.0

+    # 字幕遮罩配置：用于在烧录新字幕前遮盖原视频自带字幕
+    subtitle_mask_enabled = false
+    subtitle_mask_landscape_x_percent = 10
+    subtitle_mask_landscape_y_percent = 78
+    subtitle_mask_landscape_width_percent = 80
+    subtitle_mask_landscape_height_percent = 14
+    subtitle_mask_landscape_blur_radius = 18
+    subtitle_mask_landscape_opacity_percent = 82
+    subtitle_mask_portrait_x_percent = 8
+    subtitle_mask_portrait_y_percent = 79
+    subtitle_mask_portrait_width_percent = 84
+    subtitle_mask_portrait_height_percent = 16
+    subtitle_mask_portrait_blur_radius = 26
+    subtitle_mask_portrait_opacity_percent = 84
+    subtitle_position_landscape_y_percent = 85
+    subtitle_position_portrait_y_percent = 82
+
 ##########################################
 # 代理和网络配置
 ##########################################
--- a/docs/voice-list.txt
+++ b/docs/voice-list.txt
@ -199,7 +199,7 @@ Gender: Male
 Name: en-AU-NatashaNeural
 Gender: Female

-Name: en-AU-WilliamNeural
+Name: en-AU-WilliamMultilingualNeural
 Gender: Male

 Name: en-CA-ClaraNeural
@ -286,21 +286,33 @@ Gender: Female
 Name: en-US-AndrewNeural
 Gender: Male

+Name: en-US-AndrewMultilingualNeural
+Gender: Male
+
 Name: en-US-AriaNeural
 Gender: Female

 Name: en-US-AvaNeural
 Gender: Female

+Name: en-US-AvaMultilingualNeural
+Gender: Female
+
 Name: en-US-BrianNeural
 Gender: Male

+Name: en-US-BrianMultilingualNeural
+Gender: Male
+
 Name: en-US-ChristopherNeural
 Gender: Male

 Name: en-US-EmmaNeural
 Gender: Female

+Name: en-US-EmmaMultilingualNeural
+Gender: Female
+
 Name: en-US-EricNeural
 Gender: Male

@ -583,12 +595,24 @@ Gender: Male
 Name: it-IT-ElsaNeural
 Gender: Female

-Name: it-IT-GiuseppeNeural
+Name: it-IT-GiuseppeMultilingualNeural
 Gender: Male

 Name: it-IT-IsabellaNeural
 Gender: Female

+Name: iu-Cans-CA-SiqiniqNeural
+Gender: Female
+
+Name: iu-Cans-CA-TaqqiqNeural
+Gender: Male
+
+Name: iu-Latn-CA-SiqiniqNeural
+Gender: Female
+
+Name: iu-Latn-CA-TaqqiqNeural
+Gender: Male
+
 Name: ja-JP-KeitaNeural
 Gender: Male

@ -625,7 +649,7 @@ Gender: Male
 Name: kn-IN-SapnaNeural
 Gender: Female

-Name: ko-KR-HyunsuNeural
+Name: ko-KR-HyunsuMultilingualNeural
 Gender: Male

 Name: ko-KR-InJoonNeural
@ -739,7 +763,7 @@ Gender: Male
 Name: pt-BR-FranciscaNeural
 Gender: Female

-Name: pt-BR-ThalitaNeural
+Name: pt-BR-ThalitaMultilingualNeural
 Gender: Female

 Name: pt-PT-DuarteNeural
--- a/2
+++ b/2
@ -1 +1 @@
-0.7.9
+0.8.1
--- a/requirements.txt
+++ b/requirements.txt
@ -2,7 +2,7 @@
 requests>=2.32.0
 moviepy==2.1.1
 edge-tts==7.2.7
-streamlit>=1.45.0
+streamlit==1.56.0
 watchdog==6.0.0
 loguru>=0.7.3
 tomli>=2.2.1
@ -35,6 +35,3 @@ tenacity>=9.0.0
 # torch>=2.0.0
 # torchvision>=0.15.0
 # torchaudio>=2.0.0
-
-# 剪映草稿导出依赖
-pyJianYingDraft>=0.1.0
--- a/webui.py
+++ b/webui.py
@ -2,6 +2,7 @@ import streamlit as st
 import os
 import sys
 import time
+from html import escape
 from loguru import logger
 from app.config import config
 from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, \
@ -9,6 +10,7 @@ from webui.components import basic_settings, video_settings, audio_settings, sub
 # from webui.utils import cache, file_utils
 from app.utils import utils
 from app.utils import ffmpeg_utils
+from app.models import const
 from app.models.schema import VideoClipParams, VideoAspect


@ -128,6 +130,82 @@ def tr(key):
    return loc.get("Translation", {}).get(key, key)


+# Default label for each video-generation step, in display order.
+# _render_generation_status() renders one row per entry and falls back to
+# these labels when the task supplies no message for the active step.
+VIDEO_GENERATION_STEP_LABELS = [
+    "正在加载剪辑脚本",
+    "正在生成 TTS 配音",
+    "正在按脚本裁剪视频片段",
+    "正在合并配音和字幕",
+    "正在合并视频片段",
+    "正在合成最终视频",
+]
+
+
+def _safe_int(value, default=0):
+    """Coerce ``value`` to ``int``, returning ``default`` when it cannot be converted.
+
+    Args:
+        value: Any object accepted by ``int()`` (number, numeric string, ...).
+        default: Value returned when the conversion fails.
+
+    Returns:
+        ``int(value)``, or ``default`` if ``int()`` rejects the value.
+    """
+    try:
+        return int(value)
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError covers float("inf"): int() reports infinity with that
+        # type rather than ValueError, and it must not escape the renderer.
+        return default
+
+
+def _format_optional_percent(value):
+    """Format ``value`` as a percentage number clamped to the range 0-100.
+
+    Whole numbers are rendered without a fractional part (``"42"``); anything
+    else is rendered with one decimal place (``"42.5"``).
+
+    Args:
+        value: Number or numeric string; ``None`` and non-numeric values are
+            treated as "no percentage available".
+
+    Returns:
+        The formatted number without a ``%`` sign, or ``None`` when ``value``
+        is missing or is not a usable number.
+    """
+    try:
+        percent = float(value)
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError: an integer too large to be represented as a float.
+        return None
+    if percent != percent:
+        # NaN is the only float unequal to itself. Clamping it with
+        # min()/max() would silently turn it into a misleading "100".
+        return None
+    percent = max(0.0, min(100.0, percent))
+    if percent.is_integer():
+        return str(int(percent))
+    return f"{percent:.1f}"
+
+
+def _render_generation_status(task: dict | None) -> str:
+    """Build the HTML snippet describing the progress of a video-generation task.
+
+    Args:
+        task: Task state mapping, or ``None``. The keys read are ``state``,
+            ``step_current``, ``step_total``, ``message`` and
+            ``ffmpeg_progress``.
+
+    Returns:
+        A single ``<div>`` with the task message (or a default waiting text)
+        when no step has started yet; otherwise one ``<div>`` per entry of
+        ``VIDEO_GENERATION_STEP_LABELS``, with the active step emphasised.
+        All interpolated text is HTML-escaped.
+    """
+    info = task or {}
+    task_state = info.get("state")
+    step_now = _safe_int(info.get("step_current"), 0)
+    steps_overall = _safe_int(info.get("step_total"), len(VIDEO_GENERATION_STEP_LABELS))
+    text = str(info.get("message") or "")
+    ffmpeg_text = _format_optional_percent(info.get("ffmpeg_progress"))
+
+    # No step reported yet: show just the message or the default waiting text.
+    if step_now <= 0:
+        return f"<div style='font-weight:650;color:#262730;'>{escape(text or '正在生成视频，请稍候...')}</div>"
+
+    finished = task_state == const.TASK_STATE_COMPLETE
+
+    rows = []
+    for position, fallback_label in enumerate(VIDEO_GENERATION_STEP_LABELS, start=1):
+        active = position == step_now
+        caption = text if (active and text) else fallback_label
+
+        counter = f"{position}/{steps_overall}"
+        # The ffmpeg percentage is appended only to the active final step,
+        # and only while the task has not completed.
+        show_ffmpeg = (
+            active
+            and position == steps_overall
+            and ffmpeg_text is not None
+            and not finished
+        )
+        if show_ffmpeg:
+            counter = f"{counter}，ffmpeg {ffmpeg_text}%"
+
+        if active:
+            color, weight = "#262730", "650"
+        elif finished or position < step_now:
+            color, weight = "#8b9099", "500"
+        else:
+            color, weight = "#b9bec7", "500"
+
+        style = (
+            "font-size:1.02rem;line-height:1.85;margin:0.28rem 0;"
+            f"color:{color};font-weight:{weight};"
+        )
+        rows.append(
+            f"<div style='{style}'>"
+            f"{escape(caption)} <span style='white-space:nowrap;'>({escape(counter)})</span>"
+            "</div>"
+        )
+
+    return "".join(rows)
+
+
+def get_help_text():
+    """Return the translated help text with the emoji marker replaced by the project version."""
+    help_template = tr("Get Help")
+    version_tag = f" v{config.project_version}"
+    return help_template.replace("🎉🎉🎉", version_tag)
+
+
 def render_generate_button():
    """渲染生成按钮和处理逻辑"""
    if st.button(tr("Generate Video"), use_container_width=True, type="primary"):
@ -143,10 +221,10 @@ def render_generate_button():
        # 移除task_id检查 - 现在使用统一裁剪策略，不再需要预裁剪
        # 直接检查必要的文件是否存在
        if not st.session_state.get('video_clip_json_path'):
-            st.error(tr("脚本文件不能为空"))
+            st.error(tr("Script file cannot be empty"))
            return
        if not st.session_state.get('video_origin_path'):
-            st.error(tr("视频文件不能为空"))
+            st.error(tr("Video file cannot be empty"))
            return

        # 获取所有参数
@ -169,79 +247,189 @@ def render_generate_button():
        # 生成一个新的task_id用于本次处理
        task_id = str(uuid.uuid4())

-        # 创建进度条
-        progress_bar = st.progress(0)
-        status_text = st.empty()
+        @st.dialog(tr("Generating Video"), width="large")
+        def generate_video_dialog():
+            st.markdown(
+                """
+                <style>
+                    div[data-testid="stDialog"] div[data-testid="stStatusWidget"] {
+                        margin-top: 0.25rem;
+                    }
+                    div[data-testid="stDialog"] div[data-testid="stProgress"] {
+                        margin-bottom: 0.75rem;
+                    }
+                    div[data-testid="stDialog"] video {
+                        max-height: 62vh;
+                        object-fit: contain;
+                        background: #000;
+                    }
+                </style>
+                """,
+                unsafe_allow_html=True,
+            )

-        def run_task():
-            try:
-                tm.start_subclip_unified(
-                    task_id=task_id,
-                    params=params
+            progress_bar = st.progress(0)
+            status_panel = st.status(tr("Generating Video"), expanded=True)
+            with status_panel:
+                status_placeholder = st.empty()
+                status_placeholder.markdown(
+                    _render_generation_status(None),
+                    unsafe_allow_html=True,
                )
-            except Exception as e:
-                logger.error(f"任务执行失败: {e}")
-                sm.state.update_task(task_id, state=const.TASK_STATE_FAILED, message=str(e))

-        # 在新线程中启动任务
-        thread = threading.Thread(target=run_task)
-        thread.start()
+            def run_task():
+                try:
+                    tm.start_subclip_unified(
+                        task_id=task_id,
+                        params=params
+                    )
+                except Exception as e:
+                    logger.error(f"任务执行失败: {e}")
+                    current_task = sm.state.get_task(task_id) or {}
+                    sm.state.update_task(
+                        task_id,
+                        state=const.TASK_STATE_FAILED,
+                        progress=current_task.get("progress", 0),
+                        message=str(e),
+                    )

-        # 轮询任务状态
-        while True:
-            task = sm.state.get_task(task_id)
-            if task:
-                progress = task.get("progress", 0)
-                state = task.get("state")
-                
-                # 更新进度条
-                progress_bar.progress(progress / 100)
-                status_text.text(f"Processing... {progress}%")
+            # 在新线程中启动任务
+            thread = threading.Thread(target=run_task)
+            thread.start()
+
+            last_status_key = None
+
+            # 轮询任务状态
+            while True:
+                task = sm.state.get_task(task_id)
+                if task:
+                    progress = task.get("progress", 0)
+                    state = task.get("state")

-                if state == const.TASK_STATE_COMPLETE:
-                    status_text.text(tr("视频生成完成"))
-                    progress_bar.progress(1.0)
-                    
-                    # 显示结果
-                    video_files = task.get("videos", [])
                    try:
-                        if video_files:
-                            player_cols = st.columns(len(video_files) * 2 + 1)
-                            for i, url in enumerate(video_files):
-                                player_cols[i * 2 + 1].video(url)
-                    except Exception as e:
-                        logger.error(f"播放视频失败: {e}")
-                    
-                    st.success(tr("视频生成完成"))
-                    break
-                
-                elif state == const.TASK_STATE_FAILED:
-                    st.error(f"任务失败: {task.get('message', 'Unknown error')}")
-                    break
-            
-            time.sleep(0.5)
+                        progress = int(progress)
+                    except (TypeError, ValueError):
+                        progress = 0
+                    progress = max(0, min(progress, 100))
+
+                    # 更新进度条和阶段状态
+                    progress_bar.progress(progress / 100)
+                    current_message = task.get("message") or f"Processing... {progress}%"
+                    status_key = (
+                        state,
+                        progress,
+                        current_message,
+                        task.get("step_current"),
+                        task.get("step_total"),
+                        task.get("ffmpeg_progress"),
+                    )
+                    if status_key != last_status_key:
+                        status_placeholder.markdown(
+                            _render_generation_status(task),
+                            unsafe_allow_html=True,
+                        )
+                        last_status_key = status_key
+
+                    if state == const.TASK_STATE_COMPLETE:
+                        status_panel.update(
+                            label=tr("Video Generation Completed"),
+                            state="complete",
+                            expanded=False,
+                        )
+                        progress_bar.progress(1.0)
+
+                        # 显示结果
+                        video_files = task.get("videos", [])
+                        try:
+                            if video_files:
+                                aspect = getattr(params, "video_aspect", "")
+                                aspect = getattr(aspect, "value", aspect)
+                                preview_width = 320 if aspect in {
+                                    VideoAspect.portrait.value,
+                                    VideoAspect.portrait_2.value,
+                                } else 600
+                                for url in video_files:
+                                    _, preview_col, _ = st.columns([1, 2, 1])
+                                    with preview_col:
+                                        st.video(url, width=preview_width)
+                        except Exception as e:
+                            logger.error(f"播放视频失败: {e}")
+
+                        st.success(tr("Video Generation Completed"))
+                        break
+
+                    if state == const.TASK_STATE_FAILED:
+                        status_panel.update(
+                            label=f"{tr('Task failed')}: {task.get('message', 'Unknown error')}",
+                            state="error",
+                            expanded=True,
+                        )
+                        st.error(f"{tr('Task failed')}: {task.get('message', 'Unknown error')}")
+                        break
+
+                time.sleep(0.5)
+
+        generate_video_dialog()


 def get_voice_name_for_tts_engine(tts_engine: str) -> str:
    """根据TTS引擎获取用户选择的音色"""
+    if tts_engine == 'edge_tts':
+        return config.ui.get('edge_voice_name', 'zh-CN-XiaoxiaoNeural-Female')
+    if tts_engine == 'azure_speech':
+        return config.ui.get('azure_voice_name', 'zh-CN-XiaoxiaoMultilingualNeural')
+    if tts_engine == 'tencent_tts':
+        return f"tencent:{config.ui.get('tencent_voice_type', '101001')}"
+    if tts_engine == 'qwen3_tts':
+        return f"qwen3:{config.ui.get('qwen_voice_type', 'Cherry')}"
+    if tts_engine == config.INDEXTTS2_ENGINE:
+        reference_audio = config.indextts2.get('reference_audio', '')
+        if reference_audio:
+            return f"{config.INDEXTTS2_VOICE_PREFIX}{reference_audio}"
+        return config.ui.get('voice_name', '')
+    if config.normalize_tts_engine_name(tts_engine) == config.INDEXTTS_ENGINE:
+        reference_audio = config.indextts.get('reference_audio', '')
+        if reference_audio:
+            return f"{config.INDEXTTS_VOICE_PREFIX}{reference_audio}"
+        return config.ui.get('voice_name', '')
+    if tts_engine == config.OMNIVOICE_ENGINE:
+        mode = config.omnivoice.get('mode', 'auto')
+        reference_audio = config.omnivoice.get('reference_audio', '')
+        if mode == 'voice_clone' and reference_audio:
+            return f"{config.OMNIVOICE_VOICE_PREFIX}{reference_audio}"
+        return f"{config.OMNIVOICE_VOICE_PREFIX}{mode}"
    if tts_engine == 'doubaotts':
-        return st.session_state.get('voice_name', config.ui.get('doubaotts_voice_type', 'BV700_streaming'))
-    elif tts_engine == 'azure_speech':
-        return st.session_state.get('voice_name', config.ui.get('azure_voice_name', 'zh-CN-XiaoxiaoMultilingualNeural'))
-    else:
-        return st.session_state.get('voice_name', config.ui.get('edge_voice_name', 'zh-CN-XiaoxiaoNeural-Female'))
+        return config.ui.get('doubaotts_voice_type', 'BV700_streaming')
+    if tts_engine == 'soulvoice':
+        voice_uri = config.soulvoice.get('voice_uri', '')
+        if voice_uri and not voice_uri.startswith(('soulvoice:', 'speech:')):
+            return f"soulvoice:{voice_uri}"
+        return voice_uri
+    return config.ui.get('voice_name', config.ui.get('edge_voice_name', 'zh-CN-XiaoxiaoNeural-Female'))


-def get_jianying_export_params() -> VideoClipParams:
+def get_jianying_export_params(draft_name=None) -> VideoClipParams:
    """获取导出到剪映草稿的参数"""
-    tts_engine = st.session_state.get('tts_engine', 'azure')
+    tts_engine = st.session_state.get('tts_engine', config.ui.get('tts_engine', 'edge_tts'))
    voice_name = get_voice_name_for_tts_engine(tts_engine)
    voice_rate = st.session_state.get('voice_rate', 1.0)
    voice_pitch = st.session_state.get('voice_pitch', 1.0)
+    subtitle_paths = st.session_state.get('subtitle_paths', [])
+    if isinstance(subtitle_paths, str):
+        subtitle_paths = [subtitle_paths]
+    subtitle_paths = [
+        path for path in subtitle_paths
+        if isinstance(path, str) and path.strip()
+    ]
+    if not subtitle_paths and st.session_state.get('subtitle_path'):
+        subtitle_paths = [st.session_state.get('subtitle_path')]
    
    return VideoClipParams(
        video_clip_json_path=st.session_state['video_clip_json_path'],
        video_origin_path=st.session_state['video_origin_path'],
+        video_origin_paths=st.session_state.get('video_origin_paths', []),
+        original_subtitle_path=subtitle_paths[0] if subtitle_paths else "",
+        original_subtitle_paths=subtitle_paths,
        tts_engine=tts_engine,
        voice_name=voice_name,
        voice_rate=voice_rate,
@ -257,108 +445,208 @@ def get_jianying_export_params() -> VideoClipParams:
        tts_volume=st.session_state.get('tts_volume', 1.0),
        original_volume=st.session_state.get('original_volume', 0.7),
        bgm_volume=st.session_state.get('bgm_volume', 0.3),
-        draft_name=st.session_state.get('draft_name_input', f"NarratoAI_{int(time.time())}")
+        draft_name=(
+            draft_name
+            if draft_name is not None
+            else st.session_state.get('draft_name_input', f"NarratoAI_{int(time.time())}")
+        )
    )


+def _render_jianying_export_status():
+    """Show the stored outcome of the last Jianying draft export, if there is one.
+
+    A stored result takes precedence over a stored error; nothing is rendered
+    when neither is set.
+    """
+    export_result = st.session_state.get('jianying_export_result')
+    export_error = st.session_state.get('jianying_export_error')
+
+    if export_result:
+        success_text = tr("Jianying draft exported successfully").format(name=export_result['draft_name'])
+        st.success(success_text)
+        location_text = tr("Draft saved to").format(path=export_result['draft_path'])
+        st.info(location_text)
+        return
+
+    if export_error:
+        st.error(f"{tr('Failed to export Jianying draft')}: {export_error}")
+
+
+def _render_jianying_export_dialog():
+    """Open a dialog that asks for the Jianying draft name and runs the export.
+
+    The dialog shows the configured draft folder, a text input bound to the
+    ``draft_name_input`` session key, and Cancel / Confirm buttons. On confirm
+    it builds the export parameters and calls
+    ``jianying_task.start_export_jianying_draft``; the outcome is stored in
+    ``st.session_state['jianying_export_result']`` or
+    ``st.session_state['jianying_export_error']``.
+    """
+    import uuid
+    from loguru import logger
+
+    @st.dialog(tr("Export to Jianying Draft"), width="small")
+    def jianying_export_dialog():
+        jianying_draft_path = config.ui.get("jianying_draft_path", "")
+        # Escape every interpolated value: the markup below is rendered with
+        # unsafe_allow_html=True.
+        dialog_title = escape(tr("Jianying export dialog title"))
+        dialog_description = escape(tr("Jianying export dialog description"))
+        destination_label = escape(tr("Jianying export destination"))
+        destination_path = escape(jianying_draft_path or "-")
+
+        # CSS braces are doubled because this is an f-string.
+        st.markdown(
+            f"""
+            <style>
+                .jianying-export-panel {{
+                    display: flex;
+                    gap: 12px;
+                    align-items: flex-start;
+                    padding: 14px;
+                    margin: 2px 0 18px;
+                    border: 1px solid rgba(255, 75, 75, 0.24);
+                    border-radius: 8px;
+                    background: linear-gradient(135deg, rgba(255, 75, 75, 0.10), rgba(255, 255, 255, 0.96));
+                }}
+                .jianying-export-icon {{
+                    width: 38px;
+                    height: 38px;
+                    display: flex;
+                    align-items: center;
+                    justify-content: center;
+                    flex: 0 0 auto;
+                    border-radius: 8px;
+                    color: #ffffff;
+                    background: #ff4b4b;
+                    font-size: 20px;
+                    line-height: 1;
+                }}
+                .jianying-export-title {{
+                    color: #202534;
+                    font-size: 17px;
+                    font-weight: 700;
+                    line-height: 1.35;
+                    margin-bottom: 4px;
+                }}
+                .jianying-export-description {{
+                    color: #5f6575;
+                    font-size: 13px;
+                    line-height: 1.55;
+                }}
+                .jianying-export-path {{
+                    padding: 10px 12px;
+                    margin: 2px 0 16px;
+                    border: 1px solid #e4e7ef;
+                    border-radius: 8px;
+                    background: #f8f9fc;
+                    color: #323846;
+                    font-size: 13px;
+                    line-height: 1.45;
+                    word-break: break-all;
+                }}
+                .jianying-export-path-label {{
+                    display: block;
+                    color: #7a8192;
+                    font-size: 12px;
+                    margin-bottom: 4px;
+                }}
+            </style>
+            <div class="jianying-export-panel">
+                <div class="jianying-export-icon">📤</div>
+                <div>
+                    <div class="jianying-export-title">{dialog_title}</div>
+                    <div class="jianying-export-description">{dialog_description}</div>
+                </div>
+            </div>
+            <div class="jianying-export-path">
+                <span class="jianying-export-path-label">{destination_label}</span>
+                {destination_path}
+            </div>
+            """,
+            unsafe_allow_html=True,
+        )
+
+        # The input is bound to the "draft_name_input" session key, so its
+        # initial text is whatever is stored under that key.
+        draft_name = st.text_input(
+            tr("Jianying draft name"),
+            key="draft_name_input",
+            placeholder="NarratoAI_",
+        )
+
+        # Show the error stored in session state, if any.
+        error = st.session_state.get('jianying_export_error')
+        if error:
+            st.error(f"{tr('Failed to export Jianying draft')}: {error}")
+
+        cancel_col, confirm_col = st.columns(2)
+        with cancel_col:
+            if st.button(tr("Cancel"), key="cancel_export", use_container_width=True):
+                # Clear any stored error and rerun the script.
+                st.session_state['jianying_export_error'] = None
+                st.rerun()
+
+        with confirm_col:
+            if st.button(tr("Confirm Export"), key="confirm_export", type="primary", use_container_width=True):
+                # Reject names that are empty or whitespace-only.
+                draft_name = (draft_name or "").strip()
+                if not draft_name:
+                    st.error(tr("Please enter draft name"))
+                    return
+
+                # Create a task ID
+                task_id = str(uuid.uuid4())
+                st.session_state['task_id'] = task_id
+
+                # Build the export parameters
+                try:
+                    params = get_jianying_export_params(draft_name)
+                except Exception as e:
+                    logger.error(f"构建参数失败: {e}")
+                    st.session_state['jianying_export_error'] = f"{tr('Failed to build parameters')}: {e}"
+                    st.error(st.session_state['jianying_export_error'])
+                    return
+
+                with st.spinner(tr("Exporting to Jianying draft...")):
+                    try:
+                        from app.services import jianying_task
+
+                        # Run the Jianying draft export task
+                        result = jianying_task.start_export_jianying_draft(task_id, params)
+
+                        # Log the result
+                        logger.info(f"成功导出到剪映草稿: {result['draft_name']}")
+                        logger.info(f"草稿已保存到: {result['draft_path']}")
+
+                        # Store the result in session state, then rerun the script
+                        st.session_state['jianying_export_result'] = result
+                        st.session_state['jianying_export_error'] = None
+                        st.rerun()
+                    except Exception as e:
+                        logger.error(f"导出到剪映草稿失败: {e}")
+                        import traceback
+                        logger.error(f"错误详情: {traceback.format_exc()}")
+                        # Record the failure and clear any earlier result.
+                        st.session_state['jianying_export_error'] = str(e)
+                        st.session_state['jianying_export_result'] = None
+                        st.error(f"{tr('Failed to export Jianying draft')}: {e}")
+
+    jianying_export_dialog()
+
+
 def render_export_jianying_button():
    """渲染导出到剪映草稿按钮和处理逻辑"""
    import os
    import time
-    import uuid
-    from loguru import logger
    
    # 初始化session state
-    if 'show_jianying_export_form' not in st.session_state:
-        st.session_state['show_jianying_export_form'] = False
    if 'jianying_export_result' not in st.session_state:
        st.session_state['jianying_export_result'] = None
    if 'jianying_export_error' not in st.session_state:
        st.session_state['jianying_export_error'] = None
    
-    if st.button("📤 导出到剪映草稿", use_container_width=True, type="secondary"):
+    if st.button(tr("Export to Jianying Draft"), use_container_width=True, type="secondary"):
        config.save_config()
        
        if not st.session_state.get('video_clip_json_path'):
-            st.error("脚本文件不能为空")
+            st.error(tr("Script file cannot be empty"))
            return
        if not st.session_state.get('video_origin_path'):
-            st.error("视频文件不能为空")
+            st.error(tr("Video file cannot be empty"))
            return
        
        jianying_draft_path = config.ui.get("jianying_draft_path", "")
        if not jianying_draft_path:
-            st.error("请在基础设置中配置剪映草稿地址")
+            st.error(tr("Please configure Jianying draft folder in basic settings"))
            return
        
        if not os.path.exists(jianying_draft_path):
-            st.error(f"剪映草稿文件夹不存在: {jianying_draft_path}")
+            st.error(tr("Jianying draft folder does not exist").format(path=jianying_draft_path))
            return
        
-        # 显示导出表单
-        st.session_state['show_jianying_export_form'] = True
        st.session_state['jianying_export_result'] = None
        st.session_state['jianying_export_error'] = None
+        st.session_state['draft_name_input'] = f"NarratoAI_{int(time.time())}"
+        _render_jianying_export_dialog()
    
-    # 显示导出表单
-    if st.session_state['show_jianying_export_form']:
-        st.markdown("---")
-        st.subheader("导出到剪映草稿")
-        
-        draft_name = st.text_input(
-            "请输入剪映草稿名称",
-            value=f"NarratoAI_{int(time.time())}",
-            key="draft_name_input"
-        )
-        
-        if st.button("确认导出", key="confirm_export"):
-            if not draft_name:
-                st.error("请输入草稿名称")
-                return
-            
-            # 创建任务ID
-            task_id = str(uuid.uuid4())
-            st.session_state['task_id'] = task_id
-            
-            # 构建参数
-            try:
-                params = get_jianying_export_params()
-            except Exception as e:
-                logger.error(f"构建参数失败: {e}")
-                st.error(f"参数构建失败: {e}")
-                return
-            
-            with st.spinner("正在导出到剪映草稿，请稍候..."):
-                try:
-                    from app.services import jianying_task
-                    
-                    # 调用导出到剪映草稿的任务
-                    result = jianying_task.start_export_jianying_draft(task_id, params)
-                    
-                    # 记录日志
-                    logger.info(f"成功导出到剪映草稿: {result['draft_name']}")
-                    logger.info(f"草稿已保存到: {result['draft_path']}")
-                    
-                    # 保存结果到session state
-                    st.session_state['jianying_export_result'] = result
-                    st.session_state['jianying_export_error'] = None
-                    st.session_state['show_jianying_export_form'] = False
-                    
-                    st.success(f"✅ 成功导出到剪映草稿: {result['draft_name']}")
-                    st.info(f"📁 草稿已保存到: {result['draft_path']}")
-                except Exception as e:
-                    logger.error(f"导出到剪映草稿失败: {e}")
-                    import traceback
-                    logger.error(f"错误详情: {traceback.format_exc()}")
-                    st.session_state['jianying_export_error'] = str(e)
-                    st.session_state['jianying_export_result'] = None
-                    st.error(f"❌ 导出到剪映草稿失败: {e}")
-        
-        if st.button("取消", key="cancel_export"):
-            st.session_state['show_jianying_export_form'] = False
-            st.session_state['jianying_export_result'] = None
-            st.session_state['jianying_export_error'] = None
-            st.rerun()
+    _render_jianying_export_status()



@ -379,7 +667,7 @@ def main():
            logger.error(f"❌ LLM 提供商注册失败: {str(e)}")
            import traceback
            logger.error(traceback.format_exc())
-            st.error(f"⚠️ LLM 初始化失败: {str(e)}\n\n请检查配置文件和依赖是否正确安装。")
+            st.error(tr("LLM initialization failed").format(error=str(e)))
            # 不抛出异常，允许应用继续运行（但 LLM 功能不可用）

    # 检测FFmpeg硬件加速，但只打印一次日志（使用 session_state 持久化）
@ -402,7 +690,7 @@ def main():
        logger.warning(f"资源初始化时出现警告: {e}")

    st.title(f"Narrato:blue[AI]:sunglasses: 📽️")
-    st.write(tr("Get Help"))
+    st.write(get_help_text())

    # 首先渲染不依赖PyTorch的UI部分
    # 渲染基础设置面板
--- a/webui/components/audio_settings.py
+++ b/webui/components/audio_settings.py
--- a/webui/components/basic_settings.py
+++ b/webui/components/basic_settings.py
@ -4,6 +4,8 @@ import streamlit as st
 import os
 from app.config import config
 from app.config.defaults import (
+    DEFAULT_LLM_GENERATION_CONFIG,
+    DEFAULT_LLM_THINKING_LEVELS,
    DEFAULT_OPENAI_COMPATIBLE_BASE_URL,
    DEFAULT_OPENAI_COMPATIBLE_PROVIDER,
    DEFAULT_TEXT_LLM_PROVIDER,
@ -26,7 +28,7 @@ OPENAI_COMPATIBLE_GATEWAY_BASE_URLS = {
 }


-def build_base_url_help(provider: str, model_type: str) -> tuple[str, bool, str]:
+def build_base_url_help(provider: str, model_type: str, tr=lambda key: key) -> tuple[str, bool, str]:
    """
    根据 provider 返回 Base URL 的帮助文案

@ -35,14 +37,14 @@ def build_base_url_help(provider: str, model_type: str) -> tuple[str, bool, str]
        requires_base: 是否强制提示必须填写 Base URL
        placeholder: 推荐的默认值（可为空字符串）
    """
-    default_help = "自定义 API 端点（可选），当使用自建或第三方代理时需要填写"
+    default_help = tr("Custom API endpoint help")
    provider_key = (provider or "").lower()
    example_url = OPENAI_COMPATIBLE_GATEWAY_BASE_URLS.get(provider_key)

    if example_url is not None:
-        extra = f"\n推荐接口地址: {example_url}" if example_url else ""
+        extra = f"\n{tr('Recommended API endpoint')}: {example_url}" if example_url else ""
        help_text = (
-            f"{model_type} 选择的提供商基于 OpenAI 兼容网关，必须填写完整的接口地址。"
+            f"{tr('OpenAI compatible gateway help').format(model_type=model_type)}"
            f"{extra}"
        )
        return help_text, True, example_url
@ -87,7 +89,7 @@ def validate_openai_compatible_model_name(model_name: str, model_type: str) -> t
    
    Args:
        model_name: 模型名称，应为 provider/model 格式
-        model_type: 模型类型（如"视频分析"、"文案生成"）
+        model_type: 模型类型（如"视觉分析"、"文案生成"）
        
    Returns:
        (是否有效, 错误消息)
@ -140,6 +142,113 @@ def show_config_validation_errors(errors: list):
            st.error(error)


+def update_app_config_if_changed(key: str, value) -> bool:
+    """Store ``value`` under ``key`` in ``config.app`` only when it differs.
+
+    Args:
+        key: Entry name inside ``config.app``.
+        value: New value; compared with ``==`` against ``config.app.get(key)``.
+
+    Returns:
+        False when the stored value already equals ``value`` (nothing is
+        written), True after the entry has been overwritten.
+    """
+    # A missing key reads as None, so passing None for an absent key is a no-op.
+    if config.app.get(key) == value:
+        return False
+
+    config.app[key] = value
+    return True
+
+
+def render_openai_compatible_protocol_field(tr, label_key: str, key: str) -> None:
+    """Render the fixed OpenAI-compatible protocol as a non-selectable field.
+
+    Args:
+        tr: Translation callable mapping a message key to display text.
+        label_key: Message key used for the field label.
+        key: Streamlit widget key for the text input.
+    """
+    # disabled=True makes the input read-only; its return value is discarded.
+    st.text_input(
+        tr(label_key),
+        value=tr("OpenAI compatible protocol"),
+        help=tr("OpenAI compatible protocol help"),
+        disabled=True,
+        key=key,
+    )
+
+
+def get_generation_config_value(model_prefix: str, param_name: str):
+    """Read a per-model generation parameter with a shared default.
+
+    Lookup order:
+      1. ``config.app["<model_prefix>_openai_<param_name>"]`` when the key exists.
+      2. For the text model's temperature only: ``st.session_state["temperature"]``.
+      3. ``DEFAULT_LLM_GENERATION_CONFIG[param_name]``.
+
+    Args:
+        model_prefix: Prefix of the config key (callers in this file pass
+            "vision" or "text").
+        param_name: Parameter name; must be a key of
+            ``DEFAULT_LLM_GENERATION_CONFIG`` whenever the config lookup misses.
+
+    Returns:
+        The stored or default value, returned as-is (no type coercion here).
+    """
+    config_key = f"{model_prefix}_openai_{param_name}"
+    # Key presence wins even if the stored value is falsy.
+    if config_key in config.app:
+        return config.app.get(config_key)
+
+    # The text model falls back to the session-level "temperature" entry.
+    if model_prefix == "text" and param_name == "temperature":
+        return st.session_state.get("temperature", DEFAULT_LLM_GENERATION_CONFIG[param_name])
+
+    return DEFAULT_LLM_GENERATION_CONFIG[param_name]
+
+
+def render_llm_generation_settings(tr, model_prefix: str) -> dict:
+    """Render generation parameters directly below a model's Base URL.
+
+    Draws a 2x2 grid of widgets (temperature, top-p, max output tokens,
+    thinking level) whose initial values come from
+    ``get_generation_config_value``.
+
+    Args:
+        tr: Translation callable mapping a message key to display text.
+        model_prefix: Prefix used for config lookups and widget keys.
+
+    Returns:
+        Dict with keys "temperature", "top_p", "max_tokens" and
+        "thinking_level" holding the current widget values. Nothing is
+        written to ``config.app`` here.
+    """
+    st.markdown(f"**{tr('Generation Settings')}**")
+
+    # First row: sampling controls.
+    row1 = st.columns(2)
+    with row1[0]:
+        # NOTE(review): the stored value is passed to the slider unclamped;
+        # confirm Streamlit's behavior for a value outside 0.0-2.0.
+        temperature = st.slider(
+            tr("Sampling Temperature"),
+            min_value=0.0,
+            max_value=2.0,
+            value=float(get_generation_config_value(model_prefix, "temperature")),
+            step=0.05,
+            help=tr("Sampling Temperature Help"),
+            key=f"{model_prefix}_openai_temperature_input",
+        )
+    with row1[1]:
+        top_p = st.slider(
+            tr("Top P"),
+            min_value=0.0,
+            max_value=1.0,
+            value=float(get_generation_config_value(model_prefix, "top_p")),
+            step=0.05,
+            help=tr("Top P Help"),
+            key=f"{model_prefix}_openai_top_p_input",
+        )
+
+    # Second row: output length and thinking level.
+    row2 = st.columns(2)
+    with row2[0]:
+        max_tokens = st.number_input(
+            tr("Max Output Tokens"),
+            min_value=0,
+            max_value=200000,
+            value=int(get_generation_config_value(model_prefix, "max_tokens")),
+            step=256,
+            help=tr("Max Output Tokens Help"),
+            key=f"{model_prefix}_openai_max_tokens_input",
+        )
+    with row2[1]:
+        # Empty or unknown stored levels fall back to "auto" so that .index() below cannot fail.
+        current_thinking_level = str(get_generation_config_value(model_prefix, "thinking_level") or "auto")
+        if current_thinking_level not in DEFAULT_LLM_THINKING_LEVELS:
+            current_thinking_level = "auto"
+
+        thinking_level = st.selectbox(
+            tr("Thinking Level"),
+            options=DEFAULT_LLM_THINKING_LEVELS,
+            index=DEFAULT_LLM_THINKING_LEVELS.index(current_thinking_level),
+            # Option labels are looked up as "Thinking Level <Title-cased level>".
+            format_func=lambda level: tr(f"Thinking Level {level.title()}"),
+            help=tr("Thinking Level Help"),
+            key=f"{model_prefix}_openai_thinking_level_input",
+        )
+
+    # Floats are rounded to two decimals before being returned.
+    params = {
+        "temperature": round(float(temperature), 2),
+        "top_p": round(float(top_p), 2),
+        "max_tokens": int(max_tokens),
+        "thinking_level": thinking_level,
+    }
+
+    # Mirror the text model's temperature into the session-level "temperature" entry.
+    if model_prefix == "text":
+        st.session_state["temperature"] = params["temperature"]
+
+    return params
+
+
+def save_llm_generation_settings(model_prefix: str, params: dict) -> bool:
+    """Persist per-model generation parameters in app config.
+
+    Each ``params`` entry is stored under ``"<model_prefix>_openai_<name>"``
+    in ``config.app`` (only when changed) and always mirrored into
+    ``st.session_state`` under the same key. The config file itself is not
+    written here.
+
+    Args:
+        model_prefix: Prefix for the generated config keys.
+        params: Mapping of parameter name to value.
+
+    Returns:
+        True when at least one ``config.app`` entry was changed.
+    """
+    changed = False
+    for param_name, value in params.items():
+        config_key = f"{model_prefix}_openai_{param_name}"
+        # |= accumulates the flag without short-circuiting the update call.
+        changed |= update_app_config_if_changed(config_key, value)
+        st.session_state[config_key] = value
+
+    return changed
+
+
 def render_basic_settings(tr):
    """渲染基础设置面板"""
    with st.expander(tr("Basic Settings"), expanded=False):
@ -151,14 +260,24 @@ def render_basic_settings(tr):
        with left_config_panel:
            render_language_settings(tr)
            render_proxy_settings(tr)
+            render_tavily_search_settings(tr)

        with middle_config_panel:
-            render_vision_llm_settings(tr)  # 视频分析模型设置
+            render_vision_llm_settings(tr)  # 视觉分析模型设置

        with right_config_panel:
            render_text_llm_settings(tr)  # 文案生成模型设置


+def render_generation_settings(tr):
+    """Render the shared generation parameters (a single temperature slider).
+
+    Args:
+        tr: Translation callable mapping a message key to display text.
+    """
+    st.divider()
+    st.subheader(tr("Generation Settings"))
+    # Seed the session value once so the keyed slider below has a starting point.
+    if 'temperature' not in st.session_state:
+        st.session_state['temperature'] = DEFAULT_LLM_GENERATION_CONFIG["temperature"]
+    # The label is the literal string "temperature"; it is not passed through tr().
+    st.slider("temperature", 0.0, 2.0, key="temperature")
+
+
 def render_language_settings(tr):
    st.subheader(tr("Proxy Settings"))

@ -218,15 +337,41 @@ def render_proxy_settings(tr):
        config.proxy["https"] = ""

    # 剪映草稿地址设置
-    st.subheader("剪映草稿设置")
+    st.subheader(tr("Jianying Draft Settings"))
    jianying_draft_path = st.text_input(
-        "剪映草稿文件夹路径",
+        tr("Jianying Draft Folder Path"),
        value=config.ui.get("jianying_draft_path", ""),
-        help="剪映草稿文件夹路径，例如：C:\\Users\\用户名\\Documents\\JianyingPro Drafts"
+        help=tr("Jianying Draft Folder Path Help")
    )
    config.ui["jianying_draft_path"] = jianying_draft_path


+def render_tavily_search_settings(tr):
+    """Render Tavily API key settings used by short drama web search.
+
+    Shows a link to the Tavily console and a password input. When the
+    stripped key differs from ``config.app["tavily_api_key"]`` the config is
+    updated and saved.
+
+    Args:
+        tr: Translation callable mapping a message key to display text.
+    """
+    st.subheader(tr("Tavily Search Settings"))
+    st.markdown(
+        f"{tr('API Key URL')}: "
+        "[https://app.tavily.com](https://app.tavily.com)"
+    )
+
+    tavily_api_key = st.text_input(
+        tr("Tavily API Key"),
+        value=config.app.get("tavily_api_key", ""),
+        type="password",
+        help=tr("Tavily API Key Help"),
+        key="tavily_api_key_input",
+    )
+
+    # The in-memory config is updated before the save attempt, so a failed
+    # save leaves config.app changed but the session_state entry untouched.
+    if update_app_config_if_changed("tavily_api_key", str(tavily_api_key or "").strip()):
+        try:
+            config.save_config()
+            st.session_state["tavily_api_key"] = str(tavily_api_key or "").strip()
+            st.success(tr("Tavily config saved"))
+        except Exception as e:
+            st.error(f"{tr('Failed to save config')}: {str(e)}")
+            logger.error(f"保存 Tavily 配置失败: {str(e)}")
+
+
 def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
    """测试视觉模型连接

@ -435,7 +580,7 @@ def test_openai_compatible_text_model(api_key: str, base_url: str, model_name: s
        return False, f"连接失败: {error_msg}"

 def render_vision_llm_settings(tr):
-    """渲染视频分析模型设置（OpenAI 兼容 统一配置）"""
+    """渲染视觉分析模型设置（OpenAI 兼容 统一配置）"""
    st.subheader(tr("Vision Model Settings"))

    # 固定使用 OpenAI 兼容 提供商
@ -447,36 +592,35 @@ def render_vision_llm_settings(tr):
    vision_base_url = config.app.get("vision_openai_base_url", DEFAULT_OPENAI_COMPATIBLE_BASE_URL)
    
    # 固定 provider 为 openai，模型输入框保留完整模型名称
-    current_provider, current_model = get_openai_compatible_ui_values(
+    _current_provider, current_model = get_openai_compatible_ui_values(
        full_vision_model_name,
        DEFAULT_VISION_OPENAI_MODEL_NAME,
        provider=DEFAULT_VISION_LLM_PROVIDER,
    )
-
-    # 定义支持的 provider 列表
-    OPENAI_COMPATIBLE_PROVIDERS = ["openai"]
+    selected_provider = DEFAULT_VISION_LLM_PROVIDER

    # 渲染配置输入框
    col1, col2 = st.columns([1, 2])
    with col1:
-        selected_provider = st.selectbox(
-            tr("Vision Model Provider"),
-            options=OPENAI_COMPATIBLE_PROVIDERS,
-            index=OPENAI_COMPATIBLE_PROVIDERS.index(current_provider) if current_provider in OPENAI_COMPATIBLE_PROVIDERS else 0,
-            key="vision_provider_select"
+        render_openai_compatible_protocol_field(
+            tr,
+            "Vision Model Provider",
+            key="vision_openai_protocol_display",
        )
    
    with col2:
        model_name_input = st.text_input(
            tr("Vision Model Name"),
            value=current_model,
-            help="输入完整模型名称\n\n"
-                 "常用示例:\n"
-                 "• Qwen/Qwen3.5-122B-A10B\n"
-                 "• gemini/gemini-2.0-flash-lite\n"
-                 "• gpt-4o\n"
-                 "• Qwen/Qwen2.5-VL-32B-Instruct (SiliconFlow)\n\n"
-                 "支持常见 OpenAI 兼容网关（如 OpenAI/DeepSeek/OpenRouter/SiliconFlow）",
+            help=(
+                tr("Model Name Input Help")
+                + "\n\n"
+                + "• Qwen/Qwen3.5-122B-A10B\n"
+                + "• gemini/gemini-2.0-flash-lite\n"
+                + "• gpt-4o\n"
+                + "• Qwen/Qwen2.5-VL-32B-Instruct (SiliconFlow)\n\n"
+                + tr("OpenAI compatible providers help")
+            ),
            key="vision_model_input"
        )

@ -487,16 +631,18 @@ def render_vision_llm_settings(tr):
        tr("Vision API Key"),
        value=vision_api_key,
        type="password",
-        help="对应 provider 的 API 密钥\n\n"
-             "获取地址:\n"
-             "• Gemini: https://makersuite.google.com/app/apikey\n"
-             "• OpenAI: https://platform.openai.com/api-keys\n"
-             "• Qwen: https://bailian.console.aliyun.com/\n"
-             "• SiliconFlow: https://cloud.siliconflow.cn/account/ak"
+        help=(
+            tr("Provider API Key Help")
+            + "\n\n"
+            + "• Gemini: https://makersuite.google.com/app/apikey\n"
+            + "• OpenAI: https://platform.openai.com/api-keys\n"
+            + "• Qwen: https://bailian.console.aliyun.com/\n"
+            + "• SiliconFlow: https://cloud.siliconflow.cn/account/ak"
+        )
    )

    vision_base_help, vision_base_required, vision_placeholder = build_base_url_help(
-        selected_provider, "视频分析模型"
+        selected_provider, tr("Vision model"), tr
    )
    st_vision_base_url = st.text_input(
        tr("Vision Base URL"),
@ -506,15 +652,17 @@ def render_vision_llm_settings(tr):
    )
    if vision_base_required and not st_vision_base_url:
        info_example = vision_placeholder or "https://your-openai-compatible-endpoint/v1"
-        st.info(f"请在上方填写 OpenAI 兼容网关地址，例如：{info_example}")
+        st.info(tr("Please fill OpenAI compatible gateway").format(example=info_example))
+
+    vision_generation_params = render_llm_generation_settings(tr, "vision")

    # 添加测试连接按钮
    if st.button(tr("Test Connection"), key="test_vision_connection"):
        test_errors = []
        if not st_vision_api_key:
-            test_errors.append("请先输入 API 密钥")
+            test_errors.append(tr("Please enter API key"))
        if not model_name_input:
-            test_errors.append("请先输入模型名称")
+            test_errors.append(tr("Please enter model name"))

        if test_errors:
            for error in test_errors:
@ -534,8 +682,8 @@ def render_vision_llm_settings(tr):
                    else:
                        st.error(message)
                except Exception as e:
-                    st.error(f"测试连接时发生错误: {str(e)}")
-                    logger.error(f"OpenAI 兼容 视频分析模型连接测试失败: {str(e)}")
+                    st.error(f"{tr('Connection test error')}: {str(e)}")
+                    logger.error(f"OpenAI 兼容 视觉分析模型连接测试失败: {str(e)}")

    # 验证和保存配置
    validation_errors = []
@ -544,34 +692,42 @@ def render_vision_llm_settings(tr):
    # 验证模型名称
    if st_vision_model_name:
        # 这里的验证逻辑可能需要微调，因为我们现在是自动组合的
-        is_valid, error_msg = validate_openai_compatible_model_name(st_vision_model_name, "视频分析")
+        is_valid, error_msg = validate_openai_compatible_model_name(st_vision_model_name, "视觉分析")
        if is_valid:
-            config.app["vision_openai_model_name"] = st_vision_model_name
+            config_changed |= update_app_config_if_changed(
+                "vision_openai_model_name",
+                st_vision_model_name
+            )
            st.session_state["vision_openai_model_name"] = st_vision_model_name
-            config_changed = True
        else:
            validation_errors.append(error_msg)

    # 验证 API 密钥
    if st_vision_api_key:
-        is_valid, error_msg = validate_api_key(st_vision_api_key, "视频分析")
+        is_valid, error_msg = validate_api_key(st_vision_api_key, "视觉分析")
        if is_valid:
-            config.app["vision_openai_api_key"] = st_vision_api_key
+            config_changed |= update_app_config_if_changed(
+                "vision_openai_api_key",
+                st_vision_api_key
+            )
            st.session_state["vision_openai_api_key"] = st_vision_api_key
-            config_changed = True
        else:
            validation_errors.append(error_msg)

    # 验证 Base URL（可选）
    if st_vision_base_url:
-        is_valid, error_msg = validate_base_url(st_vision_base_url, "视频分析")
+        is_valid, error_msg = validate_base_url(st_vision_base_url, "视觉分析")
        if is_valid:
-            config.app["vision_openai_base_url"] = st_vision_base_url
+            config_changed |= update_app_config_if_changed(
+                "vision_openai_base_url",
+                st_vision_base_url
+            )
            st.session_state["vision_openai_base_url"] = st_vision_base_url
-            config_changed = True
        else:
            validation_errors.append(error_msg)

+    config_changed |= save_llm_generation_settings("vision", vision_generation_params)
+
    # 显示验证错误
    show_config_validation_errors(validation_errors)

@ -582,10 +738,10 @@ def render_vision_llm_settings(tr):
            # 清除缓存，确保下次使用新配置
            UnifiedLLMService.clear_cache()
            if st_vision_api_key or st_vision_base_url or st_vision_model_name:
-                st.success(f"视频分析模型配置已保存（OpenAI 兼容）")
+                st.success(tr("Vision model config saved"))
        except Exception as e:
-            st.error(f"保存配置失败: {str(e)}")
-            logger.error(f"保存视频分析配置失败: {str(e)}")
+            st.error(f"{tr('Failed to save config')}: {str(e)}")
+            logger.error(f"保存视觉分析配置失败: {str(e)}")


 def test_text_model_connection(api_key, base_url, model_name, provider, tr):
@ -704,36 +860,35 @@ def render_text_llm_settings(tr):
    text_base_url = config.app.get("text_openai_base_url", DEFAULT_OPENAI_COMPATIBLE_BASE_URL)

    # 固定 provider 为 openai，模型输入框保留完整模型名称
-    current_provider, current_model = get_openai_compatible_ui_values(
+    _current_provider, current_model = get_openai_compatible_ui_values(
        full_text_model_name,
        DEFAULT_TEXT_OPENAI_MODEL_NAME,
        provider=DEFAULT_TEXT_LLM_PROVIDER,
    )
-
-    # 定义支持的 provider 列表
-    OPENAI_COMPATIBLE_PROVIDERS = ["openai"]
+    selected_provider = DEFAULT_TEXT_LLM_PROVIDER

    # 渲染配置输入框
    col1, col2 = st.columns([1, 2])
    with col1:
-        selected_provider = st.selectbox(
-            tr("Text Model Provider"),
-            options=OPENAI_COMPATIBLE_PROVIDERS,
-            index=OPENAI_COMPATIBLE_PROVIDERS.index(current_provider) if current_provider in OPENAI_COMPATIBLE_PROVIDERS else 0,
-            key="text_provider_select"
+        render_openai_compatible_protocol_field(
+            tr,
+            "Text Model Provider",
+            key="text_openai_protocol_display",
        )
    
    with col2:
        model_name_input = st.text_input(
            tr("Text Model Name"),
            value=current_model,
-            help="输入完整模型名称\n\n"
-                 "常用示例:\n"
-                 "• Pro/zai-org/GLM-5\n"
-                 "• deepseek/deepseek-chat\n"
-                 "• gpt-4o\n"
-                 "• deepseek-ai/DeepSeek-R1 (SiliconFlow)\n\n"
-                 "支持常见 OpenAI 兼容网关（如 OpenAI/DeepSeek/OpenRouter/SiliconFlow）",
+            help=(
+                tr("Model Name Input Help")
+                + "\n\n"
+                + "• Pro/zai-org/GLM-5\n"
+                + "• deepseek/deepseek-chat\n"
+                + "• gpt-4o\n"
+                + "• deepseek-ai/DeepSeek-R1 (SiliconFlow)\n\n"
+                + tr("OpenAI compatible providers help")
+            ),
            key="text_model_input"
        )

@ -744,18 +899,20 @@ def render_text_llm_settings(tr):
        tr("Text API Key"),
        value=text_api_key,
        type="password",
-        help="对应 provider 的 API 密钥\n\n"
-             "获取地址:\n"
-             "• DeepSeek: https://platform.deepseek.com/api_keys\n"
-             "• Gemini: https://makersuite.google.com/app/apikey\n"
-             "• OpenAI: https://platform.openai.com/api-keys\n"
-             "• Qwen: https://bailian.console.aliyun.com/\n"
-             "• SiliconFlow: https://cloud.siliconflow.cn/account/ak\n"
-             "• Moonshot: https://platform.moonshot.cn/console/api-keys"
+        help=(
+            tr("Provider API Key Help")
+            + "\n\n"
+            + "• DeepSeek: https://platform.deepseek.com/api_keys\n"
+            + "• Gemini: https://makersuite.google.com/app/apikey\n"
+            + "• OpenAI: https://platform.openai.com/api-keys\n"
+            + "• Qwen: https://bailian.console.aliyun.com/\n"
+            + "• SiliconFlow: https://cloud.siliconflow.cn/account/ak\n"
+            + "• Moonshot: https://platform.moonshot.cn/console/api-keys"
+        )
    )

    text_base_help, text_base_required, text_placeholder = build_base_url_help(
-        selected_provider, "文案生成模型"
+        selected_provider, tr("Text model"), tr
    )
    st_text_base_url = st.text_input(
        tr("Text Base URL"),
@ -765,15 +922,17 @@ def render_text_llm_settings(tr):
    )
    if text_base_required and not st_text_base_url:
        info_example = text_placeholder or "https://your-openai-compatible-endpoint/v1"
-        st.info(f"请在上方填写 OpenAI 兼容网关地址，例如：{info_example}")
+        st.info(tr("Please fill OpenAI compatible gateway").format(example=info_example))
+
+    text_generation_params = render_llm_generation_settings(tr, "text")

    # 添加测试连接按钮
    if st.button(tr("Test Connection"), key="test_text_connection"):
        test_errors = []
        if not st_text_api_key:
-            test_errors.append("请先输入 API 密钥")
+            test_errors.append(tr("Please enter API key"))
        if not model_name_input:
-            test_errors.append("请先输入模型名称")
+            test_errors.append(tr("Please enter model name"))

        if test_errors:
            for error in test_errors:
@ -793,7 +952,7 @@ def render_text_llm_settings(tr):
                    else:
                        st.error(message)
                except Exception as e:
-                    st.error(f"测试连接时发生错误: {str(e)}")
+                    st.error(f"{tr('Connection test error')}: {str(e)}")
                    logger.error(f"OpenAI 兼容 文案生成模型连接测试失败: {str(e)}")

    # 验证和保存配置
@ -804,9 +963,11 @@ def render_text_llm_settings(tr):
    if st_text_model_name:
        is_valid, error_msg = validate_openai_compatible_model_name(st_text_model_name, "文案生成")
        if is_valid:
-            config.app["text_openai_model_name"] = st_text_model_name
+            text_config_changed |= update_app_config_if_changed(
+                "text_openai_model_name",
+                st_text_model_name
+            )
            st.session_state["text_openai_model_name"] = st_text_model_name
-            text_config_changed = True
        else:
            text_validation_errors.append(error_msg)

@ -814,9 +975,11 @@ def render_text_llm_settings(tr):
    if st_text_api_key:
        is_valid, error_msg = validate_api_key(st_text_api_key, "文案生成")
        if is_valid:
-            config.app["text_openai_api_key"] = st_text_api_key
+            text_config_changed |= update_app_config_if_changed(
+                "text_openai_api_key",
+                st_text_api_key
+            )
            st.session_state["text_openai_api_key"] = st_text_api_key
-            text_config_changed = True
        else:
            text_validation_errors.append(error_msg)

@ -824,12 +987,16 @@ def render_text_llm_settings(tr):
    if st_text_base_url:
        is_valid, error_msg = validate_base_url(st_text_base_url, "文案生成")
        if is_valid:
-            config.app["text_openai_base_url"] = st_text_base_url
+            text_config_changed |= update_app_config_if_changed(
+                "text_openai_base_url",
+                st_text_base_url
+            )
            st.session_state["text_openai_base_url"] = st_text_base_url
-            text_config_changed = True
        else:
            text_validation_errors.append(error_msg)

+    text_config_changed |= save_llm_generation_settings("text", text_generation_params)
+
    # 显示验证错误
    show_config_validation_errors(text_validation_errors)

@ -840,9 +1007,9 @@ def render_text_llm_settings(tr):
            # 清除缓存，确保下次使用新配置
            UnifiedLLMService.clear_cache()
            if st_text_api_key or st_text_base_url or st_text_model_name:
-                st.success(f"文案生成模型配置已保存（OpenAI 兼容）")
+                st.success(tr("Text model config saved"))
        except Exception as e:
-            st.error(f"保存配置失败: {str(e)}")
+            st.error(f"{tr('Failed to save config')}: {str(e)}")
            logger.error(f"保存文案生成配置失败: {str(e)}")

    # # Cloudflare 特殊配置
--- a/webui/components/script_settings.py
+++ b/webui/components/script_settings.py
--- a/webui/components/subtitle_settings.py
+++ b/webui/components/subtitle_settings.py
@ -1,47 +1,573 @@
-
-from loguru import logger
 import streamlit as st
 from app.config import config
+from app.utils import utils
 from webui.utils.cache import get_fonts_cache
+import hashlib
 import os


+# Fallback subtitle-mask settings per orientation, used by
+# _get_subtitle_mask_value when config.ui has no stored value.
+# NOTE(review): the *_percent fields look like percentages of the frame size
+# and blur_radius like pixels -- confirm against _resolve_subtitle_mask_region.
+SUBTITLE_MASK_DEFAULTS = {
+    "landscape": {
+        "x_percent": 10,
+        "y_percent": 78,
+        "width_percent": 80,
+        "height_percent": 14,
+        "blur_radius": 18,
+        "opacity_percent": 82,
+    },
+    "portrait": {
+        "x_percent": 8,
+        "y_percent": 79,
+        "width_percent": 84,
+        "height_percent": 16,
+        "blur_radius": 26,
+        "opacity_percent": 84,
+    },
+}
+
+# Fallback subtitle position per orientation, used by
+# _get_orientation_subtitle_position_value when config.ui has no stored value.
+SUBTITLE_POSITION_DEFAULTS = {
+    "landscape": {
+        "y_percent": 85,
+    },
+    "portrait": {
+        "y_percent": 82,
+    },
+}
+
+
+# File extensions accepted by the subtitle-mask preview video uploader.
+VIDEO_PREVIEW_UPLOAD_TYPES = ["mp4", "mov", "avi", "flv", "mkv", "mpeg4"]
+
+
 def render_subtitle_panel(tr):
    """渲染字幕设置面板"""
    with st.container(border=True):
        st.write(tr("Subtitle Settings"))
-        st.info("💡 提示：目前仅 **edge-tts** 引擎支持自动生成字幕，其他 TTS 引擎暂不支持。")

-        # 检查是否选择了 SoulVoice qwen3_tts引擎
-        from app.services import voice
-        # current_voice = st.session_state.get('voice_name', '')
        tts_engine = config.ui.get('tts_engine', '')
        is_disabled_subtitle = is_disabled_subtitle_settings(tts_engine)

        if is_disabled_subtitle:
-            # SoulVoice 引擎时显示禁用提示
-            st.warning(f"⚠️ {tts_engine}不支持精确字幕生成")
-            st.info("💡 建议使用专业剪辑工具（如剪映、PR等）手动添加字幕")
+            st.warning(tr("TTS engine does not support precise subtitles").format(engine=tts_engine))

-            # 强制禁用字幕
-            st.session_state['subtitle_enabled'] = False
+        enable_subtitles = st.checkbox(tr("Enable Subtitles"), value=True)
+        st.session_state['subtitle_enabled'] = enable_subtitles

-            # 显示禁用状态的复选框
-            st.checkbox(
-                tr("Enable Subtitles"),
-                value=False,
-                disabled=True,
-                help="SoulVoice 引擎不支持字幕生成，请使用其他 TTS 引擎"
-            )
+        if enable_subtitles:
+            render_subtitle_mask_settings(tr)
+            render_auto_transcription_settings(tr)
+            render_font_settings(tr)
+            render_position_settings(tr)
+            render_style_settings(tr)
        else:
-            # 其他引擎正常显示字幕选项
-            enable_subtitles = st.checkbox(tr("Enable Subtitles"), value=True)
-            st.session_state['subtitle_enabled'] = enable_subtitles
+            st.session_state['subtitle_mask_enabled'] = False
+            config.ui["subtitle_mask_enabled"] = False
+            st.session_state['subtitle_auto_transcribe_enabled'] = False
+            config.fun_asr["auto_transcribe_enabled"] = False

-            if enable_subtitles:
-                render_font_settings(tr)
-                render_position_settings(tr)
-                render_style_settings(tr)
+
+def _subtitle_mask_key(orientation, field):
+    """Return the config/session key ``subtitle_mask_<orientation>_<field>``."""
+    return f"subtitle_mask_{orientation}_{field}"
+
+
+def _get_subtitle_mask_value(orientation, field):
+    """Return the mask setting from ``config.ui``, or its built-in default.
+
+    ``orientation`` and ``field`` must exist in ``SUBTITLE_MASK_DEFAULTS``;
+    the default is looked up eagerly, so an unknown name raises ``KeyError``
+    even when ``config.ui`` holds a value.
+    """
+    key = _subtitle_mask_key(orientation, field)
+    return config.ui.get(key, SUBTITLE_MASK_DEFAULTS[orientation][field])
+
+
+def _set_subtitle_mask_value(orientation, field, value):
+    """Write a mask setting to both ``config.ui`` and ``st.session_state``."""
+    key = _subtitle_mask_key(orientation, field)
+    config.ui[key] = value
+    st.session_state[key] = value
+
+
+def _subtitle_position_key(orientation, field):
+    """Return the config/session key ``subtitle_position_<orientation>_<field>``."""
+    return f"subtitle_position_{orientation}_{field}"
+
+
+def _get_orientation_subtitle_position_value(orientation, field):
+    """Return the subtitle position setting from ``config.ui``, or its default.
+
+    ``orientation`` and ``field`` must exist in ``SUBTITLE_POSITION_DEFAULTS``;
+    the default is looked up eagerly, so an unknown name raises ``KeyError``.
+    """
+    key = _subtitle_position_key(orientation, field)
+    return config.ui.get(key, SUBTITLE_POSITION_DEFAULTS[orientation][field])
+
+
+def _set_orientation_subtitle_position_value(orientation, field, value):
+    """Write a subtitle position setting to ``config.ui`` and ``st.session_state``."""
+    key = _subtitle_position_key(orientation, field)
+    config.ui[key] = value
+    st.session_state[key] = value
+
+
+def _format_preview_time(seconds):
+    """Format a time offset in seconds as ``MM:SS.s``.
+
+    ``None``, other falsy values and negative numbers are treated as 0.
+    The value is rounded to one decimal place *before* the minutes are
+    split off, so a value such as 59.96 carries into the next minute
+    ("01:00.0") instead of rendering as "00:60.0".
+
+    Args:
+        seconds: Offset in seconds (number, numeric string, or None).
+
+    Returns:
+        The formatted string; minutes are zero-padded to at least two digits.
+    """
+    seconds = round(max(0.0, float(seconds or 0)), 1)
+    minutes, remaining_seconds = divmod(seconds, 60)
+    return f"{int(minutes):02d}:{remaining_seconds:04.1f}"
+
+
+def _get_current_preview_video_path():
+    """Pick the video file to use for the subtitle-mask preview.
+
+    Candidates are read from ``st.session_state`` in priority order, and the
+    first one that exists on disk wins:
+      1. ``subtitle_mask_preview_video_path`` (dedicated preview upload),
+      2. ``video_origin_path`` (single string),
+      3. the first usable entry of ``video_origin_paths`` (list of strings).
+
+    Returns:
+        The chosen path, or "" when no candidate exists on disk.
+    """
+    uploaded_path = st.session_state.get("subtitle_mask_preview_video_path")
+    if uploaded_path and os.path.exists(uploaded_path):
+        return uploaded_path
+
+    video_path = st.session_state.get("video_origin_path", "")
+    # isinstance guards against non-string session values.
+    if isinstance(video_path, str) and video_path and os.path.exists(video_path):
+        return video_path
+
+    video_paths = st.session_state.get("video_origin_paths", [])
+    if isinstance(video_paths, list):
+        for path in video_paths:
+            if isinstance(path, str) and path and os.path.exists(path):
+                return path
+
+    return ""
+
+
+def _save_subtitle_mask_preview_video(uploaded_file):
+    """Write an uploaded preview video to a temp directory and remember it.
+
+    Args:
+        uploaded_file: Object exposing ``name``, ``size`` and ``getbuffer()``
+            (the value returned by ``st.file_uploader``), or None.
+
+    Returns:
+        Path of the saved file, or "" when ``uploaded_file`` is None.
+    """
+    if uploaded_file is None:
+        return ""
+
+    # The upload is identified by name and size only; content is not hashed.
+    signature = f"{uploaded_file.name}:{uploaded_file.size}"
+    existing_signature = st.session_state.get("subtitle_mask_preview_upload_signature")
+    existing_path = st.session_state.get("subtitle_mask_preview_video_path", "")
+    # Same upload as last time and the file is still on disk: skip rewriting it.
+    if signature == existing_signature and existing_path and os.path.exists(existing_path):
+        return existing_path
+
+    target_dir = utils.temp_dir("subtitle_mask_preview")
+    # basename() drops any directory components from the client-supplied name.
+    safe_name = os.path.basename(uploaded_file.name).strip() or "preview.mp4"
+    # MD5 is used here only to derive a short filename prefix from the signature.
+    digest = hashlib.md5(signature.encode("utf-8")).hexdigest()[:10]
+    preview_path = os.path.join(target_dir, f"{digest}_{safe_name}")
+
+    with open(preview_path, "wb") as f:
+        f.write(uploaded_file.getbuffer())
+
+    st.session_state["subtitle_mask_preview_upload_signature"] = signature
+    st.session_state["subtitle_mask_preview_video_path"] = preview_path
+    return preview_path
+
+
+def _video_mtime(video_path):
+    """Return the file's modification time, or 0 when it cannot be read."""
+    try:
+        return os.path.getmtime(video_path)
+    except OSError:
+        return 0
+
+
+@st.cache_data(show_spinner=False)
+def _probe_subtitle_mask_preview_video(video_path, mtime):
+    """Return duration and frame size of a video, cached by Streamlit.
+
+    Args:
+        video_path: Path of the video file to open.
+        mtime: Not read in the body; presumably passed so that a modified
+            file produces a new cache entry -- verify against the caller.
+
+    Returns:
+        Dict with "duration" (float, 0 when unknown), "width" and "height"
+        (ints).
+    """
+    # Imported lazily so moviepy is only loaded when a preview is requested.
+    from moviepy import VideoFileClip
+
+    clip = VideoFileClip(video_path)
+    try:
+        return {
+            "duration": float(clip.duration or 0),
+            "width": int(clip.w),
+            "height": int(clip.h),
+        }
+    finally:
+        # Always close the clip, even when reading its attributes fails.
+        clip.close()
+
+
+@st.cache_data(show_spinner=False)
+def _extract_subtitle_mask_preview_frame(video_path, timestamp, mtime):
+    """Return one video frame as a uint8 numpy array, cached by Streamlit.
+
+    Args:
+        video_path: Path of the video file to open.
+        timestamp: Requested time in seconds; None counts as 0 and the value
+            is clamped to the range [0, clip duration].
+        mtime: Not read in the body; presumably passed so that a modified
+            file produces a new cache entry -- verify against the caller.
+
+    Returns:
+        The frame from ``clip.get_frame`` converted to a numpy array.
+    """
+    import numpy as np
+    from moviepy import VideoFileClip
+
+    clip = VideoFileClip(video_path)
+    try:
+        # Clamp into [0, duration]; a missing duration collapses the range to 0.
+        safe_time = min(max(float(timestamp or 0), 0.0), max(float(clip.duration or 0), 0.0))
+        frame = np.asarray(clip.get_frame(safe_time))
+        # Non-uint8 frames are clipped to 0..255 and cast to uint8.
+        if frame.dtype != np.uint8:
+            frame = np.clip(frame, 0, 255).astype(np.uint8)
+        return frame
+    finally:
+        clip.close()
+
+
+def _build_subtitle_mask_preview_options():
+    """Collect the current mask and position settings into one flat dict.
+
+    Returns:
+        Dict with ``subtitle_mask_enabled`` forced to True, plus, for both
+        orientations, every ``subtitle_mask_<orientation>_<field>`` value and
+        the ``subtitle_position_<orientation>_y_percent`` value.
+    """
+    options = {"subtitle_mask_enabled": True}
+    for orientation in ("landscape", "portrait"):
+        for field in ("x_percent", "y_percent", "width_percent", "height_percent", "blur_radius", "opacity_percent"):
+            options[_subtitle_mask_key(orientation, field)] = _get_subtitle_mask_value(orientation, field)
+        options[_subtitle_position_key(orientation, "y_percent")] = _get_orientation_subtitle_position_value(
+            orientation,
+            "y_percent",
+        )
+    return options
+
+
+def _draw_subtitle_mask_preview(frame):
+    """Overlay the mask rectangle and subtitle baseline on a video frame.
+
+    Args:
+        frame: Image array accepted by ``PIL.Image.fromarray``.
+
+    Returns:
+        Tuple ``(image, region)``: the annotated RGB ``PIL.Image`` and the
+        region dict returned by ``_resolve_subtitle_mask_region``. The keys
+        read from it here are "x", "y", "width", "height", "corner_radius"
+        and "orientation".
+    """
+    from PIL import Image, ImageDraw
+    from app.services.generate_video import _resolve_subtitle_mask_region
+
+    image = Image.fromarray(frame).convert("RGBA")
+    region = _resolve_subtitle_mask_region(image.width, image.height, _build_subtitle_mask_preview_options())
+
+    # Draw on a transparent layer so the semi-transparent fill blends with the frame.
+    overlay = Image.new("RGBA", image.size, (0, 0, 0, 0))
+    draw = ImageDraw.Draw(overlay)
+    rect = (
+        region["x"],
+        region["y"],
+        region["x"] + region["width"],
+        region["y"] + region["height"],
+    )
+    # Mask area: translucent black fill with a red outline.
+    draw.rounded_rectangle(
+        rect,
+        radius=region["corner_radius"],
+        fill=(0, 0, 0, 96),
+        outline=(255, 75, 85, 235),
+        # Stroke is 0.4% of the shorter side, never thinner than 2 px.
+        width=max(2, round(min(image.width, image.height) * 0.004)),
+    )
+    # Subtitle position: a blue horizontal line at y_percent of the frame height.
+    subtitle_y_percent = _get_orientation_subtitle_position_value(region["orientation"], "y_percent")
+    subtitle_y = round((image.height - 1) * subtitle_y_percent / 100)
+    line_width = max(2, round(min(image.width, image.height) * 0.004))
+    draw.line(
+        (0, subtitle_y, image.width, subtitle_y),
+        fill=(59, 130, 246, 220),
+        width=line_width,
+    )
+    image.alpha_composite(overlay)
+    return image.convert("RGB"), region
+
+
+def _resize_subtitle_mask_preview_image(image, max_width=520, max_height=360):
+    """Return a copy of ``image`` bounded to ``max_width`` x ``max_height``.
+
+    The resize is applied to a copy through ``thumbnail``, so the image passed
+    in is left untouched.
+    """
+    bounded = image.copy()
+    bounded.thumbnail((max_width, max_height))
+    return bounded
+
+
+def _render_subtitle_mask_preview(tr):
+    """Render the preview pane: video picker, timeline slider and annotated frame.
+
+    The preview video is the file remembered in
+    ``st.session_state["subtitle_mask_preview_video_path"]`` when it still
+    exists on disk; otherwise an uploader is shown and, failing that, the path
+    from ``_get_current_preview_video_path()`` is used.
+
+    Args:
+        tr: Translation callable mapping a message key to display text.
+    """
+    st.subheader(tr("Subtitle Mask Preview"))
+
+    uploaded_path = st.session_state.get("subtitle_mask_preview_video_path", "")
+    if uploaded_path and os.path.exists(uploaded_path):
+        preview_cols = st.columns([0.68, 0.32], vertical_alignment="center")
+        with preview_cols[0]:
+            st.caption(
+                tr("Using Subtitle Mask Preview Video").format(
+                    file=os.path.basename(uploaded_path)
+                )
+            )
+        with preview_cols[1]:
+            if st.button(
+                tr("Change Subtitle Mask Preview Video"),
+                key="change_subtitle_mask_preview_video",
+                use_container_width=True,
+            ):
+                # Forget the remembered upload so the uploader is shown again.
+                st.session_state.pop("subtitle_mask_preview_video_path", None)
+                st.session_state.pop("subtitle_mask_preview_upload_signature", None)
+                st.rerun(scope="fragment")
+    else:
+        uploaded_file = st.file_uploader(
+            tr("Upload Subtitle Mask Preview Video"),
+            type=VIDEO_PREVIEW_UPLOAD_TYPES,
+            key="subtitle_mask_preview_video_uploader",
+            help=tr("Upload Subtitle Mask Preview Video Help"),
+        )
+        uploaded_path = _save_subtitle_mask_preview_video(uploaded_file)
+        if uploaded_path:
+            # Re-run so the "using <file>" branch above replaces the uploader.
+            st.rerun(scope="fragment")
+
+    preview_video_path = uploaded_path or _get_current_preview_video_path()
+
+    if not preview_video_path:
+        st.info(tr("Subtitle Mask Preview Empty"))
+        return
+
+    try:
+        mtime = _video_mtime(preview_video_path)
+        video_info = _probe_subtitle_mask_preview_video(preview_video_path, mtime)
+        duration = max(0.0, video_info["duration"])
+        if duration <= 0:
+            st.warning(tr("Subtitle Mask Preview Failed"))
+            return
+
+        selected_time = st.slider(
+            tr("Subtitle Mask Preview Timeline"),
+            min_value=0.0,
+            max_value=duration,
+            # Keep a previously chosen time, but never beyond this video's end.
+            value=min(float(st.session_state.get("subtitle_mask_preview_time", 0.0)), duration),
+            step=0.1,
+            format="%.1f",
+            key="subtitle_mask_preview_time",
+            help=tr("Subtitle Mask Preview Timeline Help"),
+        )
+        frame = _extract_subtitle_mask_preview_frame(preview_video_path, selected_time, mtime)
+        preview_image, region = _draw_subtitle_mask_preview(frame)
+        preview_image = _resize_subtitle_mask_preview_image(preview_image, max_width=420, max_height=280)
+        st.image(
+            preview_image,
+            caption=tr("Subtitle Mask Preview Frame Caption").format(
+                time=_format_preview_time(selected_time),
+                orientation=tr("Portrait") if region["orientation"] == "portrait" else tr("Landscape"),
+            ),
+        )
+    except Exception:
+        # The preview is best-effort, so the user only sees a warning; still
+        # record the traceback so a broken preview can be diagnosed.
+        # Imported locally so this block does not rely on a module-level logger.
+        from loguru import logger
+
+        logger.exception("Subtitle mask preview failed for {}", preview_video_path)
+        st.warning(tr("Subtitle Mask Preview Failed"))
+
+
+def _render_subtitle_mask_size_slider(orientation, field, label, help_text, upper_bound):
+    """Render a width/height slider whose upper bound depends on the offset.
+
+    Args:
+        orientation: Orientation name used in the widget key and config lookup.
+        field: ``"width_percent"`` or ``"height_percent"``.
+        label: Already translated slider label.
+        help_text: Already translated help text.
+        upper_bound: Largest selectable value (never below the minimum of 2).
+
+    Returns:
+        The value selected in the slider.
+    """
+    lower_bound = 2
+    widget_key = f"{orientation}_subtitle_mask_{field}"
+
+    # A value remembered from an earlier run may fall outside the new range
+    # after the offset slider moved; pull it back in before the widget is built.
+    stored = st.session_state.get(widget_key)
+    if stored is not None:
+        clamped = min(max(stored, lower_bound), upper_bound)
+        if clamped != stored:
+            st.session_state[widget_key] = clamped
+
+    # With the offset at 98-99 the range collapses to a single value. Streamlit
+    # rejects a slider whose min_value equals max_value, so widen the range by
+    # one step and lock the widget instead of crashing the dialog.
+    locked = upper_bound <= lower_bound
+    saved_value = int(_get_subtitle_mask_value(orientation, field))
+    return st.slider(
+        label,
+        min_value=lower_bound,
+        max_value=lower_bound + 1 if locked else upper_bound,
+        # Clamp on both sides so a saved value below the minimum cannot raise.
+        value=max(lower_bound, min(saved_value, upper_bound)),
+        help=help_text,
+        key=widget_key,
+        disabled=locked,
+    )
+
+
+def _render_subtitle_mask_region_controls(tr, orientation):
+    """Render the six mask sliders for one orientation and store their values.
+
+    Each slider starts from ``_get_subtitle_mask_value`` and its result is
+    written back with ``_set_subtitle_mask_value``. Width and height are
+    limited to ``100 - offset`` (at least 2).
+
+    Args:
+        tr: Translation callable mapping a message key to display text.
+        orientation: Orientation name, e.g. ``"landscape"`` or ``"portrait"``.
+    """
+    x_percent = st.slider(
+        tr("Subtitle Mask Left"),
+        min_value=0,
+        max_value=99,
+        value=int(_get_subtitle_mask_value(orientation, "x_percent")),
+        help=tr("Subtitle Mask Left Help"),
+        key=f"{orientation}_subtitle_mask_x_percent",
+    )
+    _set_subtitle_mask_value(orientation, "x_percent", x_percent)
+
+    y_percent = st.slider(
+        tr("Subtitle Mask Top"),
+        min_value=0,
+        max_value=99,
+        value=int(_get_subtitle_mask_value(orientation, "y_percent")),
+        help=tr("Subtitle Mask Top Help"),
+        key=f"{orientation}_subtitle_mask_y_percent",
+    )
+    _set_subtitle_mask_value(orientation, "y_percent", y_percent)
+
+    width_percent = _render_subtitle_mask_size_slider(
+        orientation,
+        "width_percent",
+        tr("Subtitle Mask Width"),
+        tr("Subtitle Mask Width Help"),
+        max(2, 100 - x_percent),
+    )
+    _set_subtitle_mask_value(orientation, "width_percent", width_percent)
+
+    height_percent = _render_subtitle_mask_size_slider(
+        orientation,
+        "height_percent",
+        tr("Subtitle Mask Height"),
+        tr("Subtitle Mask Height Help"),
+        max(2, 100 - y_percent),
+    )
+    _set_subtitle_mask_value(orientation, "height_percent", height_percent)
+
+    blur_radius = st.slider(
+        tr("Subtitle Mask Blur Radius"),
+        min_value=0,
+        max_value=200,
+        value=int(_get_subtitle_mask_value(orientation, "blur_radius")),
+        help=tr("Subtitle Mask Blur Radius Help"),
+        key=f"{orientation}_subtitle_mask_blur_radius",
+    )
+    _set_subtitle_mask_value(orientation, "blur_radius", blur_radius)
+
+    opacity_percent = st.slider(
+        tr("Subtitle Mask Opacity"),
+        min_value=0,
+        max_value=100,
+        value=int(_get_subtitle_mask_value(orientation, "opacity_percent")),
+        help=tr("Subtitle Mask Opacity Help"),
+        key=f"{orientation}_subtitle_mask_opacity_percent",
+    )
+    _set_subtitle_mask_value(orientation, "opacity_percent", opacity_percent)
+
+
+def _render_subtitle_position_controls(tr, orientation):
+    """Render the subtitle burn-in position slider for one orientation.
+
+    The slider starts from the stored ``y_percent`` and the chosen value is
+    written back through ``_set_orientation_subtitle_position_value``.
+    """
+    field = "y_percent"
+    initial_value = int(_get_orientation_subtitle_position_value(orientation, field))
+    chosen_value = st.slider(
+        tr("Subtitle Burn Position"),
+        min_value=0,
+        max_value=99,
+        value=initial_value,
+        help=tr("Subtitle Burn Position Help"),
+        key=f"{orientation}_subtitle_burn_y_percent",
+    )
+    _set_orientation_subtitle_position_value(orientation, field, chosen_value)
+
+
+def _render_subtitle_mask_dialog(tr):
+    """Open the modal dialog used to tune the subtitle mask and position.
+
+    The dialog shows the preview on the left and four tabs of controls on the
+    right; the save button persists the configuration and reruns the app.
+    """
+    # (tab label key, renderer, orientation) in display order.
+    tab_specs = (
+        ("Landscape Subtitle Mask", _render_subtitle_mask_region_controls, "landscape"),
+        ("Portrait Subtitle Mask", _render_subtitle_mask_region_controls, "portrait"),
+        ("Landscape Subtitle Position", _render_subtitle_position_controls, "landscape"),
+        ("Portrait Subtitle Position", _render_subtitle_position_controls, "portrait"),
+    )
+
+    @st.dialog(tr("Subtitle Mask Settings"), width="large")
+    def subtitle_mask_dialog():
+        preview_col, settings_col = st.columns([1, 1], vertical_alignment="top")
+
+        # The controls run before the preview so that the preview reads the
+        # values the sliders have just written in this run.
+        with settings_col:
+            st.caption(tr("Subtitle Mask Settings Caption"))
+            st.caption(tr("Subtitle Mask Preview Caption"))
+            tabs = st.tabs([tr(label_key) for label_key, _, _ in tab_specs])
+            for tab, (_, render_controls, orientation) in zip(tabs, tab_specs):
+                with tab:
+                    render_controls(tr, orientation)
+
+        with preview_col:
+            _render_subtitle_mask_preview(tr)
+
+        save_clicked = st.button(tr("Save Subtitle Mask Settings"), type="primary", use_container_width=True)
+        if save_clicked:
+            config.save_config()
+            st.rerun()
+
+    subtitle_mask_dialog()
+
+
+def render_subtitle_mask_settings(tr):
+    """Render the settings for masking the original subtitles.
+
+    The checkbox state is mirrored into ``st.session_state`` and ``config.ui``.
+    When masking is enabled, a button opening the mask dialog and a one-line
+    summary of both orientations' regions are shown.
+    """
+    mask_enabled = st.checkbox(
+        tr("Enable Subtitle Mask"),
+        value=bool(config.ui.get("subtitle_mask_enabled", False)),
+        help=tr("Enable Subtitle Mask Help"),
+        key="subtitle_mask_enabled_checkbox",
+    )
+    st.session_state["subtitle_mask_enabled"] = mask_enabled
+    config.ui["subtitle_mask_enabled"] = mask_enabled
+
+    if mask_enabled:
+        button_col, summary_col = st.columns([0.35, 0.65], vertical_alignment="center")
+
+        with button_col:
+            open_dialog = st.button(tr("Set Subtitle Mask"), key="set_subtitle_mask", use_container_width=True)
+            if open_dialog:
+                _render_subtitle_mask_dialog(tr)
+
+        with summary_col:
+            summary_template = tr("Subtitle Mask Summary")
+            # Placeholders are named "<orientation>_<axis>", e.g. "landscape_x".
+            summary_values = {
+                f"{orientation}_{axis}": _get_subtitle_mask_value(orientation, f"{axis}_percent")
+                for orientation in ("landscape", "portrait")
+                for axis in ("x", "y", "width", "height")
+            }
+            st.caption(summary_template.format(**summary_values))
+
+
+def _get_saved_auto_transcribe_backend():
+    """Return the configured transcription backend, inferring it when invalid.
+
+    Returns:
+        ``"local"``, ``"firered"`` or ``"bailian"``. When the stored value is
+        not one of these, ``"bailian"`` is chosen if an API key is set without
+        an API URL, otherwise ``"local"``.
+    """
+    configured = str(config.fun_asr.get("backend", "")).strip().lower()
+    if configured in {"local", "firered", "bailian"}:
+        return configured
+
+    key_without_url = config.fun_asr.get("api_key") and not config.fun_asr.get("api_url")
+    return "bailian" if key_without_url else "local"
+
+
+def render_auto_transcription_settings(tr):
+    """Render the automatic transcription settings for the final video.
+
+    The enable checkbox is always shown. While it is off, the saved values are
+    only mirrored into ``st.session_state``. While it is on, a backend selector
+    and the inputs of the chosen backend are rendered and the results are
+    written to both ``config.fun_asr`` and ``st.session_state``.
+    """
+    from app.services import fun_asr_subtitle
+
+    state_prefix = "subtitle_auto_transcribe_"
+
+    auto_transcribe_enabled = st.checkbox(
+        tr("Enable Auto Transcription"),
+        value=bool(config.fun_asr.get("auto_transcribe_enabled", False)),
+        help=tr("Enable Auto Transcription Help"),
+        key="subtitle_auto_transcribe_enabled_checkbox",
+    )
+    st.session_state[f"{state_prefix}enabled"] = auto_transcribe_enabled
+    config.fun_asr["auto_transcribe_enabled"] = auto_transcribe_enabled
+
+    # Saved values; the inputs of the selected backend may override them below.
+    backend = _get_saved_auto_transcribe_backend()
+    api_url = config.fun_asr.get("api_url", fun_asr_subtitle.LOCAL_FUN_ASR_API_URL)
+    firered_api_url = config.fun_asr.get("firered_api_url", fun_asr_subtitle.LOCAL_FIRERED_ASR_API_URL)
+    hotword = config.fun_asr.get("hotword", "")
+    enable_spk = bool(config.fun_asr.get("enable_spk", False))
+    api_key = config.fun_asr.get("api_key", "")
+
+    if not auto_transcribe_enabled:
+        # Disabled: expose the saved values as they are, without touching config.
+        saved_values = {
+            "backend": backend,
+            "api_url": api_url,
+            "firered_api_url": firered_api_url,
+            "hotword": hotword,
+            "enable_spk": enable_spk,
+            "api_key": api_key,
+        }
+        for name, saved in saved_values.items():
+            st.session_state[f"{state_prefix}{name}"] = saved
+        return
+
+    label_to_backend = {
+        tr("Local FunASR-Pack API"): "local",
+        tr("Local FireRedASR API"): "firered",
+        tr("Ali Bailian Online Fun-ASR"): "bailian",
+    }
+    known_backends = list(label_to_backend.values())
+    chosen_label = st.selectbox(
+        tr("Subtitle Processing Method"),
+        options=list(label_to_backend.keys()),
+        index=known_backends.index(backend),
+        key="subtitle_auto_transcribe_backend_select",
+    )
+    backend = label_to_backend[chosen_label]
+
+    if backend == "local":
+        st.caption(tr("Auto Transcription Local Caption"))
+        api_url = st.text_input(
+            tr("Local FunASR-Pack API URL"),
+            value=api_url,
+            help=tr("Local FunASR-Pack API URL Help"),
+            key="subtitle_auto_transcribe_api_url_input",
+        )
+        hotword = st.text_input(
+            tr("Fun-ASR Hotword"),
+            value=hotword,
+            help=tr("Fun-ASR Hotword Help"),
+            key="subtitle_auto_transcribe_hotword_input",
+        )
+        enable_spk = st.checkbox(
+            tr("Enable speaker diarization"),
+            value=enable_spk,
+            help=tr("Enable speaker diarization Help"),
+            key="subtitle_auto_transcribe_enable_spk_checkbox",
+        )
+    elif backend == "firered":
+        st.caption(tr("Auto Transcription FireRed Caption"))
+        firered_api_url = st.text_input(
+            tr("Local FireRedASR API URL"),
+            value=firered_api_url,
+            help=tr("Local FireRedASR API URL Help"),
+            key="subtitle_auto_transcribe_firered_api_url_input",
+        )
+    else:
+        st.caption(tr("Auto Transcription Online Caption"))
+        st.markdown(
+            f"{tr('API Key URL')}: "
+            "[https://bailian.console.aliyun.com/?tab=model#/api-key]"
+            "(https://bailian.console.aliyun.com/?tab=model#/api-key)"
+        )
+        api_key = st.text_input(
+            tr("Ali Bailian API Key"),
+            value=api_key,
+            type="password",
+            help=tr("Ali Bailian API Key Help"),
+            key="subtitle_auto_transcribe_api_key_input",
+        )
+
+    # Normalise once, then store the same value in the config and the session.
+    resolved_values = {
+        "backend": backend,
+        "api_url": str(api_url).strip(),
+        "firered_api_url": str(firered_api_url).strip(),
+        "api_key": str(api_key).strip(),
+        "hotword": str(hotword).strip(),
+        "enable_spk": bool(enable_spk),
+    }
+    for name, resolved in resolved_values.items():
+        config.fun_asr[name] = resolved
+        st.session_state[f"{state_prefix}{name}"] = resolved
+    config.fun_asr["model"] = "fun-asr"


 def render_font_settings(tr):
@ -90,7 +616,7 @@ def render_font_settings(tr):

 def is_disabled_subtitle_settings(tts_engine:str)->bool:
    """是否禁用字幕设置"""
-    return tts_engine=="soulvoice" or tts_engine=="qwen3_tts"
+    return tts_engine=="soulvoice" or tts_engine=="qwen3_tts" or tts_engine==config.OMNIVOICE_ENGINE

 def render_position_settings(tr):
    """渲染位置设置"""
@ -154,6 +680,46 @@ def get_subtitle_params():
    font_name = st.session_state.get('font_name') or "SimHei"
    return {
        'subtitle_enabled': st.session_state.get('subtitle_enabled', True),
+        'subtitle_mask_enabled': st.session_state.get('subtitle_mask_enabled', False),
+        'subtitle_mask_landscape_x_percent': _get_subtitle_mask_value("landscape", "x_percent"),
+        'subtitle_mask_landscape_y_percent': _get_subtitle_mask_value("landscape", "y_percent"),
+        'subtitle_mask_landscape_width_percent': _get_subtitle_mask_value("landscape", "width_percent"),
+        'subtitle_mask_landscape_height_percent': _get_subtitle_mask_value("landscape", "height_percent"),
+        'subtitle_mask_landscape_blur_radius': _get_subtitle_mask_value("landscape", "blur_radius"),
+        'subtitle_mask_landscape_opacity_percent': _get_subtitle_mask_value("landscape", "opacity_percent"),
+        'subtitle_mask_portrait_x_percent': _get_subtitle_mask_value("portrait", "x_percent"),
+        'subtitle_mask_portrait_y_percent': _get_subtitle_mask_value("portrait", "y_percent"),
+        'subtitle_mask_portrait_width_percent': _get_subtitle_mask_value("portrait", "width_percent"),
+        'subtitle_mask_portrait_height_percent': _get_subtitle_mask_value("portrait", "height_percent"),
+        'subtitle_mask_portrait_blur_radius': _get_subtitle_mask_value("portrait", "blur_radius"),
+        'subtitle_mask_portrait_opacity_percent': _get_subtitle_mask_value("portrait", "opacity_percent"),
+        'subtitle_position_landscape_y_percent': _get_orientation_subtitle_position_value("landscape", "y_percent"),
+        'subtitle_position_portrait_y_percent': _get_orientation_subtitle_position_value("portrait", "y_percent"),
+        'subtitle_auto_transcribe_enabled': st.session_state.get('subtitle_auto_transcribe_enabled', False),
+        'subtitle_auto_transcribe_backend': st.session_state.get(
+            'subtitle_auto_transcribe_backend',
+            _get_saved_auto_transcribe_backend()
+        ),
+        'subtitle_auto_transcribe_api_url': st.session_state.get(
+            'subtitle_auto_transcribe_api_url',
+            config.fun_asr.get("api_url", "")
+        ),
+        'subtitle_auto_transcribe_firered_api_url': st.session_state.get(
+            'subtitle_auto_transcribe_firered_api_url',
+            config.fun_asr.get("firered_api_url", "")
+        ),
+        'subtitle_auto_transcribe_api_key': st.session_state.get(
+            'subtitle_auto_transcribe_api_key',
+            config.fun_asr.get("api_key", "")
+        ),
+        'subtitle_auto_transcribe_hotword': st.session_state.get(
+            'subtitle_auto_transcribe_hotword',
+            config.fun_asr.get("hotword", "")
+        ),
+        'subtitle_auto_transcribe_enable_spk': st.session_state.get(
+            'subtitle_auto_transcribe_enable_spk',
+            bool(config.fun_asr.get("enable_spk", False))
+        ),
        'font_name': font_name,
        'font_size': st.session_state.get('font_size', 60),
        'text_fore_color': st.session_state.get('text_fore_color', '#FFFFFF'),
--- a/webui/components/system_settings.py
+++ b/webui/components/system_settings.py
@ -3,6 +3,8 @@ import os
 import shutil
 from loguru import logger

+from app.config import config
+from app.utils import ffmpeg_detector, ffmpeg_utils
 from app.utils.utils import storage_dir


@ -27,6 +29,162 @@ def clear_directory(dir_path, tr):
    else:
        st.warning(tr("Directory does not exist"))

+
+def _format_engine_label(engines_by_path, tr):
+    """Build the label formatter used for the FFmpeg engine select box.
+
+    Args:
+        engines_by_path: Mapping of engine path to its info dict (``source``,
+            ``version_line`` and ``available`` are read).
+        tr: Translation callable mapping a message key to display text.
+
+    Returns:
+        A function turning a path into
+        ``"<source> - <version> - <path> (<status>)"``.
+    """
+    def formatter(path):
+        info = engines_by_path.get(path, {})
+
+        raw_source = info.get("source", "")
+        lookup_key = f"FFmpeg source {raw_source}"
+        source_label = tr(lookup_key)
+        if source_label == lookup_key:
+            # tr returned the key unchanged; show the raw source name instead.
+            source_label = raw_source
+
+        version_text = str(info.get("version_line", "")).replace("ffmpeg version", "").strip()
+        if not version_text:
+            version_text = "unknown version"
+
+        availability = _status_text(info.get("available"), tr)
+        return f"{source_label} - {version_text} - {path} ({availability})"
+
+    return formatter
+
+
+def _status_text(value, tr):
+    """Translate a truthy/falsy flag into the "Available"/"Unavailable" label."""
+    label_key = "Available" if value else "Unavailable"
+    return tr(label_key)
+
+
+def _render_ffmpeg_report(report, tr):
+    """Render the diagnostics of one FFmpeg engine check.
+
+    Shows the path and version, four status metrics, an overall verdict, the
+    hardware-acceleration and subtitle burn-in details, any reported errors
+    and the raw report as JSON.
+
+    Args:
+        report: Report dict; the keys read are ``path``, ``version_line``,
+            ``ffmpeg_available``, ``ffprobe_available``,
+            ``hardware_acceleration``, ``subtitle_burn``, ``hwaccels`` and
+            ``errors``.
+        tr: Translation callable mapping a message key to display text.
+    """
+    # Look the nested sections up once; "or {}" also covers an explicit None.
+    hwaccel = report.get("hardware_acceleration") or {}
+    subtitle_burn = report.get("subtitle_burn") or {}
+
+    st.write(f"**{tr('FFmpeg detection details')}**")
+    st.caption(f"{tr('Path')}: {report.get('path', '')}")
+    if report.get("version_line"):
+        st.caption(f"{tr('Version')}: {report['version_line']}")
+
+    col1, col2, col3, col4 = st.columns(4)
+    with col1:
+        st.metric("FFmpeg", _status_text(report.get("ffmpeg_available"), tr))
+    with col2:
+        st.metric("FFprobe", _status_text(report.get("ffprobe_available"), tr))
+    with col3:
+        st.metric(tr("Hardware Acceleration"), _status_text(hwaccel.get("available"), tr))
+    with col4:
+        st.metric(tr("Subtitle Burn-in"), _status_text(subtitle_burn.get("available"), tr))
+
+    # FFmpeg plus subtitle burn-in are required; missing hardware acceleration
+    # only downgrades the verdict to a warning.
+    if report.get("ffmpeg_available") and subtitle_burn.get("available"):
+        if hwaccel.get("available"):
+            st.success(tr("FFmpeg engine passed all checks"))
+        else:
+            st.warning(tr("FFmpeg engine works but hardware acceleration is unavailable"))
+    else:
+        st.error(tr("FFmpeg engine check failed"))
+
+    col1, col2 = st.columns(2)
+    with col1:
+        st.write(f"**{tr('Hardware acceleration detail')}**")
+        st.write(f"- {tr('Type')}: {hwaccel.get('type') or '-'}")
+        st.write(f"- {tr('Encoder')}: {hwaccel.get('encoder') or '-'}")
+        st.write(f"- {tr('Message')}: {hwaccel.get('message') or '-'}")
+        hwaccels = report.get("hwaccels") or []
+        st.write(f"- {tr('Supported Hardware Methods')}: {', '.join(hwaccels) if hwaccels else '-'}")
+    with col2:
+        filters = subtitle_burn.get("filters") or {}
+        st.write(f"**{tr('Subtitle burn-in detail')}**")
+        st.write(f"- {tr('Method')}: {subtitle_burn.get('method') or '-'}")
+        st.write(f"- {tr('Message')}: {subtitle_burn.get('message') or '-'}")
+        filters_label = tr("Subtitle Filters")
+        filters_text = ", ".join(
+            f"{name}={_status_text(enabled, tr)}"
+            for name, enabled in filters.items()
+        )
+        # Fall back to "-" like the other detail lines when nothing was reported.
+        st.write(f"- {filters_label}: {filters_text or '-'}")
+
+    errors = report.get("errors") or []
+    if errors:
+        with st.expander(tr("FFmpeg errors")):
+            for error in errors:
+                st.write(f"- {error}")
+
+    with st.expander(tr("Raw FFmpeg report")):
+        st.json(report)
+
+
+def render_ffmpeg_engine_settings(tr):
+    """Render FFmpeg engine discovery, selection and diagnostics.
+
+    Discovered engines are offered in a select box; a non-empty custom path
+    takes precedence over the selection. "Save" stores the path in the config
+    and resets hardware-acceleration detection; "Test" validates the path and
+    keeps the report in ``st.session_state["ffmpeg_engine_report"]``, which is
+    rendered below the buttons whenever it is present.
+    """
+    st.divider()
+    st.subheader(tr("FFmpeg Engine Detection"))
+
+    discovered = ffmpeg_detector.discover_ffmpeg_engines(
+        configured_path=config.app.get("ffmpeg_path", ""),
+        root_dir=config.root_dir,
+    )
+    engines_by_path = {entry["path"]: entry for entry in discovered}
+    engine_paths = list(engines_by_path.keys())
+
+    current_path = config.app.get("ffmpeg_path", "")
+
+    if engine_paths:
+        # Preselect the configured engine when it is among the discovered ones.
+        preselected = engine_paths.index(current_path) if current_path in engines_by_path else 0
+        selected_path = st.selectbox(
+            tr("FFmpeg Engine"),
+            options=engine_paths,
+            index=preselected,
+            format_func=_format_engine_label(engines_by_path, tr),
+            help=tr("FFmpeg Engine Help"),
+        )
+    else:
+        st.warning(tr("No FFmpeg engines found"))
+        selected_path = ""
+
+    custom_path = st.text_input(
+        tr("Custom FFmpeg Path"),
+        value="",
+        help=tr("Custom FFmpeg Path Help"),
+        placeholder="/path/to/ffmpeg",
+    ).strip()
+    effective_path = custom_path or selected_path
+    nothing_to_use = not effective_path
+
+    if current_path:
+        st.caption(f"{tr('Current FFmpeg Engine')}: {current_path}")
+
+    save_col, test_col = st.columns(2)
+
+    with save_col:
+        if st.button(tr("Save FFmpeg Engine"), use_container_width=True, disabled=nothing_to_use):
+            try:
+                if os.path.isfile(effective_path):
+                    config.app["ffmpeg_path"] = effective_path
+                    config.ffmpeg_path = effective_path
+                    config.apply_ffmpeg_path(effective_path)
+                    config.save_config()
+                    # Drop the cached detection so the new binary is probed again.
+                    ffmpeg_utils.reset_hwaccel_detection()
+                    st.success(tr("FFmpeg engine saved"))
+                else:
+                    st.error(tr("Selected FFmpeg path is invalid"))
+            except Exception as e:
+                st.error(f"{tr('Failed to save config')}: {str(e)}")
+                logger.error(f"保存 FFmpeg 引擎失败: {e}")
+
+    with test_col:
+        if st.button(tr("Test Selected FFmpeg"), use_container_width=True, disabled=nothing_to_use):
+            with st.spinner(tr("Testing FFmpeg engine")):
+                try:
+                    engine_report = ffmpeg_detector.validate_ffmpeg_engine(effective_path)
+                    st.session_state["ffmpeg_engine_report"] = engine_report
+                except Exception as e:
+                    st.error(f"{tr('FFmpeg engine check failed')}: {str(e)}")
+                    logger.error(f"FFmpeg 引擎检测失败: {e}")
+
+    latest_report = st.session_state.get("ffmpeg_engine_report")
+    if latest_report:
+        _render_ffmpeg_report(latest_report, tr)
+
+
 def render_system_panel(tr):
    """渲染系统设置面板"""
    with st.expander(tr("System settings"), expanded=False):
@ -43,3 +201,5 @@ def render_system_panel(tr):
        with col3:
            if st.button(tr("Clear tasks"), use_container_width=True):
                clear_directory(os.path.join(storage_dir(), "tasks"), tr)
+
+        render_ffmpeg_engine_settings(tr)
--- a/webui/i18n/en.json
+++ b/webui/i18n/en.json
@ -8,11 +8,23 @@
    "Script Files": "Script Files",
    "Generate Video Script and Keywords": "Click to use AI to generate **Video Script** and **Video Keywords** based on the **subject**",
    "Auto Detect": "Auto Detect",
-    "Auto Generate": "Auto Generate",
-    "Video Script": "Video Script (:blue[①Optional, use AI to generate ②Proper punctuation helps in generating subtitles])",
+    "Auto Generate": "Frame Analysis",
+    "Video Script": "Video Script",
+    "Edit Video Script": "View/Edit Video Script",
+    "Video script row count": "{count} script rows",
+    "Video script table help": "Edit the full script JSON as a table. You can add or delete rows; saving will validate and write the script file again.",
+    "Raw JSON Preview": "Raw JSON Preview",
+    "Script Column ID": "ID",
+    "Script Column Video ID": "Video",
+    "Script Column Video Name": "Video Name",
+    "Script Column Timestamp": "Timestamp",
+    "Script Column Picture": "Picture",
+    "Script Column Narration": "Narration",
+    "Script Column OST": "Mark",
+    "Generation Settings": "Generation Settings",
    "Save Script": "Save Script",
    "Crop Video": "Crop Video",
-    "Video File": "Video File (:blue[1️⃣Supports uploading video files (limit 2G) 2️⃣For large files, it is recommended to directly import them into the ./resource/videos directory])",
+    "Video File": "Video File",
    "Plot Description": "Plot Description (:blue[Can be obtained from https://www.tvmao.com/])",
    "Generate Video Keywords": "Click to use AI to generate **Video Keywords** based on the **script**",
    "Please Enter the Video Subject": "Please enter the video script first",
@ -41,9 +53,56 @@
    "Random Background Music": "Random Background Music",
    "Custom Background Music": "Custom Background Music",
    "Custom Background Music File": "Please enter the file path of the custom background music",
+    "Background Music Source": "Background Music Source",
+    "Background Music Source Help": "Choose background music from the resource directory, upload a new file, or disable background music.",
+    "Upload Background Music": "Upload Background Music",
+    "Background Music Path Help": "Choose the background music used for video synthesis.",
+    "No Background Music Resources Found": "No background music resources found. Please upload a background music file.",
+    "Preview Background Music Help": "Play the selected background music.",
+    "Upload Background Music File": "Upload Background Music File",
+    "Upload Background Music Help": "Upload an audio file to use as background music.",
+    "Background Music uploaded": "Background music uploaded: {path}",
    "Background Music Volume": "Background Music Volume (0.2 represents 20%, background sound should not be too loud)",
    "Subtitle Settings": "**Subtitle Settings**",
    "Enable Subtitles": "Enable Subtitles (If unchecked, the following settings will not take effect)",
+    "Enable Subtitle Mask": "Enable Subtitle Mask",
+    "Enable Subtitle Mask Help": "Before burning in new subtitles, cover the original subtitle area with a soft blurred mask.",
+    "Set Subtitle Mask": "Set Subtitle Mask",
+    "Subtitle Mask Summary": "Landscape {landscape_x}%/{landscape_y}% · {landscape_width}%×{landscape_height}%; portrait {portrait_x}%/{portrait_y}% · {portrait_width}%×{portrait_height}%",
+    "Subtitle Mask Settings": "Subtitle Mask Settings",
+    "Subtitle Mask Settings Caption": "Save landscape and portrait mask regions as frame percentages. The mask is applied before new subtitles are burned in.",
+    "Landscape Subtitle Mask": "Landscape Mask",
+    "Portrait Subtitle Mask": "Portrait Mask",
+    "Landscape Subtitle Position": "Landscape Subtitle Position",
+    "Portrait Subtitle Position": "Portrait Subtitle Position",
+    "Save Subtitle Mask Settings": "Save Subtitle Mask Settings",
+    "Subtitle Mask Left": "Left Position",
+    "Subtitle Mask Left Help": "Mask distance from the left edge as a frame percentage.",
+    "Subtitle Mask Top": "Top Position",
+    "Subtitle Mask Top Help": "Mask distance from the top edge as a frame percentage.",
+    "Subtitle Mask Width": "Mask Width",
+    "Subtitle Mask Width Help": "Width of the covered mask region as a frame percentage.",
+    "Subtitle Mask Height": "Mask Height",
+    "Subtitle Mask Height Help": "Height of the covered mask region as a frame percentage.",
+    "Subtitle Mask Blur Radius": "Blur Radius",
+    "Subtitle Mask Blur Radius Help": "Blur strength for the mask background and edge.",
+    "Subtitle Mask Opacity": "Mask Strength",
+    "Subtitle Mask Opacity Help": "Mask blend strength. Higher values cover source subtitles more strongly.",
+    "Subtitle Burn Position": "Subtitle Position",
+    "Subtitle Burn Position Help": "New subtitle distance from the top edge as a frame percentage. The blue line in preview shows this position.",
+    "Subtitle Mask Preview": "Source Subtitle Mask Preview",
+    "Subtitle Mask Preview Caption": "Upload a source video for preview, or use the currently selected source video. Uploaded files here are only used for mask preview.",
+    "Upload Subtitle Mask Preview Video": "Upload Preview Source Video",
+    "Upload Subtitle Mask Preview Video Help": "Only used for previewing the mask in this dialog. It will not replace the source video used for generation.",
+    "Using Subtitle Mask Preview Video": "Preview video: {file}",
+    "Change Subtitle Mask Preview Video": "Change Video",
+    "Subtitle Mask Preview Empty": "Upload a preview video, or select a source video above first.",
+    "Subtitle Mask Preview Timeline": "Preview Timeline (seconds)",
+    "Subtitle Mask Preview Timeline Help": "Drag to a frame where the source subtitles appear, then fine-tune the mask region.",
+    "Subtitle Mask Preview Frame Caption": "{time} · {orientation} · red outline is the mask, blue line is the subtitle position",
+    "Subtitle Mask Preview Failed": "Unable to read this video preview. Please try another video file.",
+    "Enable Auto Transcription": "Enable Auto Transcription",
+    "Enable Auto Transcription Help": "After the final video is merged, transcribe the whole video into subtitles and burn them into the output.",
    "Font": "Subtitle Font",
    "Position": "Subtitle Position",
    "Top": "Top",
@ -84,8 +143,560 @@
    "Synthesizing Voice": "Synthesizing voice, please wait...",
    "TTS Provider": "TTS Provider",
    "Hide Log": "Hide Log",
+    "Select from resource directory": "Select from resource directory",
+    "Select a video from resource videos directory": "Select a video from the ./resource/videos directory",
+    "Upload a new video file up to 2GB": "Upload a new video file, up to 2GB",
+    "Upload new video files up to 2GB each": "Upload one or more video files, up to 2GB each",
+    "Select Video": "Select Video",
+    "Choose a video file": "Choose a video file",
+    "Upload Video": "Upload Video",
+    "No video files found in resource videos directory": "No video files found in the ./resource/videos directory",
    "Upload Local Files": "Upload Local Files",
    "File Uploaded Successfully": "File Uploaded Successfully",
-    "Frame Interval (seconds)": "Frame Interval (seconds) (More keyframes consume more tokens)"
+    "Selected videos for processing": "Selected {count} video(s): {files}",
+    "Frame Interval (seconds)": "Frame Interval (seconds)",
+    "Generate Video Script": "Generate Video Script",
+    "Video Theme": "Video Theme",
+    "Generation Prompt": "Custom Prompt",
+    "Video LLM Provider": "Video Analysis Model",
+    "timestamp": "Timestamp",
+    "Picture description": "Picture Description",
+    "Narration": "Narration",
+    "Rebuild": "Regenerate",
+    "Load Video Script": "Load Video Script",
+    "Speech Pitch": "Speech Pitch",
+    "Please Select Script File": "Please Select Script File",
+    "Check Format": "Check Format",
+    "Script Loaded Successfully": "Script Loaded Successfully",
+    "Script loaded successfully": "Script loaded successfully",
+    "Script format check passed": "Script format check passed",
+    "Script format check failed": "Script format check failed",
+    "Failed to Load Script": "Failed to Load Script",
+    "Failed to load script": "Failed to load script",
+    "Failed to Save Script": "Failed to Save Script",
+    "Failed to save script": "Failed to save script",
+    "Script saved successfully": "Script saved successfully",
+    "Video Quality": "Video Quality",
+    "Custom prompt for LLM, leave empty to use default prompt": "Custom prompt for LLM. Leave empty to use the default prompt.",
+    "Proxy Settings": "Proxy Settings",
+    "HTTP_PROXY": "HTTP Proxy",
+    "HTTPs_PROXY": "HTTPS Proxy",
+    "Vision Model Settings": "Vision Model Settings",
+    "Vision Model Provider": "API Protocol",
+    "Vision API Key": "Vision API Key",
+    "Vision Base URL": "Vision Base URL",
+    "Vision Model Name": "Vision Model Name",
+    "Text Generation Model Settings": "Text Generation Model Settings",
+    "LLM Model Name": "LLM Model Name",
+    "LLM Model API Key": "LLM Model API Key",
+    "Text Model Provider": "API Protocol",
+    "Text API Key": "Text API Key",
+    "Text Base URL": "Text Base URL",
+    "Text Model Name": "Text Model Name",
+    "Top P": "Top P",
+    "Top K": "Top K",
+    "Max Output Tokens": "Max Output Tokens",
+    "Max Output Tokens Help": "Maximum generated output length. 0 uses the provider default.",
+    "Thinking Level": "Thinking Level",
+    "Thinking Level Help": "Controls reasoning effort. Auto sends no extra thinking parameter; Low, Medium, and High try to set reasoning_effort.",
+    "Thinking Level Auto": "Auto",
+    "Thinking Level Off": "Off",
+    "Thinking Level Low": "Low",
+    "Thinking Level Medium": "Medium",
+    "Thinking Level High": "High",
+    "Skip the first few seconds": "Skip the first few seconds",
+    "Difference threshold": "Difference Threshold",
+    "Vision processing batch size": "Vision Processing Batch Size",
+    "Test Connection": "Test Connection",
+    "Testing connection...": "Testing connection...",
+    "gemini model is available": "Gemini model is available",
+    "gemini model is not available": "Gemini model is not available",
+    "Unsupported provider": "Unsupported provider",
+    "0: Keep the audio only, 1: Keep the original sound only, 2: Keep the original sound and audio": "0: Keep the narration only, 1: Keep the original sound only, 2: Keep both original sound and narration",
+    "Text model is not available": "Text model is not available",
+    "Text model is available": "Text model is available",
+    "Upload Script": "Upload Script",
+    "Upload Script File": "Upload Script File",
+    "Script Uploaded Successfully": "Script Uploaded Successfully",
+    "Invalid JSON format": "Invalid JSON format",
+    "Upload failed": "Upload failed",
+    "Enable Proxy": "Enable Proxy",
+    "QwenVL model is available": "QwenVL model is available",
+    "QwenVL model is not available": "QwenVL model is not available",
+    "QwenVL model returned invalid response": "QwenVL model returned an invalid response",
+    "System settings": "System Settings",
+    "Clear Cache": "Clear Cache",
+    "Cache cleared": "Cache cleared",
+    "storage directory does not exist": "Storage directory does not exist",
+    "Failed to clear cache": "Failed to clear cache",
+    "Clear frames": "Clear frames",
+    "Clear clip videos": "Clear clip videos",
+    "Clear tasks": "Clear tasks",
+    "Directory cleared": "Directory cleared",
+    "Directory does not exist": "Directory does not exist",
+    "Failed to clear directory": "Failed to clear directory",
+    "FFmpeg Engine Detection": "FFmpeg Engine Detection",
+    "FFmpeg Engine": "FFmpeg Engine",
+    "FFmpeg Engine Help": "Choose the ffmpeg executable this app should prefer; the packaged runtime and local PATH are discovered automatically",
+    "No FFmpeg engines found": "No FFmpeg engines found",
+    "Custom FFmpeg Path": "Custom FFmpeg Path",
+    "Custom FFmpeg Path Help": "Paste an absolute path to an ffmpeg executable if the target engine is not listed",
+    "Current FFmpeg Engine": "Current FFmpeg Engine",
+    "Save FFmpeg Engine": "Save Engine",
+    "Test Selected FFmpeg": "Test Selected FFmpeg",
+    "Testing FFmpeg engine": "Testing FFmpeg engine...",
+    "FFmpeg engine saved": "FFmpeg engine saved",
+    "Selected FFmpeg path is invalid": "Selected FFmpeg path is invalid",
+    "FFmpeg detection details": "FFmpeg detection details",
+    "FFmpeg source Configured": "Configured",
+    "FFmpeg source NarratoAI packaged runtime": "NarratoAI packaged runtime",
+    "FFmpeg source Integrated runtime": "Integrated runtime",
+    "FFmpeg source System PATH": "System PATH",
+    "FFmpeg source Homebrew": "Homebrew",
+    "FFmpeg source Python environment": "Python environment",
+    "FFmpeg source Python executable folder": "Python executable folder",
+    "FFmpeg source IMAGEIO_FFMPEG_EXE": "IMAGEIO_FFMPEG_EXE",
+    "FFmpeg source imageio-ffmpeg": "imageio-ffmpeg",
+    "FFmpeg source System": "System",
+    "Version": "Version",
+    "Path": "Path",
+    "Available": "Available",
+    "Unavailable": "Unavailable",
+    "Hardware Acceleration": "Hardware Acceleration",
+    "Subtitle Burn-in": "Subtitle Burn-in",
+    "FFmpeg engine passed all checks": "FFmpeg engine passed all checks: basic execution, hardware acceleration and subtitle burn-in are available",
+    "FFmpeg engine works but hardware acceleration is unavailable": "FFmpeg and subtitle burn-in work, but hardware acceleration is unavailable; software encoding will be used",
+    "FFmpeg engine check failed": "FFmpeg engine check failed",
+    "Hardware acceleration detail": "Hardware acceleration detail",
+    "Subtitle burn-in detail": "Subtitle burn-in detail",
+    "Type": "Type",
+    "Encoder": "Encoder",
+    "Message": "Message",
+    "Method": "Method",
+    "Supported Hardware Methods": "Supported hardware methods",
+    "Subtitle Filters": "Subtitle filters",
+    "FFmpeg errors": "FFmpeg errors",
+    "Raw FFmpeg report": "Raw FFmpeg report",
+    "Subtitle Preview": "Subtitle Preview",
+    "One-Click Transcribe": "One-Click Transcribe",
+    "Transcribing...": "Transcribing...",
+    "Transcription Complete!": "Transcription Complete!",
+    "Transcription Failed. Please try again.": "Transcription failed. Please try again.",
+    "API rate limit exceeded. Please wait about an hour and try again.": "API rate limit exceeded. Please wait about an hour and try again.",
+    "Resources exhausted. Please try again later.": "Resources exhausted. Please try again later.",
+    "Transcription Failed": "Transcription Failed",
+    "Short Generate": "Short Drama Mix",
+    "Generate Short Video Script": "Generate Short Video Script",
+    "Adjust the volume of the original audio": "Adjust the volume of the original audio",
+    "Original Volume": "Original Volume",
+    "Frame Interval (seconds) (More keyframes consume more tokens)": "Frame Interval (seconds) (More keyframes consume more tokens)",
+    "Batch Size": "Batch Size",
+    "Batch Size (More keyframes consume more tokens)": "Batch Size (smaller batches consume more tokens)",
+    "Short Drama Summary": "Short Drama Summary",
+    "Film TV Narration": "Film/TV Narration",
+    "Video Type": "Creation Type",
+    "Select/Upload Script": "Custom Script",
+    "原生Gemini模型连接成功": "Native Gemini model connection succeeded",
+    "原生Gemini模型连接失败": "Native Gemini model connection failed",
+    "OpenAI兼容Gemini代理连接成功": "OpenAI-compatible Gemini proxy connection succeeded",
+    "OpenAI兼容Gemini代理连接失败": "OpenAI-compatible Gemini proxy connection failed",
+    "Connection failed": "Connection failed",
+    "自定义片段": "Custom Clips",
+    "设置需要生成的短视频片段数量": "Set the number of short video clips to generate",
+    "上传字幕文件": "Upload SRT",
+    "清除已上传字幕": "Clear Uploaded Subtitle",
+    "无法读取字幕文件，请检查文件编码（支持 UTF-8、UTF-16、GBK、GB2312）": "Unable to read the subtitle file. Please check the file encoding. Supported encodings: UTF-8, UTF-16, GBK, GB2312.",
+    "字幕文件内容似乎为空，请检查文件": "The subtitle file appears to be empty. Please check the file.",
+    "字幕上传成功": "Subtitle uploaded successfully",
+    "短剧名称": "Short Drama Name",
+    "影视名称": "Film/TV Title",
+    "解说语言": "Narration Language",
+    "自定义解说语言": "Custom Narration Language",
+    "例如：意大利语（意大利）": "For example: Italian (Italy)",
+    "请输入自定义解说语言": "Please enter a custom narration language",
+    "简体中文（中国）": "Simplified Chinese (China)",
+    "英语（美国）": "English (United States)",
+    "日语（日本）": "Japanese (Japan)",
+    "韩语（韩国）": "Korean (South Korea)",
+    "法语（法国）": "French (France)",
+    "德语（德国）": "German (Germany)",
+    "西班牙语（西班牙）": "Spanish (Spain)",
+    "葡萄牙语（巴西）": "Portuguese (Brazil)",
+    "俄语（俄罗斯）": "Russian (Russia)",
+    "自定义": "Custom",
+    "短剧类型": "Short Drama Type",
+    "自定义短剧类型": "Custom Short Drama Type",
+    "影视类型": "Film/TV Type",
+    "自定义影视类型": "Custom Film/TV Type",
+    "原片占比": "Original Footage Ratio",
+    "例如：豪门虐恋": "For example: billionaire angst romance",
+    "例如：悬疑犯罪": "For example: suspense crime",
+    "请输入自定义短剧类型": "Please enter a custom short drama type",
+    "请输入自定义影视类型": "Please enter a custom film/TV type",
+    "逆袭/复仇": "Comeback / Revenge",
+    "霸总/甜宠": "CEO Romance / Sweet Romance",
+    "家庭伦理": "Family Ethics",
+    "古装/权谋": "Costume / Power Struggle",
+    "悬疑/犯罪": "Suspense / Crime",
+    "都市情感": "Urban Romance",
+    "年代/乡村": "Period / Rural",
+    "剧情/情感": "Drama / Emotion",
+    "动作/冒险": "Action / Adventure",
+    "喜剧/轻松": "Comedy / Lighthearted",
+    "科幻/奇幻": "Sci-Fi / Fantasy",
+    "历史/战争": "History / War",
+    "恐怖/惊悚": "Horror / Thriller",
+    "生成解说文案": "Generate Narration Copy",
+    "生成剪辑脚本": "Generate Editing Script",
+    "短剧解说文案": "Short Drama Narration Copy",
+    "影视解说文案": "Film/TV Narration Copy",
+    "Narration Copy Help": "Generate the narration copy first, review or rewrite it here, then generate the editing script to match footage and timestamps.",
+    "Narration copy generated successfully": "Narration copy generated. Please review and edit it.",
+    "生成短剧解说脚本": "Generate Short Drama Narration Script",
+    "请输入视频脚本": "Please enter the video script",
+    "TTS engine does not support precise subtitles": "⚠️ {engine} does not support precise subtitle generation",
+    "Manual subtitle editing recommendation": "💡 We recommend adding subtitles manually in a professional editor such as CapCut or Premiere Pro.",
+    "Disabled subtitles help": "This TTS engine does not support subtitle generation. Please use another TTS engine.",
+    "Tencent Cloud TTS": "Tencent Cloud TTS",
+    "Tongyi Qwen3 TTS": "Tongyi Qwen3 TTS",
+    "IndexTTS Voice Clone": "IndexTTS-1.5 Voice Clone",
+    "Doubao TTS": "Doubao TTS",
+    "Edge TTS features": "Completely free, but service stability can vary and voice cloning is not supported.",
+    "Edge TTS use case": "Testing and lightweight use",
+    "Azure Speech Services features": "Includes a free quota, then pay-as-you-go billing. An overseas credit card may be required.",
+    "Azure Speech Services use case": "Enterprise use cases that need a stable service",
+    "Tencent Cloud TTS features": "Includes a free quota, good voice quality, multiple voices, and fast access in mainland China.",
+    "Tencent Cloud TTS use case": "Personal and enterprise users who need stable Chinese speech synthesis",
+    "Tongyi Qwen3 TTS features": "Alibaba Cloud Tongyi Qwen speech synthesis with high-quality voices and multiple voice options.",
+    "High-quality Chinese speech synthesis use case": "Users who need high-quality Chinese speech synthesis",
+    "IndexTTS features": "A locally or privately deployed IndexTTS-1.5 voice-cloning engine. Choose a resource audio file or upload a reference audio file, then synthesize narration in that voice.",
+    "IndexTTS use case": "Best for fixed narrator voices, character dubbing, or generating multiple videos with the same voice. Start the IndexTTS-1.5 API service before use. Deployment package: https://pan.quark.cn/s/0767c9bcefd5",
+    "IndexTTS download link": "Download link: https://pan.quark.cn/s/0767c9bcefd5",
+    "IndexTTS2 features": "A locally or privately deployed IndexTTS-2 voice-cloning engine with emotion control and fuller generation parameters.",
+    "IndexTTS2 use case": "Best for fixed voices, emotional narration, and local speech synthesis workflows that need finer sampling controls. Start the IndexTTS-2 API service before use.",
+    "OmniVoice features": "A locally or privately deployed OmniVoice-Pack multilingual TTS engine with automatic voice generation, voice design, and reference-audio cloning.",
+    "OmniVoice use case": "Best for local controllable multilingual narration, voice design, or reference-audio cloning. Start the OmniVoice-Pack API service before use.",
+    "Doubao TTS features": "Volcengine Doubao speech synthesis with multiple voices and emotions, plus fast access in mainland China.",
+    "Local Deployment": "Local Deployment",
+    "Cloud Service": "Cloud Service",
+    "Select TTS Engine": "Select TTS Engine",
+    "Select TTS Engine Help": "Choose the text-to-speech engine you want to use.",
+    "TTS Engine Details": "{engine} Details",
+    "Features": "Features",
+    "Use Case": "Use Case",
+    "Registration URL": "Registration URL",
+    "Voice Selection": "Voice Selection",
+    "Select Edge TTS Voice": "Select an Edge TTS voice",
+    "Edge TTS Voice Description": "Edge TTS Voice Notes",
+    "Loaded voice count": "Loaded {count} voices",
+    "Female Voice": "Female voice",
+    "Male Voice": "Male voice",
+    "Voice Volume": "Voice Volume",
+    "Voice Volume Help Percent": "Adjust voice volume (0-100)",
+    "Voice Rate": "Voice Rate",
+    "Voice Rate Help 0.5-2.0": "Adjust voice speed (0.5-2.0x)",
+    "Voice Pitch": "Voice Pitch",
+    "Voice Pitch Help Percent": "Adjust voice pitch (-50% to +50%)",
+    "Service Region": "Service Region",
+    "Service Region Placeholder": "e.g. eastus",
+    "Azure Service Region Help": "Azure Speech Services region, such as eastus, westus2, or eastasia.",
+    "Azure Speech Key Help": "Azure Speech Services API key",
+    "Voice Name": "Voice Name",
+    "Azure Voice Name Help": "Enter an Azure Speech Services voice name. You can use the official voice name directly, such as zh-CN-YunzeNeural.",
+    "Common Voice Reference": "Common Voice Reference",
+    "Chinese Voices": "Chinese Voices",
+    "English Voices": "English Voices",
+    "Multilingual": "multilingual",
+    "Azure Voices Docs Notice": "For more voices, see the [Azure Speech Services documentation](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support).",
+    "Quick Select": "Quick Select",
+    "Chinese Female Voice": "Chinese Female Voice",
+    "Chinese Male Voice": "Chinese Male Voice",
+    "English Female Voice": "English Female Voice",
+    "Voice name valid": "Voice name is valid: {voice}",
+    "Voice name format may be invalid": "Voice name format may be incorrect: {voice}",
+    "Azure voice name format notice": "Azure voice names usually follow this format: [language]-[region]-[name]Neural",
+    "Azure Speech Services configured": "Azure Speech Services is configured",
+    "Please configure service region": "Please configure the service region",
+    "Please configure API Key": "Please configure the API Key",
+    "Task failed": "Task failed",
+    "Script file cannot be empty": "Script file cannot be empty",
+    "Video file cannot be empty": "Video file cannot be empty",
+    "Export to Jianying Draft": "📤 Export to Jianying Draft",
+    "Please configure Jianying draft folder in basic settings": "Please configure the Jianying draft folder in Basic Settings",
+    "Jianying draft folder does not exist": "Jianying draft folder does not exist: {path}",
+    "Jianying export dialog title": "Confirm draft name",
+    "Jianying export dialog description": "Confirm the Jianying draft name before exporting. Once complete, you can open it from the Jianying draft folder.",
+    "Jianying export destination": "Save location",
+    "Jianying draft name": "Draft name",
+    "Please enter Jianying draft name": "Please enter the Jianying draft name",
+    "Confirm Export": "Confirm Export",
+    "Please enter draft name": "Please enter a draft name",
+    "Failed to build parameters": "Failed to build parameters",
+    "Exporting to Jianying draft...": "Exporting to Jianying draft, please wait...",
+    "Jianying draft exported successfully": "✅ Successfully exported to Jianying draft: {name}",
+    "Draft saved to": "📁 Draft saved to: {path}",
+    "Failed to export Jianying draft": "❌ Failed to export Jianying draft",
+    "Cancel": "Cancel",
+    "LLM initialization failed": "⚠️ LLM initialization failed: {error}\n\nPlease check whether the configuration file and dependencies are installed correctly.",
+    "Jianying Draft Settings": "Jianying Draft Settings",
+    "Jianying Draft Folder Path": "Jianying Draft Folder Path",
+    "Jianying Draft Folder Path Help": "Jianying draft folder path, for example: C:\\Users\\Username\\Documents\\JianyingPro Drafts",
+    "Custom API endpoint help": "OpenAI-compatible endpoint URL. Use a full /v1 URL for third-party or self-hosted gateways; leave empty for the official OpenAI API.",
+    "Recommended API endpoint": "Recommended endpoint",
+    "OpenAI compatible gateway help": "{model_type} uses an OpenAI-compatible API, so a complete endpoint URL is required.",
+    "Vision model": "Vision model",
+    "Text model": "Text model",
+    "Model Name Input Help": "Enter the full model name.\n\nCommon examples:",
+    "OpenAI compatible providers help": "The vendor is not limited here; OpenAI, DeepSeek, OpenRouter, SiliconFlow, or a self-hosted gateway all work as long as the endpoint is OpenAI-compatible.",
+    "OpenAI compatible protocol": "OpenAI-compatible",
+    "OpenAI compatible protocol help": "An official OpenAI model is not required; any service that supports an OpenAI Chat Completions-compatible API can be used.",
+    "Provider API Key Help": "API key for the model service.\n\nCommon places to get one:",
+    "Please fill OpenAI compatible gateway": "Please fill in the OpenAI-compatible gateway URL above, for example: {example}",
+    "Please enter API key": "Please enter the API key first",
+    "Please enter model name": "Please enter the model name first",
+    "Connection test error": "An error occurred while testing the connection",
+    "Vision model config saved": "Vision model configuration saved (OpenAI-compatible)",
+    "Text model config saved": "Text generation model configuration saved (OpenAI-compatible)",
+    "Failed to save config": "Failed to save configuration",
+    "Custom Position (% from top)": "Custom Position (% from top)",
+    "Please enter a value between 0 and 100": "Please enter a value between 0 and 100",
+    "Please enter a valid number": "Please enter a valid number",
+    "None": "None",
+    "Uploaded subtitle": "Uploaded subtitle: {file}",
+    "Encoding": "Encoding",
+    "Size": "Size",
+    "Characters": "characters",
+    "Ali Bailian Fun-ASR Subtitle Transcription": "Subtitle Processing",
+    "Subtitle Processing Method": "Subtitle Processing Method",
+    "Fun-ASR Backend": "Fun-ASR Backend",
+    "Local FunASR-Pack API": "FunASR (Local)",
+    "Local FireRedASR API": "FireRedASR2 (Local)",
+    "Ali Bailian Online Fun-ASR": "FunASR (Online)",
+    "Local Fun-ASR upload caption": "The current video above will be converted to SRT subtitles through the locally running FunASR-Pack API.",
+    "Local FireRed-ASR upload caption": "The current video above will be converted to SRT subtitles through the locally running FireRedASR2-AED-Pack API.",
+    "Fun-ASR upload caption": "The current video above will be uploaded to temporary Ali Bailian storage and converted to SRT subtitles with fun-asr.",
+    "Auto Transcription Local Caption": "After the final video is merged, it will be converted to SRT subtitles through the locally running FunASR-Pack API.",
+    "Auto Transcription FireRed Caption": "After the final video is merged, it will be converted to SRT subtitles through the locally running FireRedASR2-AED-Pack API.",
+    "Auto Transcription Online Caption": "After the final video is merged, it will be uploaded to temporary Ali Bailian storage and converted to SRT subtitles with fun-asr.",
+    "Local FunASR-Pack API URL": "Local FunASR-Pack API URL",
+    "Local FunASR-Pack API URL Help": "For example, http://127.0.0.1:7860. A full /asr endpoint URL is also supported.",
+    "Local FireRedASR API URL": "Local ASR API URL",
+    "Local FireRedASR API URL Help": "For example, http://127.0.0.1:7867. A full /asr endpoint URL is also supported.",
+    "Fun-ASR Hotword": "Hotword",
+    "Fun-ASR Hotword Help": "Optional hotwords passed to the local FunASR-Pack API.",
+    "Enable speaker diarization": "Enable speaker diarization",
+    "Enable speaker diarization Help": "Requires the local FunASR-Pack service to enable and load the spk model.",
+    "API Key URL": "API Key URL",
+    "Ali Bailian API Key": "Ali Bailian API Key",
+    "Ali Bailian API Key Help": "Enter your Ali Bailian API Key. After saving, it will be written to the local config.toml file.",
+    "Upload media to transcribe": "Upload audio/video to transcribe",
+    "Using selected video for subtitle transcription": "Using current video for subtitle transcription: {file}",
+    "Using selected videos for subtitle transcription": "Using {count} current videos for subtitle transcription: {files}",
+    "Please select or upload a video first": "Please select or upload a video file above first",
+    "Selected video file does not exist": "The selected video file does not exist. Please select or upload it again",
+    "Selected video files do not exist": "These selected video files do not exist. Please select or upload them again: {files}",
+    "Transcribe subtitles": "Transcribe Subtitles",
+    "Calibrate subtitles": "Calibrate Subtitles",
+    "Please enter Ali Bailian API Key": "Please enter the Ali Bailian API Key first",
+    "Please enter local FunASR-Pack API URL": "Please enter the local FunASR-Pack API URL first",
+    "Please enter local FireRedASR API URL": "Please enter the local ASR API URL first",
+    "Please upload media to transcribe": "Please upload the audio or video file to transcribe first",
+    "Transcribing with local FunASR-Pack...": "Transcribing subtitles with local FunASR-Pack, please wait...",
+    "Transcribing with local FireRedASR...": "Transcribing subtitles with local ASR, please wait...",
+    "Transcribing with Fun-ASR...": "Transcribing subtitles with Ali Bailian Fun-ASR, please wait...",
+    "Fun-ASR failed without subtitle file": "Fun-ASR transcription failed: no subtitle file was generated",
+    "Subtitle transcription succeeded": "Subtitle transcription succeeded: {file}",
+    "Subtitle transcription succeeded for multiple files": "Subtitle transcription succeeded for {count} files: {files}",
+    "Calibrating subtitles...": "Calibrating subtitles with the LLM, please wait...",
+    "Subtitle calibration succeeded": "Subtitle calibration succeeded: {file}",
+    "Subtitle calibration succeeded for multiple files": "Subtitle calibration succeeded for {count} files: {files}",
+    "Subtitle calibration failed": "Subtitle calibration failed",
+    "Transcribed subtitles storage hint": "Previously transcribed subtitles are saved in {path}; drag a file from that folder to upload",
+    "Tavily Search Settings": "Tavily Web Search",
+    "Tavily API Key": "Tavily API Key",
+    "Tavily API Key Help": "Used for web search before plot analysis. When Web Search is enabled, the app searches plot, character, and background context by title, then combines it with subtitles.",
+    "Tavily config saved": "Tavily configuration saved",
+    "联网搜索": "Web Search",
+    "Enable Web Search Help": "When enabled, plot analysis searches the web with Tavily by title before combining those results with subtitles.",
+    "Please configure Tavily API Key in Basic Settings": "Please configure the Tavily API Key in Basic Settings first",
+    "Please enter short drama name before web search": "Please enter the short drama name before enabling web search",
+    "Please enter film/tv title before web search": "Please enter the film/TV title before enabling web search",
+    "Searching short drama with Tavily...": "Searching short drama context with Tavily...",
+    "Tavily search failed": "Tavily search failed",
+    "剧情理解": "Plot Analysis",
+    "剧情理解结果": "Plot Analysis Result",
+    "Analyzing plot...": "Analyzing plot...",
+    "Plot analysis completed": "Plot analysis completed",
+    "Please generate or upload subtitles first": "Please transcribe or upload subtitles first",
+    "Please transcribe or upload subtitles first": "Please transcribe or upload subtitles first",
+    "Fun-ASR transcription failed": "Fun-ASR transcription failed",
+    "Validating script format...": "Validating script format...",
+    "Script format validation failed": "Script format validation failed",
+    "Error Message": "Error Message",
+    "Details": "Details",
+    "Correct script format example": "Correct script format example",
+    "Script format validation error": "An error occurred during script format validation",
+    "Script validated and saved successfully": "✅ Script format validated and saved successfully!",
+    "Tencent Secret ID Help": "Enter your Tencent Cloud Secret ID",
+    "Tencent Secret Key Help": "Enter your Tencent Cloud Secret Key",
+    "Tencent Service Region Help": "Select the Tencent Cloud TTS service region",
+    "Custom Voice": "Custom Voice",
+    "Select Tencent TTS Voice": "Select a Tencent Cloud TTS voice",
+    "Tencent Cloud TTS Voice Description": "Tencent Cloud TTS Voice Notes",
+    "Female Voices": "Female Voices",
+    "Male Voices": "Male Voices",
+    "Tencent More Voices Notice": "See the official Tencent Cloud documentation for more voices.",
+    "Qwen DashScope API Key Help": "Tongyi Qwen DashScope API Key",
+    "TTS Model Name": "TTS Model Name",
+    "Qwen TTS Model Help": "Qwen TTS model name, for example qwen3-tts-flash",
+    "Select Qwen3 TTS Voice": "Select a Qwen3 TTS voice",
+    "API URL": "API URL",
+    "IndexTTS API URL Help": "IndexTTS-1.5 API service URL",
+    "IndexTTS2 API URL Help": "IndexTTS-2 API service URL. You can enter the service root or the full /tts endpoint.",
+    "OmniVoice API URL Help": "OmniVoice-Pack API service URL. You can enter the service root or the full /tts endpoint.",
+    "OmniVoice Language Code": "Synthesis Language",
+    "OmniVoice Language Code Help": "The language parameter sent to OmniVoice-Pack, such as zh or en.",
+    "OmniVoice Generation Mode": "Generation Mode",
+    "OmniVoice Generation Mode Help": "Automatic voice needs no extra fields; voice design uses an instruction; reference-audio cloning needs reference audio and matching text.",
+    "OmniVoice Mode Auto": "Automatic Voice",
+    "OmniVoice Mode Voice Design": "Voice Design",
+    "OmniVoice Mode Voice Clone": "Reference Audio Clone",
+    "OmniVoice Instruct": "Voice Instruction",
+    "OmniVoice Instruct Help": "Describe the desired voice, such as gender, pitch, accent, or style.",
+    "OmniVoice Instruct Placeholder": "e.g. female, low pitch, british accent",
+    "OmniVoice Reference Text": "Reference Audio Text",
+    "OmniVoice Reference Text Help": "The exact transcript of the reference audio. Required when the deployed service has ASR disabled.",
+    "OmniVoice Reference Text Placeholder": "Enter the text spoken in the reference audio",
+    "OmniVoice Num Step Help": "Diffusion generation steps. Higher values usually improve quality but slow generation.",
+    "OmniVoice Guidance Scale Help": "Controls how strongly text conditions guide generation.",
+    "OmniVoice Duration": "Target Duration (seconds)",
+    "OmniVoice Duration Help": "0 lets the model decide the duration automatically.",
+    "OmniVoice Denoise": "Enable Denoise",
+    "OmniVoice Denoise Help": "Ask OmniVoice-Pack to denoise the generated output.",
+    "OmniVoice Postprocess Output": "Postprocess Output",
+    "OmniVoice Postprocess Output Help": "Enable OmniVoice-Pack output post-processing.",
+    "OmniVoice Preprocess Prompt": "Preprocess Text",
+    "OmniVoice Preprocess Prompt Help": "Enable OmniVoice-Pack text preprocessing.",
+    "Reference Audio Source": "Reference Audio Source",
+    "Reference Audio Source Help": "Choose a reference audio from the resource directory or upload a new one.",
+    "Select from Resource Directory": "Select from Resource Directory",
+    "Upload Reference Audio": "Upload Reference Audio",
+    "Reference Audio Path": "Reference Audio",
+    "Reference Audio Path Help": "Choose the reference audio for voice cloning (WAV/MP3, 3-10 seconds recommended)",
+    "No Reference Audio Resources Found": "No reference audio resources found. Please upload a reference audio file.",
+    "Preview Reference Audio": "Preview",
+    "Preview Reference Audio Help": "Play the selected reference audio.",
+    "Upload Reference Audio File": "Upload Reference Audio File",
+    "Upload Reference Audio Help": "Upload a clear audio clip for voice cloning",
+    "Audio uploaded": "Audio uploaded: {path}",
+    "Inference Mode": "Inference Mode",
+    "Standard Inference": "Standard Inference",
+    "Fast Inference": "Fast Inference",
+    "Inference Mode Help": "Standard inference has higher quality but is slower. Fast inference is faster with slightly lower quality.",
+    "Advanced Parameters": "Advanced Parameters",
+    "Sampling Temperature": "Sampling Temperature",
+    "Sampling Temperature Help": "Controls randomness. Higher values are more random; lower values are more deterministic.",
+    "Top P Help": "Probability threshold for nucleus sampling. Smaller values make results more deterministic.",
+    "Top K Help": "The k value for top-k sampling. 0 disables top-k.",
+    "Num Beams": "Num Beams",
+    "Num Beams Help": "Number of beams for beam search. Higher values may improve quality but slow generation.",
+    "Repetition Penalty": "Repetition Penalty",
+    "Repetition Penalty Help": "Higher values reduce repetition, but overly high values may sound unnatural.",
+    "Enable Sampling": "Enable Sampling",
+    "Enable Sampling Help": "Enable sampling for more natural speech.",
+    "IndexTTS Usage Instructions Title": "IndexTTS-1.5 Usage Instructions",
+    "IndexTTS Usage Instructions": "**Zero-shot voice cloning**\n\n1. **Prepare reference audio**: upload or specify a clear audio file (3-10 seconds recommended)\n2. **Set API URL**: make sure the IndexTTS-1.5 service is running\n3. **Start synthesis**: the system will use the reference voice to synthesize new speech\n\n**Notes**:\n- Reference audio quality directly affects synthesis quality\n- Use clean audio without background noise when possible\n- Keep text length within a reasonable range\n- The first synthesis may take longer",
+    "IndexTTS2 Emotion Parameters": "Emotion Parameters",
+    "Emotion Mode": "Emotion Mode",
+    "Emotion Mode Help": "Choose the emotion control source for IndexTTS-2.",
+    "Emotion Mode Speaker": "Same as speaker reference",
+    "Emotion Mode Audio": "Use emotion reference audio",
+    "Emotion Mode Vector": "Use emotion vector",
+    "Emotion Mode Text": "Use emotion text",
+    "Emotion Alpha": "Emotion Alpha",
+    "Emotion Alpha Help": "Controls how strongly the emotion condition affects generation. 0 is weak, 1 is strong.",
+    "Emotion Reference Audio Path": "Emotion Reference Audio Path",
+    "Emotion Reference Audio Path Help": "Local emotion reference audio path used when emotion_mode=audio.",
+    "Emotion Text": "Emotion Text",
+    "Emotion Text Help": "Emotion description used when emotion_mode=text, such as happy, nervous, or aggrieved.",
+    "Emotion Text Placeholder": "e.g. calm, nervous, happy",
+    "Use Random Emotion": "Use Random Emotion",
+    "Use Random Emotion Help": "Let IndexTTS-2 use random emotion sampling during generation.",
+    "Emotion Happy": "Happy",
+    "Emotion Angry": "Angry",
+    "Emotion Sad": "Sad",
+    "Emotion Afraid": "Afraid",
+    "Emotion Disgusted": "Disgusted",
+    "Emotion Melancholic": "Melancholic",
+    "Emotion Surprised": "Surprised",
+    "Emotion Calm": "Calm",
+    "Max Text Tokens Per Segment": "Max Text Tokens Per Segment",
+    "Max Text Tokens Per Segment Help": "Maximum text tokens per segment for IndexTTS-2 inference.",
+    "Max Mel Tokens": "Max Mel Tokens",
+    "Max Mel Tokens Help": "Controls the maximum mel tokens generated in one request. Higher values can produce longer audio.",
+    "IndexTTS2 Usage Instructions Title": "IndexTTS-2 Usage Instructions",
+    "IndexTTS2 Usage Instructions": "**IndexTTS-2 voice cloning**\n\n1. **Choose a voice**: reuse IndexTTS-1.5 resource audio or upload a reference audio file\n2. **Set API URL**: for example http://192.168.3.6:7863/tts, or enter the service root\n3. **Tune emotion**: speaker is the default; switch to audio, vector, or text when needed\n4. **Tune generation**: temperature, top_p, top_k, num_beams, repetition_penalty, and max_mel_tokens are sent directly to the IndexTTS-2 API\n\n**Notes**:\n- Reference audio quality directly affects cloning quality\n- The first request may load the model and take longer\n- CPU deployments are much slower than GPU deployments",
+    "OmniVoice Usage Instructions Title": "OmniVoice Usage Instructions",
+    "OmniVoice Usage Instructions": "**OmniVoice-Pack speech synthesis**\n\n1. **Automatic voice**: set the API URL and language, then synthesize directly.\n2. **Voice design**: fill instruct with the desired gender, pitch, accent, or style.\n3. **Reference-audio clone**: upload or choose reference audio and fill its matching transcript.\n\n**Notes**:\n- The default service URL is http://127.0.0.1:7866/tts\n- Reference-audio cloning requires reference text when the service has no ASR model loaded\n- OmniVoice returns WAV audio, and NarratoAI estimates subtitle segment timing from the audio duration",
+    "Volcengine Access Key Help": "Volcengine Access Key",
+    "Volcengine Secret Key Help": "Volcengine Secret Key",
+    "Doubao AppID Help": "Doubao TTS application AppID",
+    "Doubao Token Help": "Doubao TTS application Token",
+    "Cluster": "Cluster",
+    "Doubao Cluster Help": "Business cluster. Standard voices use volcano_tts.",
+    "Select Doubao TTS Voice": "Select a Doubao TTS voice",
+    "Voice Rate Help 0.2-3.0": "Adjust voice speed (0.2-3.0)",
+    "Voice Volume Help 0.1-2.0": "Adjust voice volume (0.1-2.0)",
+    "Voice Pitch Help 0.5-1.5": "Adjust voice pitch (0.5-1.5)",
+    "Sentence Silence Duration": "Sentence-end Silence Duration (seconds)",
+    "Sentence Silence Duration Help": "Adjust sentence-end silence duration (0.0-2.0 seconds)",
+    "Doubao TTS API Key Application Process": "Doubao TTS API Key Application Process",
+    "Application Steps": "Application Steps",
+    "Doubao TTS Step 1": "1. Open [https://console.volcengine.com/iam/keymanage](https://console.volcengine.com/iam/keymanage)",
+    "Doubao TTS Step 2": "2. Create a new Access Key and Secret Key",
+    "Doubao TTS Step 3": "3. Open [https://www.volcengine.com/product/voice-tech](https://www.volcengine.com/product/voice-tech)",
+    "Doubao TTS Step 4": "4. Click Start Now",
+    "Doubao TTS Step 5": "5. In the API Service Center on the left, find Speech Synthesis under Audio Generation (note: Speech Synthesis, not the speech synthesis large model)",
+    "Doubao TTS Step 6": "6. Scroll to the bottom to get the APPID and Access Token",
+    "Doubao TTS Fill Credentials Notice": "Fill in the Access Key, Secret Key, AppID, and Token above.",
+    "Doubao TTS configured": "Doubao TTS is configured",
+    "Please configure missing fields": "Please configure: {fields}",
+    "Preview Voice Synthesis": "Preview Voice Synthesis",
+    "Voice Preview Sample": "Thanks for using NarratoAI. If you have any questions or suggestions, please join the community for help and discussion.",
+    "Please configure voice settings first": "Please configure voice settings first",
+    "Voice synthesis successful": "Voice synthesis successful!",
+    "Voice synthesis failed": "Voice synthesis failed. Please check your configuration.",
+    "SoulVoice pitch not supported": "SoulVoice does not support pitch adjustment",
+    "Progress": "Progress",
+    "Generating script...": "Generating script...",
+    "Please select video file first": "Please select a video file first",
+    "Extracting keyframes...": "Extracting keyframes...",
+    "Script generation completed": "Script generation completed",
+    "Script generation completed!": "Script generation completed!",
+    "Video script generated successfully": "✅ Video script generated successfully!",
+    "Generation error": "❌ An error occurred during generation",
+    "Please upload subtitle file first": "Please upload a subtitle file first",
+    "Video": "Video",
+    "Subtitle": "Subtitle",
+    "Preparing script generation": "Preparing script generation",
+    "Script generation failed check logs": "Script generation failed. Please check the logs.",
+    "Parsing subtitles...": "Parsing subtitles...",
+    "Analyzing subtitles with model...": "Waiting for the model to analyze subtitles...",
+    "Subtitle file does not exist": "Subtitle file does not exist",
+    "Subtitle file is empty or unreadable": "Subtitle file is empty or unreadable",
+    "Generating narration copy...": "Generating narration copy...",
+    "Generated narration copy is empty": "The generated narration copy is empty",
+    "Please generate and review narration copy first": "Please generate and review the narration copy first",
+    "Matching narration copy to footage...": "Matching narration copy to footage and timestamps...",
+    "Waiting for model stream...": "Waiting for model stream...",
+    "Streaming unavailable fallback waiting...": "Streaming is unavailable for this request. Waiting for the full response...",
+    "LLM stream window title": "Model reasoning / output stream",
+    "Model reasoning stream": "[Model reasoning]",
+    "Model output preview": "[Model output preview]",
+    "Repairing narration script...": "Repairing narration script...",
+    "Generated narration JSON parse failed": "The generated narration format is invalid and could not be parsed as JSON",
+    "Generated narration missing items field": "The generated narration is missing the required 'items' field",
+    "Generated narration validation failed": "The generated narration script failed validation",
+    "Preparing output...": "Preparing output..."
  }
-}
+}
--- a/webui/i18n/zh.json
+++ b/webui/i18n/zh.json
@ -10,8 +10,9 @@
    "Auto Detect": "自动检测",
    "Video Theme": "视频主题",
    "Generation Prompt": "自定义提示词",
+    "Generation Settings": "生成参数",
    "Save Script": "保存脚本",
-    "Video File": "视频文件（:blue[1️⃣支持上传视频文件(限制2G) 2️⃣大文件建议直接导入 ./resource/videos 目录]）",
+    "Video File": "视频文件",
    "Plot Description": "剧情描述 (:blue[可从 https://www.tvmao.com/ 获取])",
    "Generate Video Keywords": "点击使用AI根据**文案**生成【视频关键】",
    "Please Enter the Video Subject": "请先填写视频文案",
@ -40,9 +41,56 @@
    "Random Background Music": "随机背景音乐",
    "Custom Background Music": "自定义背景音乐",
    "Custom Background Music File": "请输入自定义背景音乐的文件路径",
+    "Background Music Source": "背景音乐来源",
+    "Background Music Source Help": "选择资源目录中的背景音乐、上传新的背景音乐，或关闭背景音乐",
+    "Upload Background Music": "上传背景音乐",
+    "Background Music Path Help": "选择用于视频合成的背景音乐",
+    "No Background Music Resources Found": "未找到资源目录中的背景音乐，请上传背景音乐文件",
+    "Preview Background Music Help": "播放当前背景音乐",
+    "Upload Background Music File": "上传背景音乐文件",
+    "Upload Background Music Help": "上传一个音频文件作为背景音乐",
+    "Background Music uploaded": "背景音乐已上传: {path}",
    "Background Music Volume": "背景音乐音量（0.2表示20%，背景声音不宜过高）",
    "Subtitle Settings": "**字幕设置**",
    "Enable Subtitles": "启用字幕（若取消勾选，下面的设置都将不生效）",
+    "Enable Subtitle Mask": "启用字幕遮罩",
+    "Enable Subtitle Mask Help": "开启后会在烧录新字幕前，先用模糊遮罩覆盖原视频自带字幕区域",
+    "Set Subtitle Mask": "设置字幕遮罩",
+    "Subtitle Mask Summary": "横屏 {landscape_x}%/{landscape_y}% · {landscape_width}%×{landscape_height}%；竖屏 {portrait_x}%/{portrait_y}% · {portrait_width}%×{portrait_height}%",
+    "Subtitle Mask Settings": "字幕遮罩设置",
+    "Subtitle Mask Settings Caption": "按画面百分比保存横屏和竖屏遮罩区域；生成视频时会先叠加柔化遮罩，再烧录新字幕。",
+    "Landscape Subtitle Mask": "横屏遮罩",
+    "Portrait Subtitle Mask": "竖屏遮罩",
+    "Landscape Subtitle Position": "横屏字幕位置",
+    "Portrait Subtitle Position": "竖屏字幕位置",
+    "Save Subtitle Mask Settings": "保存字幕遮罩设置",
+    "Subtitle Mask Left": "左侧位置",
+    "Subtitle Mask Left Help": "遮罩距离画面左侧的百分比",
+    "Subtitle Mask Top": "顶部位置",
+    "Subtitle Mask Top Help": "遮罩距离画面顶部的百分比",
+    "Subtitle Mask Width": "遮罩宽度",
+    "Subtitle Mask Width Help": "遮罩覆盖区域的宽度百分比",
+    "Subtitle Mask Height": "遮罩高度",
+    "Subtitle Mask Height Help": "遮罩覆盖区域的高度百分比",
+    "Subtitle Mask Blur Radius": "模糊半径",
+    "Subtitle Mask Blur Radius Help": "遮罩边缘和背景的模糊强度",
+    "Subtitle Mask Opacity": "遮罩强度",
+    "Subtitle Mask Opacity Help": "遮罩融合强度，数值越高越容易遮住原字幕",
+    "Subtitle Burn Position": "字幕位置",
+    "Subtitle Burn Position Help": "新字幕距离画面顶部的百分比；预览中的蓝线表示当前字幕位置",
+    "Subtitle Mask Preview": "原字幕遮罩预览",
+    "Subtitle Mask Preview Caption": "可上传一段原视频作为预览，也可直接使用当前已选择的原视频；上传内容仅用于预览遮罩位置。",
+    "Upload Subtitle Mask Preview Video": "上传预览原视频",
+    "Upload Subtitle Mask Preview Video Help": "仅用于在弹窗中预览遮罩，不会替换生成视频使用的原视频",
+    "Using Subtitle Mask Preview Video": "当前预览视频: {file}",
+    "Change Subtitle Mask Preview Video": "更换视频",
+    "Subtitle Mask Preview Empty": "请上传预览视频，或先在上方选择原视频",
+    "Subtitle Mask Preview Timeline": "预览时间轴（秒）",
+    "Subtitle Mask Preview Timeline Help": "拖动到原字幕出现的画面，方便微调遮罩区域",
+    "Subtitle Mask Preview Frame Caption": "{time} · {orientation} · 红框为遮罩区域，蓝线为字幕位置",
+    "Subtitle Mask Preview Failed": "无法读取该视频预览，请尝试更换视频文件",
+    "Enable Auto Transcription": "启用自动转录",
+    "Enable Auto Transcription Help": "开启后会在最终视频合并完成后，对整条视频转录生成字幕并压入成片",
    "Font": "字幕字体",
    "Position": "字幕位置",
    "Top": "顶部",
@ -80,8 +128,17 @@
    "Synthesizing Voice": "语音合成中，请稍候...",
    "TTS Provider": "语音合成提供商",
    "Hide Log": "隐藏日志",
+    "Select from resource directory": "从资源目录选择",
+    "Select a video from resource videos directory": "选择 ./resource/videos 目录中的视频",
+    "Upload a new video file up to 2GB": "上传一个新的视频文件，限制 2GB",
+    "Upload new video files up to 2GB each": "上传一个或多个视频文件，单个文件限制 2GB",
+    "Select Video": "选择视频",
+    "Choose a video file": "选择一个视频文件",
+    "Upload Video": "上传视频",
+    "No video files found in resource videos directory": "未在 ./resource/videos 目录中找到视频文件",
    "Upload Local Files": "上传本地文件",
    "File Uploaded Successfully": "文件上传成功",
+    "Selected videos for processing": "已选择 {count} 个视频: {files}",
    "timestamp": "时间戳",
    "Picture description": "图片描述",
    "Narration": "视频文案",
@ -97,23 +154,45 @@
    "Failed to Save Script": "保存脚本失败",
    "Script saved successfully": "脚本保存成功",
    "Video Script": "视频脚本",
+    "Edit Video Script": "查看/编辑视频脚本",
+    "Video script row count": "共 {count} 条脚本",
+    "Video script table help": "在表格中编辑完整脚本 JSON。可新增、删除行；保存时会重新校验并写入脚本文件。",
+    "Raw JSON Preview": "原始 JSON 预览",
+    "Script Column ID": "序号",
+    "Script Column Video ID": "视频",
+    "Script Column Video Name": "视频文件",
+    "Script Column Timestamp": "时间戳",
+    "Script Column Picture": "画面描述",
+    "Script Column Narration": "解说台词",
+    "Script Column OST": "标记",
    "Video Quality": "视频质量",
    "Custom prompt for LLM, leave empty to use default prompt": "自定义提示词，留空则使用默认提示词",
    "Proxy Settings": "代理设置",
    "HTTP_PROXY": "HTTP 代理",
    "HTTPs_PROXY": "HTTPS 代理",
-    "Vision Model Settings": "视频分析模型设置",
-    "Vision Model Provider": "视频分析模型提供商",
-    "Vision API Key": "视频分析 API 密钥",
-    "Vision Base URL": "视频分析接口地址",
-    "Vision Model Name": "视频分析模型名称",
+    "Vision Model Settings": "视觉分析模型设置",
+    "Vision Model Provider": "接口规范",
+    "Vision API Key": "视觉分析 API 密钥",
+    "Vision Base URL": "视觉分析接口地址",
+    "Vision Model Name": "视觉分析模型名称",
    "Text Generation Model Settings": "文案生成模型设置",
    "LLM Model Name": "大语言模型名称",
    "LLM Model API Key": "大语言模型 API 密钥",
-    "Text Model Provider": "文案生成模型提供商",
+    "Text Model Provider": "接口规范",
    "Text API Key": "文案生成 API 密钥",
    "Text Base URL": "文案生成接口地址",
    "Text Model Name": "文案生成模型名称",
+    "Top P": "Top P",
+    "Top K": "Top K",
+    "Max Output Tokens": "最大输出 Token",
+    "Max Output Tokens Help": "单次生成的最大输出长度，0 表示使用服务端默认值",
+    "Thinking Level": "思考等级",
+    "Thinking Level Help": "控制推理/思考强度。自动表示不额外发送思考参数，低/中/高会尝试传递 reasoning_effort",
+    "Thinking Level Auto": "自动",
+    "Thinking Level Off": "关闭",
+    "Thinking Level Low": "低",
+    "Thinking Level Medium": "中",
+    "Thinking Level High": "高",
    "Account ID": "账户 ID",
    "Skip the first few seconds": "跳过开头多少秒",
    "Difference threshold": "差异阈值",
@ -144,6 +223,48 @@
    "Directory cleared": "目录清理完成",
    "Directory does not exist": "目录不存在",
    "Failed to clear directory": "清理目录失败",
+    "FFmpeg Engine Detection": "FFmpeg 引擎检测",
+    "FFmpeg Engine": "FFmpeg 引擎",
+    "FFmpeg Engine Help": "选择当前应用优先使用的 ffmpeg 可执行文件；会自动发现整合包运行时和本机 PATH 中的 ffmpeg",
+    "No FFmpeg engines found": "未发现可用 FFmpeg 引擎",
+    "Custom FFmpeg Path": "自定义 FFmpeg 路径",
+    "Custom FFmpeg Path Help": "如果下拉框没有列出目标引擎，可以粘贴 ffmpeg 可执行文件的绝对路径",
+    "Current FFmpeg Engine": "当前生效引擎",
+    "Save FFmpeg Engine": "保存引擎",
+    "Test Selected FFmpeg": "检测所选 FFmpeg",
+    "Testing FFmpeg engine": "正在检测 FFmpeg 引擎...",
+    "FFmpeg engine saved": "FFmpeg 引擎已保存",
+    "Selected FFmpeg path is invalid": "所选 FFmpeg 路径无效",
+    "FFmpeg detection details": "FFmpeg 检测详情",
+    "FFmpeg source Configured": "已配置",
+    "FFmpeg source NarratoAI packaged runtime": "NarratoAI 整合包运行时",
+    "FFmpeg source Integrated runtime": "内置运行时",
+    "FFmpeg source System PATH": "系统 PATH",
+    "FFmpeg source Homebrew": "Homebrew",
+    "FFmpeg source Python environment": "Python 环境",
+    "FFmpeg source Python executable folder": "Python 可执行目录",
+    "FFmpeg source IMAGEIO_FFMPEG_EXE": "IMAGEIO_FFMPEG_EXE",
+    "FFmpeg source imageio-ffmpeg": "imageio-ffmpeg",
+    "FFmpeg source System": "系统路径",
+    "Version": "版本",
+    "Path": "路径",
+    "Available": "可用",
+    "Unavailable": "不可用",
+    "Hardware Acceleration": "硬件加速",
+    "Subtitle Burn-in": "字幕烧录",
+    "FFmpeg engine passed all checks": "FFmpeg 引擎检测通过：基础功能、硬件加速和字幕烧录均可用",
+    "FFmpeg engine works but hardware acceleration is unavailable": "FFmpeg 基础功能和字幕烧录可用，但硬件加速不可用，将使用软件编码",
+    "FFmpeg engine check failed": "FFmpeg 引擎检测失败",
+    "Hardware acceleration detail": "硬件加速详情",
+    "Subtitle burn-in detail": "字幕烧录详情",
+    "Type": "类型",
+    "Encoder": "编码器",
+    "Message": "信息",
+    "Method": "方式",
+    "Supported Hardware Methods": "支持的硬件加速方法",
+    "Subtitle Filters": "字幕滤镜",
+    "FFmpeg errors": "FFmpeg 错误",
+    "Raw FFmpeg report": "原始 FFmpeg 报告",
    "Subtitle Preview": "字幕预览",
    "One-Click Transcribe": "一键转录",
    "Transcribing...": "正在转录中...",
@ -156,13 +277,422 @@
    "Generate Short Video Script": "AI生成短剧混剪脚本",
    "Adjust the volume of the original audio": "调整原始音频的音量",
    "Original Volume": "视频音量",
-    "Auto Generate": "逐帧解说",
+    "Auto Generate": "逐帧分析",
    "Frame Interval (seconds)": "帧间隔 (秒)",
    "Frame Interval (seconds) (More keyframes consume more tokens)": "帧间隔 (秒) (更多关键帧消耗更多令牌)",
    "Batch Size": "批处理大小",
    "Batch Size (More keyframes consume more tokens)": "批处理大小, 每批处理越少消耗 token 越多",
    "Short Drama Summary": "短剧解说",
-    "Video Type": "视频类型",
-    "Select/Upload Script": "选择/上传脚本"
+    "Film TV Narration": "影视解说",
+    "Video Type": "创作类型",
+    "Select/Upload Script": "自定义脚本",
+    "Script loaded successfully": "脚本加载成功",
+    "Failed to load script": "加载脚本失败",
+    "Failed to save script": "保存脚本失败",
+    "QwenVL model returned invalid response": "QwenVL 模型返回了无效响应",
+    "Testing connection...": "正在测试连接...",
+    "Connection failed": "连接失败",
+    "TTS engine does not support precise subtitles": "⚠️ {engine} 不支持精确字幕生成",
+    "Manual subtitle editing recommendation": "💡 建议使用专业剪辑工具（如剪映、PR 等）手动添加字幕",
+    "Disabled subtitles help": "当前 TTS 引擎不支持字幕生成，请使用其他 TTS 引擎",
+    "Tencent Cloud TTS": "腾讯云 TTS",
+    "Tongyi Qwen3 TTS": "通义千问 Qwen3 TTS",
+    "IndexTTS Voice Clone": "IndexTTS-1.5 语音克隆",
+    "Doubao TTS": "豆包语音 TTS",
+    "Edge TTS features": "完全免费，但服务稳定性一般，不支持语音克隆功能",
+    "Edge TTS use case": "测试和轻量级使用",
+    "Azure Speech Services features": "提供一定免费额度，超出后按量付费，需要绑定海外信用卡",
+    "Azure Speech Services use case": "企业级应用，需要稳定服务",
+    "Tencent Cloud TTS features": "提供免费额度，音质优秀，支持多种音色，国内访问速度快",
+    "Tencent Cloud TTS use case": "个人和企业用户，需要稳定的中文语音合成",
+    "Tongyi Qwen3 TTS features": "阿里云通义千问语音合成，音质优秀，支持多种音色",
+    "High-quality Chinese speech synthesis use case": "需要高质量中文语音合成的用户",
+    "IndexTTS features": "本地/私有部署的 IndexTTS-1.5 语音克隆引擎。选择资源目录音频或上传参考音频后，可按该音色合成旁白。",
+    "IndexTTS use case": "适合需要固定旁白音色、角色配音或批量生成同一音色视频的场景。使用前请先启动 IndexTTS-1.5 API 服务；部署包下载：https://pan.quark.cn/s/0767c9bcefd5",
+    "IndexTTS download link": "下载地址：https://pan.quark.cn/s/0767c9bcefd5",
+    "IndexTTS2 features": "本地/私有部署的 IndexTTS-2 语音克隆引擎，支持情感控制和更完整的生成参数。",
+    "IndexTTS2 use case": "适合需要固定音色、情绪化旁白或更细致采样控制的本地语音合成场景。使用前请先启动 IndexTTS-2 API 服务。",
+    "OmniVoice features": "本地/私有部署的 OmniVoice-Pack 多语种语音合成引擎，支持自动音色、指令音色和参考音频克隆。",
+    "OmniVoice use case": "适合需要本地可控、多语言旁白、音色设计或参考音频克隆的场景。使用前请先启动 OmniVoice-Pack API 服务。",
+    "Doubao TTS features": "火山引擎豆包语音合成，支持多种音色和情感，国内访问速度快",
+    "Local Deployment": "本地部署",
+    "Cloud Service": "云端服务",
+    "Select TTS Engine": "选择 TTS 引擎",
+    "Select TTS Engine Help": "选择您要使用的文本转语音引擎",
+    "TTS Engine Details": "{engine} 详细说明",
+    "Features": "特点",
+    "Use Case": "适用场景",
+    "Registration URL": "注册地址",
+    "Voice Selection": "音色选择",
+    "Select Edge TTS Voice": "选择 Edge TTS 音色",
+    "Edge TTS Voice Description": "Edge TTS 音色说明",
+    "Loaded voice count": "已加载 {count} 个音色",
+    "Female Voice": "女声",
+    "Male Voice": "男声",
+    "Voice Volume": "音量调节",
+    "Voice Volume Help Percent": "调节语音音量 (0-100)",
+    "Voice Rate": "语速调节",
+    "Voice Rate Help 0.5-2.0": "调节语音速度 (0.5-2.0 倍速)",
+    "Voice Pitch": "语调调节",
+    "Voice Pitch Help Percent": "调节语音音调 (-50% 到 +50%)",
+    "Service Region": "服务区域",
+    "Service Region Placeholder": "例如：eastus",
+    "Azure Service Region Help": "Azure Speech Services 服务区域，如：eastus、westus2、eastasia 等",
+    "Azure Speech Key Help": "Azure Speech Services API 密钥",
+    "Voice Name": "音色名称",
+    "Azure Voice Name Help": "输入 Azure Speech Services 音色名称，直接使用官方音色名称即可。例如：zh-CN-YunzeNeural",
+    "Common Voice Reference": "常用音色参考",
+    "Chinese Voices": "中文音色",
+    "English Voices": "英文音色",
+    "Multilingual": "多语言",
+    "Azure Voices Docs Notice": "更多音色请参考 [Azure Speech Services 官方文档](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support)",
+    "Quick Select": "快速选择",
+    "Chinese Female Voice": "中文女声",
+    "Chinese Male Voice": "中文男声",
+    "English Female Voice": "英文女声",
+    "Voice name valid": "音色名称有效: {voice}",
+    "Voice name format may be invalid": "音色名称格式可能不正确: {voice}",
+    "Azure voice name format notice": "Azure 音色名称通常格式为: [语言]-[地区]-[名称]Neural",
+    "Azure Speech Services configured": "Azure Speech Services 配置已设置",
+    "Please configure service region": "请配置服务区域",
+    "Please configure API Key": "请配置 API Key",
+    "Language": "界面语言",
+    "Task failed": "任务失败",
+    "Script file cannot be empty": "脚本文件不能为空",
+    "Video file cannot be empty": "视频文件不能为空",
+    "Export to Jianying Draft": "📤 导出到剪映草稿",
+    "Please configure Jianying draft folder in basic settings": "请在基础设置中配置剪映草稿地址",
+    "Jianying draft folder does not exist": "剪映草稿文件夹不存在: {path}",
+    "Jianying export dialog title": "确认草稿名称",
+    "Jianying export dialog description": "导出前请确认剪映草稿名称，完成后可在剪映草稿目录中打开。",
+    "Jianying export destination": "保存目录",
+    "Jianying draft name": "草稿名称",
+    "Please enter Jianying draft name": "请输入剪映草稿名称",
+    "Confirm Export": "确认导出",
+    "Please enter draft name": "请输入草稿名称",
+    "Failed to build parameters": "参数构建失败",
+    "Exporting to Jianying draft...": "正在导出到剪映草稿，请稍候...",
+    "Jianying draft exported successfully": "✅ 成功导出到剪映草稿: {name}",
+    "Draft saved to": "📁 草稿已保存到: {path}",
+    "Failed to export Jianying draft": "❌ 导出到剪映草稿失败",
+    "Cancel": "取消",
+    "LLM initialization failed": "⚠️ LLM 初始化失败: {error}\n\n请检查配置文件和依赖是否正确安装。",
+    "Jianying Draft Settings": "剪映草稿设置",
+    "Jianying Draft Folder Path": "剪映草稿文件夹路径",
+    "Jianying Draft Folder Path Help": "剪映草稿文件夹路径，例如：C:\\Users\\用户名\\Documents\\JianyingPro Drafts",
+    "Custom API endpoint help": "OpenAI 兼容接口地址。使用第三方或自建网关时填写完整 /v1 地址；使用 OpenAI 官方接口可留空。",
+    "Recommended API endpoint": "推荐接口地址",
+    "OpenAI compatible gateway help": "{model_type} 使用 OpenAI 兼容接口，请填写完整的接口地址。",
+    "Vision model": "视觉分析模型",
+    "Text model": "文案生成模型",
+    "Model Name Input Help": "输入完整模型名称\n\n常用示例:",
+    "OpenAI compatible providers help": "这里不限定模型厂商；OpenAI、DeepSeek、OpenRouter、SiliconFlow 或自建网关均可，只需提供兼容 OpenAI 的接口地址和模型名称。",
+    "OpenAI compatible protocol": "OpenAI 兼容",
+    "OpenAI compatible protocol help": "不是限定 OpenAI 官方模型；只要模型服务支持 OpenAI Chat Completions 兼容接口即可。",
+    "Provider API Key Help": "模型服务的 API 密钥\n\n常见获取地址:",
+    "Please fill OpenAI compatible gateway": "请在上方填写 OpenAI 兼容网关地址，例如：{example}",
+    "Please enter API key": "请先输入 API 密钥",
+    "Please enter model name": "请先输入模型名称",
+    "Connection test error": "测试连接时发生错误",
+    "Vision model config saved": "视觉分析模型配置已保存（OpenAI 兼容）",
+    "Text model config saved": "文案生成模型配置已保存（OpenAI 兼容）",
+    "Failed to save config": "保存配置失败",
+    "Custom Position (% from top)": "自定义位置（距顶部百分比）",
+    "Please enter a value between 0 and 100": "请输入 0 到 100 之间的值",
+    "Please enter a valid number": "请输入有效数字",
+    "None": "无",
+    "Uploaded subtitle": "已上传字幕: {file}",
+    "Encoding": "编码",
+    "Size": "大小",
+    "Characters": "字符",
+    "Ali Bailian Fun-ASR Subtitle Transcription": "字幕处理",
+    "Subtitle Processing Method": "字幕处理方式",
+    "Fun-ASR Backend": "Fun-ASR 后端",
+    "Local FunASR-Pack API": "FunASR(本地部署)",
+    "Local FireRedASR API": "FireRedASR2(本地部署)",
+    "Ali Bailian Online Fun-ASR": "FunASR(在线服务)",
+    "Local Fun-ASR upload caption": "将使用上方当前视频，通过本机运行的 FunASR-Pack API 生成 SRT 字幕。",
+    "Local FireRed-ASR upload caption": "将使用上方当前视频，通过本机运行的 FireRedASR2-AED-Pack API 生成 SRT 字幕。",
+    "Fun-ASR upload caption": "将使用上方当前视频，自动上传到阿里百炼临时存储并通过 fun-asr 生成 SRT 字幕。",
+    "Auto Transcription Local Caption": "将在最终视频合并完成后，通过本机运行的 FunASR-Pack API 生成 SRT 字幕。",
+    "Auto Transcription FireRed Caption": "将在最终视频合并完成后，通过本机运行的 FireRedASR2-AED-Pack API 生成 SRT 字幕。",
+    "Auto Transcription Online Caption": "将在最终视频合并完成后，自动上传到阿里百炼临时存储并通过 fun-asr 生成 SRT 字幕。",
+    "Local FunASR-Pack API URL": "本地 FunASR-Pack API 地址",
+    "Local FunASR-Pack API URL Help": "例如 http://127.0.0.1:7860；也可以直接填到 /asr 的完整地址。",
+    "Local FireRedASR API URL": "本地 ASR API 地址",
+    "Local FireRedASR API URL Help": "例如 http://127.0.0.1:7867；也可以直接填到 /asr 的完整地址。",
+    "Fun-ASR Hotword": "热词",
+    "Fun-ASR Hotword Help": "可选，传给本地 FunASR-Pack 的热词参数。",
+    "Enable speaker diarization": "启用说话人分段",
+    "Enable speaker diarization Help": "需要本地 FunASR-Pack 已启用并加载 spk 模型。",
+    "API Key URL": "API Key 获取地址",
+    "Ali Bailian API Key": "阿里百炼 API Key",
+    "Ali Bailian API Key Help": "请输入你自己的阿里百炼 API Key；保存配置后会写入本地 config.toml",
+    "Upload media to transcribe": "上传需要转录的音频/视频",
+    "Using selected video for subtitle transcription": "将使用当前视频生成字幕: {file}",
+    "Using selected videos for subtitle transcription": "将使用当前 {count} 个视频生成字幕: {files}",
+    "Please select or upload a video first": "请先在上方选择或上传视频文件",
+    "Selected video file does not exist": "当前视频文件不存在，请重新选择或上传",
+    "Selected video files do not exist": "以下视频文件不存在，请重新选择或上传: {files}",
+    "Transcribe subtitles": "转录字幕",
+    "Calibrate subtitles": "校准字幕",
+    "Please enter Ali Bailian API Key": "请先输入阿里百炼 API Key",
+    "Please enter local FunASR-Pack API URL": "请先输入本地 FunASR-Pack API 地址",
+    "Please enter local FireRedASR API URL": "请先输入本地 ASR API 地址",
+    "Please upload media to transcribe": "请先上传需要转录的音频或视频文件",
+    "Transcribing with local FunASR-Pack...": "正在使用本地 FunASR-Pack 转写字幕，请稍候...",
+    "Transcribing with local FireRedASR...": "正在使用本地 ASR 转写字幕，请稍候...",
+    "Transcribing with Fun-ASR...": "正在使用阿里百炼 Fun-ASR 转写字幕，请稍候...",
+    "Fun-ASR failed without subtitle file": "Fun-ASR 转写失败：未生成字幕文件",
+    "Subtitle transcription succeeded": "字幕转写成功: {file}",
+    "Subtitle transcription succeeded for multiple files": "字幕转写成功，共 {count} 个文件: {files}",
+    "Calibrating subtitles...": "正在使用大模型校准字幕，请稍候...",
+    "Subtitle calibration succeeded": "字幕校准成功: {file}",
+    "Subtitle calibration succeeded for multiple files": "字幕校准成功，共 {count} 个文件: {files}",
+    "Subtitle calibration failed": "字幕校准失败",
+    "Transcribed subtitles storage hint": "之前转录生成的字幕保存在 {path}，可从该目录拖入上传",
+    "Tavily Search Settings": "Tavily 联网搜索",
+    "Tavily API Key": "Tavily API Key",
+    "Tavily API Key Help": "用于剧情理解前的联网检索。开启“联网搜索”后，会先按作品名称检索剧情、人物和背景信息，再结合字幕分析。",
+    "Tavily config saved": "Tavily 配置已保存",
+    "联网搜索": "联网搜索",
+    "Enable Web Search Help": "开启后，剧情理解会先使用 Tavily 按作品名称联网检索，再结合检索结果和字幕分析剧情。",
+    "Please configure Tavily API Key in Basic Settings": "请先在基础设置中配置 Tavily API Key",
+    "Please enter short drama name before web search": "开启联网搜索前，请先填写短剧名称",
+    "Please enter film/tv title before web search": "开启联网搜索前，请先填写影视名称",
+    "Searching short drama with Tavily...": "正在使用 Tavily 检索短剧信息...",
+    "Tavily search failed": "Tavily 检索失败",
+    "剧情理解": "剧情理解",
+    "剧情理解结果": "剧情理解结果",
+    "Analyzing plot...": "正在理解剧情...",
+    "Plot analysis completed": "剧情理解完成",
+    "Please generate or upload subtitles first": "请先转写或上传字幕",
+    "Please transcribe or upload subtitles first": "请先转写或上传字幕",
+    "Fun-ASR transcription failed": "Fun-ASR 字幕转写失败",
+    "Validating script format...": "正在验证脚本格式...",
+    "Script format validation failed": "脚本格式验证失败",
+    "Error Message": "错误信息",
+    "Details": "详细说明",
+    "Correct script format example": "正确的脚本格式示例",
+    "Script format validation error": "格式验证过程中发生错误",
+    "Script validated and saved successfully": "✅ 脚本格式验证通过，保存成功！",
+    "Tencent Secret ID Help": "请输入您的腾讯云 Secret ID",
+    "Tencent Secret Key Help": "请输入您的腾讯云 Secret Key",
+    "Tencent Service Region Help": "选择腾讯云 TTS 服务地域",
+    "Custom Voice": "自定义音色",
+    "Select Tencent TTS Voice": "选择腾讯云 TTS 音色",
+    "Tencent Cloud TTS Voice Description": "腾讯云 TTS 音色说明",
+    "Female Voices": "女声音色",
+    "Male Voices": "男声音色",
+    "Tencent More Voices Notice": "更多音色请参考腾讯云官方文档",
+    "Qwen DashScope API Key Help": "通义千问 DashScope API Key",
+    "TTS Model Name": "模型名称",
+    "Qwen TTS Model Help": "Qwen TTS 模型名，例如 qwen3-tts-flash",
+    "Select Qwen3 TTS Voice": "选择 Qwen3 TTS 音色",
+    "API URL": "API 地址",
+    "IndexTTS API URL Help": "IndexTTS-1.5 API 服务地址",
+    "IndexTTS2 API URL Help": "IndexTTS-2 API 服务地址，可填写服务根地址或完整 /tts 地址",
+    "OmniVoice API URL Help": "OmniVoice-Pack API 服务地址，可填写服务根地址或完整 /tts 地址",
+    "OmniVoice Language Code": "合成语言",
+    "OmniVoice Language Code Help": "传给 OmniVoice-Pack 的 language 参数，例如 zh、en。",
+    "OmniVoice Generation Mode": "生成模式",
+    "OmniVoice Generation Mode Help": "自动音色无需额外参数；指令音色使用描述词；参考音频克隆需要参考音频和对应文本。",
+    "OmniVoice Mode Auto": "自动音色",
+    "OmniVoice Mode Voice Design": "指令音色",
+    "OmniVoice Mode Voice Clone": "参考音频克隆",
+    "OmniVoice Instruct": "音色指令",
+    "OmniVoice Instruct Help": "描述希望生成的音色，例如性别、音高、口音或风格。",
+    "OmniVoice Instruct Placeholder": "例如：female, low pitch, british accent",
+    "OmniVoice Reference Text": "参考音频文本",
+    "OmniVoice Reference Text Help": "参考音频对应的逐字文本；当前部署未启用 ASR 时必须填写。",
+    "OmniVoice Reference Text Placeholder": "请输入参考音频中实际朗读的内容",
+    "OmniVoice Num Step Help": "扩散生成步数，值越大通常质量更高但速度更慢。",
+    "OmniVoice Guidance Scale Help": "控制文本条件的引导强度。",
+    "OmniVoice Duration": "目标时长（秒）",
+    "OmniVoice Duration Help": "0 表示由模型自动决定时长。",
+    "OmniVoice Denoise": "启用降噪",
+    "OmniVoice Denoise Help": "让 OmniVoice-Pack 对生成结果执行降噪处理。",
+    "OmniVoice Postprocess Output": "后处理输出",
+    "OmniVoice Postprocess Output Help": "启用 OmniVoice-Pack 的输出后处理。",
+    "OmniVoice Preprocess Prompt": "预处理文本",
+    "OmniVoice Preprocess Prompt Help": "启用 OmniVoice-Pack 的文本预处理。",
+    "Reference Audio Source": "参考音频来源",
+    "Reference Audio Source Help": "从资源目录选择参考音频，或上传新的参考音频",
+    "Select from Resource Directory": "从资源目录选择",
+    "Upload Reference Audio": "上传参考音频",
+    "Reference Audio Path": "参考音频",
+    "Reference Audio Path Help": "选择用于语音克隆的参考音频（WAV/MP3 格式，建议 3-10 秒）",
+    "No Reference Audio Resources Found": "未找到资源目录中的参考音频，请上传参考音频文件",
+    "Preview Reference Audio": "试听",
+    "Preview Reference Audio Help": "播放当前参考音频",
+    "Upload Reference Audio File": "上传参考音频文件",
+    "Upload Reference Audio Help": "上传一段清晰的音频用于语音克隆",
+    "Audio uploaded": "音频已上传: {path}",
+    "Inference Mode": "推理模式",
+    "Standard Inference": "普通推理",
+    "Fast Inference": "快速推理",
+    "Inference Mode Help": "普通推理质量更高但速度较慢，快速推理速度更快但质量略低",
+    "Advanced Parameters": "高级参数",
+    "Sampling Temperature": "采样温度 (Temperature)",
+    "Sampling Temperature Help": "控制随机性，值越高输出越随机，值越低越确定",
+    "Top P Help": "nucleus 采样的概率阈值，值越小结果越确定",
+    "Top K Help": "top-k 采样的 k 值，0 表示不使用 top-k",
+    "Num Beams": "束搜索 (Num Beams)",
+    "Num Beams Help": "束搜索的 beam 数量，值越大质量可能越好但速度越慢",
+    "Repetition Penalty": "重复惩罚 (Repetition Penalty)",
+    "Repetition Penalty Help": "值越大越能避免重复，但过大可能导致不自然",
+    "Enable Sampling": "启用采样",
+    "Enable Sampling Help": "启用采样可以获得更自然的语音",
+    "IndexTTS Usage Instructions Title": "IndexTTS-1.5 使用说明",
+    "IndexTTS Usage Instructions": "**零样本语音克隆**\n\n1. **准备参考音频**：上传或指定一段清晰的音频文件（建议 3-10 秒）\n2. **设置 API 地址**：确保 IndexTTS-1.5 服务正常运行\n3. **开始合成**：系统会自动使用参考音频的音色合成新语音\n\n**注意事项**：\n- 参考音频质量直接影响合成效果\n- 建议使用无背景噪音的清晰音频\n- 文本长度建议控制在合理范围内\n- 首次合成可能需要较长时间",
+    "IndexTTS2 Emotion Parameters": "情感参数",
+    "Emotion Mode": "情感控制方式",
+    "Emotion Mode Help": "选择 IndexTTS-2 的情感控制来源",
+    "Emotion Mode Speaker": "与音色参考相同",
+    "Emotion Mode Audio": "使用情感参考音频",
+    "Emotion Mode Vector": "使用情感向量",
+    "Emotion Mode Text": "使用情感描述文本",
+    "Emotion Alpha": "情感权重",
+    "Emotion Alpha Help": "控制情感条件的影响强度，0 表示弱，1 表示强",
+    "Emotion Reference Audio Path": "情感参考音频路径",
+    "Emotion Reference Audio Path Help": "emotion_mode=audio 时使用的本地情感参考音频路径",
+    "Emotion Text": "情感描述文本",
+    "Emotion Text Help": "emotion_mode=text 时使用的情感描述，例如开心、紧张、委屈",
+    "Emotion Text Placeholder": "例如：沉稳、紧张、开心",
+    "Use Random Emotion": "启用随机情感",
+    "Use Random Emotion Help": "让 IndexTTS-2 在生成时使用随机情感采样",
+    "Emotion Happy": "开心",
+    "Emotion Angry": "愤怒",
+    "Emotion Sad": "悲伤",
+    "Emotion Afraid": "害怕",
+    "Emotion Disgusted": "厌恶",
+    "Emotion Melancholic": "忧郁",
+    "Emotion Surprised": "惊讶",
+    "Emotion Calm": "平静",
+    "Max Text Tokens Per Segment": "单段最大文本 Token",
+    "Max Text Tokens Per Segment Help": "IndexTTS-2 分段推理的最大文本 token 数",
+    "Max Mel Tokens": "最大 Mel Tokens",
+    "Max Mel Tokens Help": "控制单次生成的最大 mel token 数，值越大可生成更长音频",
+    "IndexTTS2 Usage Instructions Title": "IndexTTS-2 使用说明",
+    "IndexTTS2 Usage Instructions": "**IndexTTS-2 语音克隆**\n\n1. **选择音色**：复用 IndexTTS-1.5 的资源音频或上传参考音频\n2. **设置 API 地址**：例如 http://192.168.3.6:7863/tts，也可以填写服务根地址\n3. **调整情感参数**：默认使用 speaker，可按需切换到 audio、vector 或 text\n4. **调整生成参数**：temperature、top_p、top_k、num_beams、repetition_penalty 和 max_mel_tokens 会直接传给 IndexTTS-2 接口\n\n**注意事项**：\n- 参考音频质量会直接影响克隆效果\n- 首次请求可能需要加载模型，耗时更长\n- CPU 部署生成速度会明显慢于 GPU",
+    "OmniVoice Usage Instructions Title": "OmniVoice 使用说明",
+    "OmniVoice Usage Instructions": "**OmniVoice-Pack 语音合成**\n\n1. **自动音色**：只需要设置 API 地址和语言，可直接合成。\n2. **指令音色**：填写 instruct 描述想要的性别、音高、口音或风格。\n3. **参考音频克隆**：上传或选择参考音频，并填写该音频对应文本。\n\n**注意事项**：\n- 当前默认服务地址为 http://127.0.0.1:7866/tts\n- 参考音频克隆在服务未加载 ASR 模型时必须填写参考文本\n- OmniVoice 返回 WAV 音频，系统会按音频时长估算字幕段落",
+    "Volcengine Access Key Help": "火山引擎 Access Key",
+    "Volcengine Secret Key Help": "火山引擎 Secret Key",
+    "Doubao AppID Help": "豆包语音应用 AppID",
+    "Doubao Token Help": "豆包语音应用 Token",
+    "Cluster": "集群",
+    "Doubao Cluster Help": "业务集群，标准音色使用 volcano_tts",
+    "Select Doubao TTS Voice": "选择豆包语音 TTS 音色",
+    "Voice Rate Help 0.2-3.0": "调节语音速度 (0.2-3.0)",
+    "Voice Volume Help 0.1-2.0": "调节语音音量 (0.1-2.0)",
+    "Voice Pitch Help 0.5-1.5": "调节语音音高 (0.5-1.5)",
+    "Sentence Silence Duration": "句尾静音时长 (秒)",
+    "Sentence Silence Duration Help": "调节句尾静音时长 (0.0-2.0 秒)",
+    "Doubao TTS API Key Application Process": "豆包语音 TTS API Key申请流程",
+    "Application Steps": "申请步骤",
+    "Doubao TTS Step 1": "1. 打开 [https://console.volcengine.com/iam/keymanage](https://console.volcengine.com/iam/keymanage)",
+    "Doubao TTS Step 2": "2. 新建 Access Key 和 Secret Key",
+    "Doubao TTS Step 3": "3. 打开 [https://www.volcengine.com/product/voice-tech](https://www.volcengine.com/product/voice-tech)",
+    "Doubao TTS Step 4": "4. 点击立即使用",
+    "Doubao TTS Step 5": "5. 在最左边的 API 服务中心找到音频生成下面的语音合成（注意：是语音合成，不是语音合成大模型）",
+    "Doubao TTS Step 6": "6. 翻到最下面获取 APPID 和 Access Token",
+    "Doubao TTS Fill Credentials Notice": "请将获取到的 Access Key、Secret Key、AppID 和 Token 填写到上方的配置中",
+    "Doubao TTS configured": "豆包语音 TTS 配置已设置",
+    "Please configure missing fields": "请配置: {fields}",
+    "Preview Voice Synthesis": "试听语音合成",
+    "Voice Preview Sample": "感谢关注 NarratoAI，有任何问题或建议，可以加入社区频道求助或讨论",
+    "Please configure voice settings first": "请先配置语音设置",
+    "Voice synthesis successful": "语音合成成功！",
+    "Voice synthesis failed": "语音合成失败，请检查配置",
+    "SoulVoice pitch not supported": "SoulVoice 引擎不支持音调调节",
+    "上传字幕文件": "上传字幕",
+    "清除已上传字幕": "清除已上传字幕",
+    "无法读取字幕文件，请检查文件编码（支持 UTF-8、UTF-16、GBK、GB2312）": "无法读取字幕文件，请检查文件编码（支持 UTF-8、UTF-16、GBK、GB2312）",
+    "字幕文件内容似乎为空，请检查文件": "字幕文件内容似乎为空，请检查文件",
+    "字幕上传成功": "字幕上传成功",
+    "短剧名称": "短剧名称",
+    "影视名称": "影视名称",
+    "解说语言": "解说语言",
+    "自定义解说语言": "自定义解说语言",
+    "例如：意大利语（意大利）": "例如：意大利语（意大利）",
+    "请输入自定义解说语言": "请输入自定义解说语言",
+    "简体中文（中国）": "简体中文（中国）",
+    "英语（美国）": "英语（美国）",
+    "日语（日本）": "日语（日本）",
+    "韩语（韩国）": "韩语（韩国）",
+    "法语（法国）": "法语（法国）",
+    "德语（德国）": "德语（德国）",
+    "西班牙语（西班牙）": "西班牙语（西班牙）",
+    "葡萄牙语（巴西）": "葡萄牙语（巴西）",
+    "俄语（俄罗斯）": "俄语（俄罗斯）",
+    "自定义": "自定义",
+    "短剧类型": "短剧类型",
+    "自定义短剧类型": "自定义短剧类型",
+    "影视类型": "影视类型",
+    "自定义影视类型": "自定义影视类型",
+    "原片占比": "原片占比",
+    "例如：豪门虐恋": "例如：豪门虐恋",
+    "例如：悬疑犯罪": "例如：悬疑犯罪",
+    "请输入自定义短剧类型": "请输入自定义短剧类型",
+    "请输入自定义影视类型": "请输入自定义影视类型",
+    "逆袭/复仇": "逆袭/复仇",
+    "霸总/甜宠": "霸总/甜宠",
+    "家庭伦理": "家庭伦理",
+    "古装/权谋": "古装/权谋",
+    "悬疑/犯罪": "悬疑/犯罪",
+    "都市情感": "都市情感",
+    "年代/乡村": "年代/乡村",
+    "剧情/情感": "剧情/情感",
+    "动作/冒险": "动作/冒险",
+    "喜剧/轻松": "喜剧/轻松",
+    "科幻/奇幻": "科幻/奇幻",
+    "历史/战争": "历史/战争",
+    "恐怖/惊悚": "恐怖/惊悚",
+    "生成解说文案": "生成解说文案",
+    "生成剪辑脚本": "生成剪辑脚本",
+    "短剧解说文案": "短剧解说文案",
+    "影视解说文案": "影视解说文案",
+    "Narration Copy Help": "先点击生成解说文案；审核、删改或重写这段文案后，再点击生成剪辑脚本匹配画面和时间戳。",
+    "Narration copy generated successfully": "解说文案已生成，可先审核修改",
+    "生成短剧解说脚本": "生成短剧解说脚本",
+    "请输入视频脚本": "请输入视频脚本",
+    "自定义片段": "自定义片段",
+    "设置需要生成的短视频片段数量": "设置需要生成的短视频片段数量",
+    "原生Gemini模型连接成功": "原生 Gemini 模型连接成功",
+    "原生Gemini模型连接失败": "原生 Gemini 模型连接失败",
+    "OpenAI兼容Gemini代理连接成功": "OpenAI 兼容 Gemini 代理连接成功",
+    "OpenAI兼容Gemini代理连接失败": "OpenAI 兼容 Gemini 代理连接失败",
+    "Progress": "进度",
+    "Generating script...": "正在生成脚本...",
+    "Please select video file first": "请先选择视频文件",
+    "Extracting keyframes...": "正在提取关键帧...",
+    "Script generation completed": "脚本生成完成",
+    "Script generation completed!": "🎉 脚本生成完成！",
+    "Video script generated successfully": "✅ 视频脚本生成成功！",
+    "Generation error": "❌ 生成过程中发生错误",
+    "Please upload subtitle file first": "请先上传字幕文件",
+    "Video": "视频",
+    "Subtitle": "字幕",
+    "Preparing script generation": "开始准备生成脚本",
+    "Script generation failed check logs": "生成脚本失败，请检查日志",
+    "Parsing subtitles...": "正在解析字幕...",
+    "Analyzing subtitles with model...": "正在等待模型分析字幕...",
+    "Subtitle file does not exist": "字幕文件不存在",
+    "Subtitle file is empty or unreadable": "字幕文件内容为空或无法读取",
+    "Generating narration copy...": "正在生成文案...",
+    "Generated narration copy is empty": "生成的解说文案为空",
+    "Please generate and review narration copy first": "请先生成并审核解说文案",
+    "Matching narration copy to footage...": "正在根据解说文案匹配画面和时间戳...",
+    "Waiting for model stream...": "正在等待模型流式输出...",
+    "Streaming unavailable fallback waiting...": "当前接口未返回流式内容，正在等待完整响应...",
+    "LLM stream window title": "模型思考 / 输出流",
+    "Model reasoning stream": "【模型思考】",
+    "Model output preview": "【模型输出预览】",
+    "Repairing narration script...": "正在修复解说脚本...",
+    "Generated narration JSON parse failed": "生成的解说文案格式错误，无法解析为 JSON",
+    "Generated narration missing items field": "生成的解说文案缺少必要的 'items' 字段",
+    "Generated narration validation failed": "生成的解说脚本校验失败",
+    "Preparing output...": "整理输出..."
  }
-}
+}
--- a/webui/tools/generate_script_docu.py
+++ b/webui/tools/generate_script_docu.py
@ -24,7 +24,7 @@ def _normalize_progress_value(progress: float | int) -> int:
    return max(0, min(100, int(round(value))))


-def generate_script_docu(params):
+def generate_script_docu(params, tr=lambda key: key):
    """
    生成纪录片视频脚本。
    要求: 原视频无字幕无配音
@ -39,12 +39,12 @@ def generate_script_docu(params):
        if message:
            status_text.text(f"🎬 {message}")
        else:
-            status_text.text(f"📊 进度: {normalized_progress}%")
+            status_text.text(f"📊 {tr('Progress')}: {normalized_progress}%")

    try:
-        with st.spinner("正在生成脚本..."):
+        with st.spinner(tr("Generating script...")):
            if not params.video_origin_path:
-                st.error("请先选择视频文件")
+                st.error(tr("Please select video file first"))
                return

            vision_llm_provider = (
@ -76,7 +76,7 @@ def generate_script_docu(params):
                "vision_max_concurrency", 2
            )

-            update_progress(10, "正在提取关键帧...")
+            update_progress(10, tr("Extracting keyframes..."))
            service = DocumentaryFrameAnalysisService()
            script_items = asyncio.run(
                service.generate_documentary_script(
@ -100,15 +100,15 @@ def generate_script_docu(params):
                st.session_state["video_clip_json"] = script
            elif isinstance(script, str):
                st.session_state["video_clip_json"] = json.loads(script)
-            update_progress(100, "脚本生成完成")
+            update_progress(100, tr("Script generation completed"))

        time.sleep(0.1)
        progress_bar.progress(100)
-        status_text.text("🎉 脚本生成完成！")
-        st.success("✅ 视频脚本生成成功！")
+        status_text.text(tr("Script generation completed!"))
+        st.success(tr("Video script generated successfully"))

    except Exception as err:
-        st.error(f"❌ 生成过程中发生错误: {str(err)}")
+        st.error(f"{tr('Generation error')}: {str(err)}")
        logger.exception(f"生成脚本时发生错误\n{traceback.format_exc()}")
    finally:
        time.sleep(2)
--- a/webui/tools/generate_script_short.py
+++ b/webui/tools/generate_script_short.py
@ -27,21 +27,21 @@ def generate_script_short(tr, params, custom_clips=5):
        if message:
            status_text.text(f"{progress}% - {message}")
        else:
-            status_text.text(f"进度: {progress}%")
+            status_text.text(f"{tr('Progress')}: {progress}%")

    try:
-        with st.spinner("正在生成脚本..."):
+        with st.spinner(tr("Generating script...")):
            # ========== 严格验证：必须上传视频和字幕（与短剧解说保持一致）==========
            # 1. 验证视频文件
            video_path = getattr(params, "video_origin_path", None)
            if not video_path or not str(video_path).strip():
-                st.error("请先选择视频文件")
+                st.error(tr("Please select video file first"))
                st.stop()

            try:
                ensure_existing_file(
                    str(video_path),
-                    label="视频",
+                    label=tr("Video"),
                    allowed_exts=(".mp4", ".mov", ".avi", ".flv", ".mkv"),
                )
            except InputValidationError as e:
@ -51,13 +51,13 @@ def generate_script_short(tr, params, custom_clips=5):
            # 2. 验证字幕文件（移除推断逻辑，必须上传）
            subtitle_path = st.session_state.get("subtitle_path")
            if not subtitle_path or not str(subtitle_path).strip():
-                st.error("请先上传字幕文件")
+                st.error(tr("Please upload subtitle file first"))
                st.stop()

            try:
                subtitle_path = ensure_existing_file(
                    str(subtitle_path),
-                    label="字幕",
+                    label=tr("Subtitle"),
                    allowed_exts=(".srt",),
                )
            except InputValidationError as e:
@ -78,7 +78,7 @@ def generate_script_short(tr, params, custom_clips=5):
            vision_model = st.session_state.get(f'vision_{vision_llm_provider}_model_name') or config.app.get(f'vision_{vision_llm_provider}_model_name', "")
            vision_base_url = st.session_state.get(f'vision_{vision_llm_provider}_base_url') or config.app.get(f'vision_{vision_llm_provider}_base_url', "")

-            update_progress(20, "开始准备生成脚本")
+            update_progress(20, tr("Preparing script generation"))

            # ========== 调用后端生成脚本 ==========
            from app.services.SDP.generate_script_short import generate_script_result
@ -103,7 +103,7 @@ def generate_script_short(tr, params, custom_clips=5):
            )

            if result.get("status") != "success":
-                st.error(result.get("message", "生成脚本失败，请检查日志"))
+                st.error(result.get("message", tr("Script generation failed check logs")))
                st.stop()

            script = result.get("script")
@ -114,14 +114,14 @@ def generate_script_short(tr, params, custom_clips=5):
            elif isinstance(script, str):
                st.session_state['video_clip_json'] = json.loads(script)

-            update_progress(80, "脚本生成完成")
+            update_progress(80, tr("Script generation completed"))

        time.sleep(0.1)
        progress_bar.progress(100)
-        status_text.text("脚本生成完成！")
-        st.success("视频脚本生成成功！")
+        status_text.text(tr("Script generation completed!"))
+        st.success(tr("Video script generated successfully"))

    except Exception as err:
        progress_bar.progress(100)
-        st.error(f"生成过程中发生错误: {str(err)}")
+        st.error(f"{tr('Generation error')}: {str(err)}")
        logger.exception(f"生成脚本时发生错误\n{traceback.format_exc()}")
--- a/webui/tools/generate_short_summary.py
+++ b/webui/tools/generate_short_summary.py
@ -11,18 +11,92 @@ import os
 import json
 import time
 import traceback
+import html
 import streamlit as st
 from loguru import logger

 from app.config import config
-from app.services.SDE.short_drama_explanation import analyze_subtitle, generate_narration_script
+from app.services.SDE.short_drama_explanation import (
+    analyze_subtitle,
+    generate_narration_copy as generate_narration_copy_legacy,
+    match_narration_copy_to_script as match_narration_copy_to_script_legacy,
+)
 from app.services.subtitle_text import read_subtitle_text
+from app.services.short_drama_narration_validation import (
+    normalize_script_video_sources,
+)
+from app.services.tavily_search import TavilySearchError, format_search_context, search_story_context
 # 导入新的LLM服务模块 - 确保提供商被注册
 import app.services.llm  # 这会触发提供商注册
 from app.services.llm.migration_adapter import SubtitleAnalyzerAdapter
 import re


+# Keys kept on each script item by _strip_planner_only_fields; any other key is dropped.
+PUBLIC_SCRIPT_FIELDS = ["_id", "video_id", "video_name", "timestamp", "picture", "narration", "OST"]
+# Default prompt_category values passed to the LLM adapter / legacy helpers.
+SHORT_DRAMA_PROMPT_CATEGORY = "short_drama_narration"
+FILM_TV_PROMPT_CATEGORY = "film_tv_narration"
+# Keyword strings forwarded as search_keywords to the Tavily story-context search.
+SHORT_DRAMA_SEARCH_KEYWORDS = "短剧 剧情 介绍 人物 结局"
+FILM_TV_SEARCH_KEYWORDS = "影视 剧情 介绍 人物 结局 电影 电视剧"
+
+
+def _normalize_paths(paths):
+    """Return a de-duplicated list of non-empty, stripped path strings.
+
+    Args:
+        paths: A single path string or an iterable of paths. A falsy value
+            (None, "", empty list) yields an empty list.
+
+    Returns:
+        The stripped string entries in first-seen order. Non-string entries,
+        blank entries and repeats of an already-seen path are skipped.
+    """
+    # A lone string is treated as a one-element list.
+    if isinstance(paths, str):
+        paths = [paths]
+    if not paths:
+        return []
+
+    normalized_paths = []
+    seen = set()
+    for path in paths:
+        if not isinstance(path, str):
+            continue
+        path = path.strip()
+        if not path or path in seen:
+            continue
+        normalized_paths.append(path)
+        seen.add(path)
+    return normalized_paths
+
+
+def _build_combined_subtitle_content(subtitle_paths, video_paths=None):
+    """Merge the text of several subtitle files into one headed document.
+
+    Each subtitle file that exists on disk becomes one section consisting of
+    a header (section number, subtitle file name and, when available, the
+    video file name at the same position) followed by the subtitle text.
+    Sections are joined with a blank line.
+
+    Args:
+        subtitle_paths: A path string or iterable of subtitle paths.
+        video_paths: Optional path string or iterable of video paths, matched
+            to subtitles by position.
+
+    Returns:
+        The combined text, or "" when no listed subtitle file exists.
+    """
+    sections = []
+    video_paths = _normalize_paths(video_paths)
+    # NOTE(review): both lists are de-duplicated independently before being
+    # paired by position, so a duplicate entry in either list would shift the
+    # pairing — confirm callers never pass duplicates.
+    for index, subtitle_path in enumerate(_normalize_paths(subtitle_paths), start=1):
+        # Missing files are skipped; the section numbers of later files do not change.
+        if not os.path.exists(subtitle_path):
+            continue
+
+        video_path = video_paths[index - 1] if index <= len(video_paths) else ""
+        if video_path:
+            header = (
+                f"# 视频 {index}: {os.path.basename(video_path)}\n"
+                f"字幕文件: {os.path.basename(subtitle_path)}"
+            )
+        else:
+            header = f"# 视频 {index}\n字幕文件: {os.path.basename(subtitle_path)}"
+        sections.append(f"{header}\n{read_subtitle_text(subtitle_path).text}".strip())
+
+    return "\n\n".join(sections)
+
+
+def _normalize_narration_items_video_sources(items, video_paths):
+    """Call normalize_script_video_sources with a cleaned video path list.
+
+    ``video_paths`` is passed through _normalize_paths first; the result of
+    normalize_script_video_sources is returned unchanged.
+    """
+    return normalize_script_video_sources(items, _normalize_paths(video_paths))
+
+
+def _strip_planner_only_fields(items):
+    """Return new dicts that keep only the keys listed in PUBLIC_SCRIPT_FIELDS.
+
+    Entries of ``items`` that are not dicts are dropped. Keys appear in
+    PUBLIC_SCRIPT_FIELDS order, and a key missing from an item is simply
+    omitted from its copy.
+    """
+    return [
+        {field: item[field] for field in PUBLIC_SCRIPT_FIELDS if field in item}
+        for item in items
+        if isinstance(item, dict)
+    ]
+
+
+def _format_progress_status(progress, message: str = "", tr=lambda key: key):
+    """Build the status line shown next to the progress indicator.
+
+    Args:
+        progress: Progress value interpolated into the fallback text.
+        message: Optional status message; None is treated as empty.
+        tr: Translation callable mapping a key to display text. The default
+            returns the key itself.
+
+    Returns:
+        The stripped message when it is non-empty, otherwise
+        "<tr('Progress')>: <progress>%".
+    """
+    message = str(message or "").strip()
+    if message:
+        return message
+    return f"{tr('Progress')}: {progress}%"
+
+
 def parse_and_fix_json(json_string):
    """
    解析并修复JSON字符串
@ -114,55 +188,382 @@ def parse_and_fix_json(json_string):
        logger.debug(f"综合修复失败: {e}")
        pass

-    # 如果所有方法都失败，尝试创建一个基本的结构
+    # 如果所有方法都失败，直接返回 None，避免生成不可剪辑的默认假脚本
    logger.error(f"所有JSON解析方法都失败，原始内容: {json_string[:200]}...")
+    return None
+
+
+def _get_tavily_api_key() -> str:
+    """Return the stripped Tavily API key, or "" when none is configured.
+
+    The Streamlit session value takes precedence over the one in config.app.
+    """
+    return (
+        st.session_state.get("tavily_api_key")
+        or config.app.get("tavily_api_key")
+        or ""
+    ).strip()
+
+
+def _build_tavily_context(
+    title: str,
+    tr=lambda key: key,
+    search_keywords: str = SHORT_DRAMA_SEARCH_KEYWORDS,
+    empty_title_message_key: str = "Please enter short drama name before web search",
+) -> str | None:
+    """Fetch web-search context for ``title`` through Tavily.
+
+    Args:
+        title: Name of the work to search for; None is treated as empty.
+        tr: Translation callable used for the UI error messages.
+        search_keywords: Keyword string forwarded to search_story_context.
+        empty_title_message_key: Translation key shown when ``title`` is empty;
+            its translation is also passed to the search as empty_name_message.
+
+    Returns:
+        The result of format_search_context on success. None when the title is
+        empty, no API key is configured, or the search raises; in each of
+        these cases the problem is reported with st.error. Search depth and
+        result count are read from config.app ("basic" and 5 by default).
+    """
+    title = str(title or "").strip()
+    if not title:
+        st.error(tr(empty_title_message_key))
+        return None
+
+    api_key = _get_tavily_api_key()
+    if not api_key:
+        st.error(tr("Please configure Tavily API Key in Basic Settings"))
+        return None

-    # 尝试从文本中提取关键信息创建基本结构
    try:
-        # 这是一个简单的回退方案
-        return {
-            "items": [
-                {
-                    "_id": 1,
-                    "timestamp": "00:00:00,000-00:00:10,000",
-                    "picture": "解析失败，使用默认内容",
-                    "narration": json_string[:100] + "..." if len(json_string) > 100 else json_string,
-                    "OST": 0
-                }
-            ]
-        }
-    except Exception:
+        search_data = search_story_context(
+            title,
+            api_key,
+            search_keywords=search_keywords,
+            empty_name_message=tr(empty_title_message_key),
+            search_depth=config.app.get("tavily_search_depth", "basic"),
+            max_results=config.app.get("tavily_max_results", 5),
+        )
+        return format_search_context(search_data)
+    except TavilySearchError as e:
+        logger.error(f"Tavily 短剧检索失败: {str(e)}")
+        st.error(f"{tr('Tavily search failed')}: {str(e)}")
+        return None
+    except Exception as e:
+        logger.error(f"Tavily 短剧检索异常: {traceback.format_exc()}")
+        st.error(f"{tr('Tavily search failed')}: {str(e)}")
        return None


-def generate_script_short_sunmmary(params, subtitle_path, video_theme, temperature):
+def _build_plot_analysis_input(
+    subtitle_content: str,
+    short_name: str = "",
+    enable_web_search: bool = False,
+    tr=lambda key: key,
+    search_keywords: str = SHORT_DRAMA_SEARCH_KEYWORDS,
+    empty_title_message_key: str = "Please enter short drama name before web search",
+    web_search_context_description: str = "短剧名称、人物关系、剧情背景和公开剧情梗概",
+) -> str | None:
+    """Assemble the text sent to the model for plot analysis.
+
+    Args:
+        subtitle_content: Raw subtitle text; None is treated as empty.
+        short_name: Title used for the web search.
+        enable_web_search: When False the stripped subtitle text is returned
+            as-is and no search is made.
+        tr: Translation callable forwarded to _build_tavily_context.
+        search_keywords: Forwarded to _build_tavily_context.
+        empty_title_message_key: Forwarded to _build_tavily_context.
+        web_search_context_description: Phrase interpolated into the
+            instruction header describing what the search results cover.
+
+    Returns:
+        The subtitle text alone, or an instruction header followed by the
+        Tavily context and the subtitle text. None when web search is enabled
+        but _build_tavily_context returned None.
+    """
+    subtitle_content = str(subtitle_content or "").strip()
+    if not enable_web_search:
+        return subtitle_content
+
+    tavily_context = _build_tavily_context(
+        short_name,
+        tr,
+        search_keywords=search_keywords,
+        empty_title_message_key=empty_title_message_key,
+    )
+    if tavily_context is None:
+        return None
+
+    return f"""# 分析补充说明
+请先参考 Tavily 联网检索结果理解{web_search_context_description}，再结合原始字幕完成剧情理解。
+如果联网检索结果与字幕内容冲突，请以字幕内容为准；时间戳必须只从字幕内容中提取。
+
+{tavily_context}
+
+# 原始字幕
+{subtitle_content}"""
+
+
+def analyze_short_drama_plot(
+    subtitle_path,
+    temperature,
+    tr=lambda key: key,
+    subtitle_content=None,
+    short_name: str = "",
+    enable_web_search: bool = False,
+    video_paths=None,
+    prompt_category: str = SHORT_DRAMA_PROMPT_CATEGORY,
+    search_keywords: str = SHORT_DRAMA_SEARCH_KEYWORDS,
+    empty_title_message_key: str = "Please enter short drama name before web search",
+    web_search_context_description: str = "短剧名称、人物关系、剧情背景和公开剧情梗概",
+):
+    """Run only the subtitle plot-understanding step and return editable analysis text.
+
+    Args:
+        subtitle_path: One subtitle path or an iterable of them; every listed
+            file must exist.
+        temperature: Sampling temperature. In this function it is passed only
+            to the legacy analyze_subtitle fallback, not to the adapter call.
+        tr: Translation callable used for UI error messages.
+        subtitle_content: Pre-read subtitle text; when empty the files are
+            read and combined via _build_combined_subtitle_content.
+        short_name: Title used for the optional web search.
+        enable_web_search: Whether to prepend Tavily search context.
+        video_paths: Video paths used to label the combined subtitle sections.
+        prompt_category: Prompt category passed to the adapter and fallback.
+        search_keywords: Forwarded to _build_plot_analysis_input.
+        empty_title_message_key: Forwarded to _build_plot_analysis_input.
+        web_search_context_description: Forwarded to _build_plot_analysis_input.
+
+    Returns:
+        The "analysis" value of the analysis result, or None after reporting
+        an error with st.error (no or missing subtitles, empty content, web
+        search failure, or a non-"success" analysis status).
+    """
+    subtitle_paths = _normalize_paths(subtitle_path)
+    if not subtitle_paths:
+        st.error(tr("Please generate or upload subtitles first"))
+        return None
+    missing_subtitle_paths = [path for path in subtitle_paths if not os.path.exists(path)]
+    if missing_subtitle_paths:
+        st.error(tr("Subtitle file does not exist"))
+        return None
+
+    # Text-LLM settings are looked up under keys derived from the configured provider name.
+    text_provider = config.app.get('text_llm_provider', 'gemini').lower()
+    text_api_key = config.app.get(f'text_{text_provider}_api_key')
+    text_model = config.app.get(f'text_{text_provider}_model_name')
+    text_base_url = config.app.get(f'text_{text_provider}_base_url')
+
+    # Caller-supplied text wins; otherwise read and merge the subtitle files.
+    subtitle_content = str(subtitle_content or "").strip() or _build_combined_subtitle_content(
+        subtitle_paths,
+        video_paths,
+    )
+    if not subtitle_content:
+        st.error(tr("Subtitle file is empty or unreadable"))
+        return None
+
+    plot_analysis_input = _build_plot_analysis_input(
+        subtitle_content,
+        short_name=short_name,
+        enable_web_search=enable_web_search,
+        tr=tr,
+        search_keywords=search_keywords,
+        empty_title_message_key=empty_title_message_key,
+        web_search_context_description=web_search_context_description,
+    )
+    if plot_analysis_input is None:
+        return None
+
+    try:
+        logger.info("使用新的LLM服务架构进行字幕分析")
+        analyzer = SubtitleAnalyzerAdapter(
+            text_api_key,
+            text_model,
+            text_base_url,
+            text_provider,
+            prompt_category=prompt_category,
+        )
+        analysis_result = analyzer.analyze_subtitle(plot_analysis_input)
+    except Exception as e:
+        # Any failure in the adapter path falls back to the legacy implementation.
+        logger.warning(f"使用新LLM服务失败，回退到旧实现: {str(e)}")
+        analysis_result = analyze_subtitle(
+            subtitle_content=plot_analysis_input,
+            api_key=text_api_key,
+            model=text_model,
+            base_url=text_base_url,
+            save_result=True,
+            temperature=temperature,
+            provider=text_provider,
+            prompt_category=prompt_category,
+        )
+
+    if analysis_result["status"] != "success":
+        logger.error(f"分析失败: {analysis_result['message']}")
+        st.error(tr("Script generation failed check logs"))
+        return None
+
+    return analysis_result["analysis"]
+
+
+def generate_short_drama_narration_copy(
+    subtitle_path,
+    video_theme,
+    temperature,
+    tr=lambda key: key,
+    plot_analysis=None,
+    subtitle_content=None,
+    enable_web_search: bool = False,
+    video_paths=None,
+    narration_language: str = "简体中文（中国）",
+    drama_genre: str = "逆袭/复仇",
+    prompt_category: str = SHORT_DRAMA_PROMPT_CATEGORY,
+    search_keywords: str = SHORT_DRAMA_SEARCH_KEYWORDS,
+    empty_title_message_key: str = "Please enter short drama name before web search",
+    web_search_context_description: str = "短剧名称、人物关系、剧情背景和公开剧情梗概",
+):
+    """Generate narration copy for the user to review and edit, without binding timestamps.
+
+    Args:
+        subtitle_path: One subtitle path or an iterable of them; every listed
+            file must exist.
+        video_theme: Title of the work; passed as short_name to the model
+            calls and to the plot-analysis step.
+        temperature: Sampling temperature passed to the narration calls.
+        tr: Translation callable used for UI error messages.
+        plot_analysis: Existing plot analysis text. When empty,
+            analyze_short_drama_plot is called to produce it.
+        subtitle_content: Pre-read subtitle text; when empty the files are
+            read and combined via _build_combined_subtitle_content.
+        enable_web_search: Forwarded to analyze_short_drama_plot.
+        video_paths: Video paths used to label the combined subtitle sections.
+        narration_language: Target narration language passed to the model call.
+        drama_genre: Genre label passed to the model call.
+        prompt_category: Prompt category passed to the adapter and fallback.
+        search_keywords: Forwarded to analyze_short_drama_plot.
+        empty_title_message_key: Forwarded to analyze_short_drama_plot.
+        web_search_context_description: Forwarded to analyze_short_drama_plot.
+
+    Returns:
+        A dict with keys "narration_copy", "plot_analysis" and
+        "subtitle_content", or None when validation, plot analysis or
+        narration generation fails. Failures raised in this function are
+        reported with st.error.
+    """
+    subtitle_paths = _normalize_paths(subtitle_path)
+    if not subtitle_paths:
+        st.error(tr("Please generate or upload subtitles first"))
+        return None
+    missing_subtitle_paths = [path for path in subtitle_paths if not os.path.exists(path)]
+    if missing_subtitle_paths:
+        st.error(tr("Subtitle file does not exist"))
+        return None
+
+    selected_video_paths = _normalize_paths(video_paths)
+    # Caller-supplied text wins; otherwise read and merge the subtitle files.
+    subtitle_content = str(subtitle_content or "").strip() or _build_combined_subtitle_content(
+        subtitle_paths,
+        selected_video_paths,
+    )
+    if not subtitle_content:
+        st.error(tr("Subtitle file is empty or unreadable"))
+        return None
+
+    # Reuse a caller-supplied analysis when present; otherwise run the analysis step now.
+    analysis_text = str(plot_analysis or "").strip()
+    if not analysis_text:
+        analysis_text = analyze_short_drama_plot(
+            subtitle_paths,
+            temperature,
+            tr,
+            subtitle_content=subtitle_content,
+            short_name=video_theme,
+            enable_web_search=enable_web_search,
+            video_paths=selected_video_paths,
+            prompt_category=prompt_category,
+            search_keywords=search_keywords,
+            empty_title_message_key=empty_title_message_key,
+            web_search_context_description=web_search_context_description,
+        )
+        if not analysis_text:
+            return None
+
+    # Text-LLM settings are looked up under keys derived from the configured provider name.
+    text_provider = config.app.get('text_llm_provider', 'gemini').lower()
+    text_api_key = config.app.get(f'text_{text_provider}_api_key')
+    text_model = config.app.get(f'text_{text_provider}_model_name')
+    text_base_url = config.app.get(f'text_{text_provider}_base_url')
+
+    try:
+        logger.info("使用新的LLM服务架构生成可审核解说文案")
+        analyzer = SubtitleAnalyzerAdapter(
+            text_api_key,
+            text_model,
+            text_base_url,
+            text_provider,
+            prompt_category=prompt_category,
+        )
+        narration_result = analyzer.generate_narration_copy(
+            short_name=video_theme,
+            plot_analysis=analysis_text,
+            subtitle_content=subtitle_content,
+            temperature=temperature,
+            narration_language=narration_language,
+            drama_genre=drama_genre,
+        )
+    except Exception as e:
+        # Any failure in the adapter path falls back to the legacy implementation.
+        logger.warning(f"使用新LLM服务生成文案失败，回退到旧实现: {str(e)}")
+        narration_result = generate_narration_copy_legacy(
+            short_name=video_theme,
+            plot_analysis=analysis_text,
+            subtitle_content=subtitle_content,
+            api_key=text_api_key,
+            model=text_model,
+            base_url=text_base_url,
+            temperature=temperature,
+            provider=text_provider,
+            narration_language=narration_language,
+            drama_genre=drama_genre,
+            prompt_category=prompt_category,
+        )
+
+    if narration_result.get("status") != "success":
+        logger.error(f"解说文案正文生成失败: {narration_result.get('message')}")
+        st.error(tr("Script generation failed check logs"))
+        return None
+
+    narration_copy = str(narration_result.get("narration_copy", "")).strip()
+    if not narration_copy:
+        logger.error("模型返回空解说文案正文")
+        st.error(tr("Generated narration copy is empty"))
+        return None
+
+    return {
+        "narration_copy": narration_copy,
+        "plot_analysis": analysis_text,
+        "subtitle_content": subtitle_content,
+    }
+
+
+def generate_script_short_sunmmary(
+    params,
+    subtitle_path,
+    video_theme,
+    temperature,
+    tr=lambda key: key,
+    plot_analysis=None,
+    subtitle_content=None,
+    enable_web_search: bool = False,
+    video_paths=None,
+    narration_language: str = "简体中文（中国）",
+    narration_copy: str = "",
+    drama_genre: str = "逆袭/复仇",
+    original_sound_ratio: int = 30,
+    prompt_category: str = SHORT_DRAMA_PROMPT_CATEGORY,
+    search_keywords: str = SHORT_DRAMA_SEARCH_KEYWORDS,
+    empty_title_message_key: str = "Please enter short drama name before web search",
+    web_search_context_description: str = "短剧名称、人物关系、剧情背景和公开剧情梗概",
+):
    """
    生成 短剧解说 视频脚本
    要求: 提供高质量短剧字幕
    适合场景: 短剧
    """
-    progress_bar = st.progress(0)
+    progress_bar = st.empty()
    status_text = st.empty()
+    stream_text = st.empty()
+    stream_state = {
+        "reasoning": "",
+        "content": "",
+        "last_update": 0.0,
+    }

    def update_progress(progress: float, message: str = ""):
        progress_bar.progress(progress)
+        status_text.text(_format_progress_status(progress, message, tr))
+
+    def update_waiting(message: str = ""):
+        progress_bar.empty()
        if message:
-            status_text.text(f"{progress}% - {message}")
+            status_text.text(message)
        else:
-            status_text.text(f"进度: {progress}%")
+            status_text.empty()
+
+    def update_stream_window(event):
+        event = event or {}
+        chunk_type = str(event.get("type") or "content")
+        chunk_text = str(event.get("text") or "")
+        if chunk_type == "done" or not chunk_text:
+            return
+
+        bucket = "reasoning" if chunk_type == "reasoning" else "content"
+        stream_state[bucket] += chunk_text
+
+        now = time.time()
+        if now - stream_state["last_update"] < 0.12:
+            return
+        stream_state["last_update"] = now
+
+        blocks = []
+        if stream_state["reasoning"].strip():
+            blocks.append(
+                f"{tr('Model reasoning stream')}\n"
+                f"{stream_state['reasoning'][-900:]}"
+            )
+        if stream_state["content"].strip():
+            blocks.append(
+                f"{tr('Model output preview')}\n"
+                f"{stream_state['content'][-900:]}"
+            )
+
+        preview = "\n\n".join(blocks)[-1800:]
+        escaped_preview = html.escape(preview)
+        stream_text.markdown(
+            f"""
+            <div style="height:150px; overflow:hidden; border:1px solid #e5e7eb;
+                        border-radius:8px; padding:10px 12px; background:#f8fafc;
+                        color:#334155;">
+              <div style="font-size:12px; font-weight:600; color:#64748b; margin-bottom:6px;">
+                {html.escape(tr('LLM stream window title'))}
+              </div>
+              <pre style="white-space:pre-wrap; margin:0; font-size:12px; line-height:1.45;
+                          font-family:ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, monospace;">{escaped_preview}</pre>
+            </div>
+            """,
+            unsafe_allow_html=True,
+        )

    try:
-        with st.spinner("正在生成脚本..."):
-            if not params.video_origin_path:
-                st.error("请先选择视频文件")
+        with st.spinner(tr("Generating script...")):
+            selected_video_paths = _normalize_paths(
+                video_paths
+                or getattr(params, "video_origin_paths", [])
+                or getattr(params, "video_origin_path", "")
+            )
+            if not selected_video_paths:
+                st.error(tr("Please select video file first"))
                return
            """
            1. 获取字幕
            """
-            update_progress(30, "正在解析字幕...")
+            update_progress(30, tr("Parsing subtitles..."))
            # 判断字幕文件是否存在
-            if not os.path.exists(subtitle_path):
-                st.error("字幕文件不存在")
+            subtitle_paths = _normalize_paths(subtitle_path)
+            missing_subtitle_paths = [path for path in subtitle_paths if not os.path.exists(path)]
+            if not subtitle_paths or missing_subtitle_paths:
+                st.error(tr("Subtitle file does not exist"))
                return

            """
@ -174,72 +575,118 @@ def generate_script_short_sunmmary(params, subtitle_path, video_theme, temperatu
            text_base_url = config.app.get(f'text_{text_provider}_base_url')

            # 读取字幕文件内容（无论使用哪种实现都需要）
-            subtitle_content = read_subtitle_text(subtitle_path).text
+            subtitle_content = str(subtitle_content or "").strip() or _build_combined_subtitle_content(
+                subtitle_paths,
+                selected_video_paths,
+            )
            if not subtitle_content:
-                st.error("字幕文件内容为空或无法读取")
+                st.error(tr("Subtitle file is empty or unreadable"))
                return

-            try:
-                # 优先使用新的LLM服务架构
-                logger.info("使用新的LLM服务架构进行字幕分析")
-                analyzer = SubtitleAnalyzerAdapter(text_api_key, text_model, text_base_url, text_provider)
+            narration_copy = str(narration_copy or "").strip()
+            if not narration_copy:
+                st.error(tr("Please generate and review narration copy first"))
+                return

-                analysis_result = analyzer.analyze_subtitle(subtitle_content)
-
-            except Exception as e:
-                logger.warning(f"使用新LLM服务失败，回退到旧实现: {str(e)}")
-                # 回退到旧的实现
-                analysis_result = analyze_subtitle(
-                    subtitle_file_path=subtitle_path,
-                    api_key=text_api_key,
-                    model=text_model,
-                    base_url=text_base_url,
-                    save_result=True,
-                    temperature=temperature,
-                    provider=text_provider
-                )
-            """
-            3. 根据剧情生成解说文案
-            """
-            if analysis_result["status"] == "success":
-                logger.info("字幕分析成功！")
-                update_progress(60, "正在生成文案...")
-
-                # 根据剧情生成解说文案 - 使用新的LLM服务架构
+            analyzer = SubtitleAnalyzerAdapter(
+                text_api_key,
+                text_model,
+                text_base_url,
+                text_provider,
+                prompt_category=prompt_category,
+            )
+            if plot_analysis and str(plot_analysis).strip():
+                logger.info("使用用户编辑后的剧情理解结果匹配剪辑脚本")
+                analysis_result = {
+                    "status": "success",
+                    "analysis": str(plot_analysis).strip(),
+                }
+            else:
+                plot_analysis_input = subtitle_content
+                if enable_web_search:
+                    update_waiting(tr("Searching short drama with Tavily..."))
+                    plot_analysis_input = _build_plot_analysis_input(
+                        subtitle_content,
+                        short_name=video_theme,
+                        enable_web_search=True,
+                        tr=tr,
+                        search_keywords=search_keywords,
+                        empty_title_message_key=empty_title_message_key,
+                        web_search_context_description=web_search_context_description,
+                    )
+                    if plot_analysis_input is None:
+                        return
                try:
                    # 优先使用新的LLM服务架构
-                    logger.info("使用新的LLM服务架构生成解说文案")
-                    narration_result = analyzer.generate_narration_script(
-                        short_name=video_theme,
-                        plot_analysis=analysis_result["analysis"],
-                        subtitle_content=subtitle_content,  # 传递原始字幕内容
-                        temperature=temperature
-                    )
+                    logger.info("使用新的LLM服务架构进行字幕分析")
+                    update_waiting(tr("Analyzing subtitles with model..."))
+                    analysis_result = analyzer.analyze_subtitle(plot_analysis_input)
+
                except Exception as e:
                    logger.warning(f"使用新LLM服务失败，回退到旧实现: {str(e)}")
                    # 回退到旧的实现
-                    narration_result = generate_narration_script(
-                        short_name=video_theme,
-                        plot_analysis=analysis_result["analysis"],
-                        subtitle_content=subtitle_content,  # 传递原始字幕内容
+                    update_waiting(tr("Analyzing subtitles with model..."))
+                    analysis_result = analyze_subtitle(
+                        subtitle_content=plot_analysis_input,
                        api_key=text_api_key,
                        model=text_model,
                        base_url=text_base_url,
                        save_result=True,
                        temperature=temperature,
-                        provider=text_provider
+                        provider=text_provider,
+                        prompt_category=prompt_category,
+                    )
+            """
+            3. 根据用户审核后的文案匹配画面与时间戳
+            """
+            if analysis_result["status"] == "success":
+                logger.info("字幕分析成功！")
+                update_waiting()
+
+                try:
+                    logger.info("使用新的LLM服务架构将审核文案匹配到字幕画面")
+                    update_waiting(tr("Matching narration copy to footage..."))
+                    stream_text.info(tr("Waiting for model stream..."))
+                    narration_result = analyzer.match_narration_copy_to_script(
+                        short_name=video_theme,
+                        plot_analysis=analysis_result["analysis"],
+                        subtitle_content=subtitle_content,
+                        narration_copy=narration_copy,
+                        temperature=temperature,
+                        narration_language=narration_language,
+                        drama_genre=drama_genre,
+                        original_sound_ratio=original_sound_ratio,
+                        stream_callback=update_stream_window,
+                    )
+                except Exception as e:
+                    logger.warning(f"使用新LLM服务匹配画面失败，回退到旧实现: {str(e)}")
+                    stream_text.info(tr("Streaming unavailable fallback waiting..."))
+                    narration_result = match_narration_copy_to_script_legacy(
+                        short_name=video_theme,
+                        plot_analysis=analysis_result["analysis"],
+                        subtitle_content=subtitle_content,
+                        narration_copy=narration_copy,
+                        api_key=text_api_key,
+                        model=text_model,
+                        base_url=text_base_url,
+                        temperature=temperature,
+                        provider=text_provider,
+                        narration_language=narration_language,
+                        drama_genre=drama_genre,
+                        original_sound_ratio=original_sound_ratio,
+                        prompt_category=prompt_category,
                    )

                if narration_result["status"] == "success":
-                    logger.info("\n解说文案生成成功！")
+                    logger.info("\n剪辑脚本匹配成功！")
                    logger.info(narration_result["narration_script"])
                else:
-                    logger.info(f"\n解说文案生成失败: {narration_result['message']}")
-                    st.error("生成脚本失败，请检查日志")
+                    logger.info(f"\n剪辑脚本匹配失败: {narration_result['message']}")
+                    st.error(tr("Script generation failed check logs"))
                    st.stop()
            else:
                logger.error(f"分析失败: {analysis_result['message']}")
-                st.error("生成脚本失败，请检查日志")
+                st.error(tr("Script generation failed check logs"))
                st.stop()

            """
@ -253,37 +700,43 @@ def generate_script_short_sunmmary(params, subtitle_path, video_theme, temperatu
            # 增强JSON解析，包含错误处理和修复
            narration_dict = parse_and_fix_json(narration_script)
            if narration_dict is None:
-                st.error("生成的解说文案格式错误，无法解析为JSON")
+                st.error(tr("Generated narration JSON parse failed"))
                logger.error(f"JSON解析失败，原始内容: {narration_script}")
                st.stop()

            # 验证JSON结构
            if 'items' not in narration_dict:
-                st.error("生成的解说文案缺少必要的'items'字段")
+                st.error(tr("Generated narration missing items field"))
                logger.error(f"JSON结构错误，缺少items字段: {narration_dict}")
                st.stop()

-            script = json.dumps(narration_dict['items'], ensure_ascii=False, indent=2)
+            narration_items = _normalize_narration_items_video_sources(
+                narration_dict['items'],
+                selected_video_paths,
+            )
+            narration_items = _strip_planner_only_fields(narration_items)
+            script = json.dumps(narration_items, ensure_ascii=False, indent=2)

            if script is None:
-                st.error("生成脚本失败，请检查日志")
+                st.error(tr("Script generation failed check logs"))
                st.stop()
            logger.success(f"剪辑脚本生成完成")
            if isinstance(script, list):
                st.session_state['video_clip_json'] = script
            elif isinstance(script, str):
                st.session_state['video_clip_json'] = json.loads(script)
-            update_progress(90, "整理输出...")
+            update_progress(90, tr("Preparing output..."))

        time.sleep(0.1)
        progress_bar.progress(100)
-        status_text.text("脚本生成完成！")
-        st.success("视频脚本生成成功！")
+        status_text.text(tr("Script generation completed!"))
+        st.success(tr("Video script generated successfully"))

    except Exception as err:
-        st.error(f"生成过程中发生错误: {str(err)}")
+        st.error(f"{tr('Generation error')}: {str(err)}")
        logger.exception(f"生成脚本时发生错误\n{traceback.format_exc()}")
    finally:
        time.sleep(2)
        progress_bar.empty()
        status_text.empty()
+        stream_text.empty()
--- a/webui/tools/test_generate_short_summary_unittest.py
+++ b/webui/tools/test_generate_short_summary_unittest.py
@ -0,0 +1,27 @@
+import unittest
+
+from webui.tools.generate_short_summary import _format_progress_status, parse_and_fix_json
+
+
+class GenerateShortSummaryJsonTests(unittest.TestCase):  # Unit tests for _format_progress_status and parse_and_fix_json.
+    def test_progress_message_does_not_prefix_fake_percentage(self):  # The status text must be the message alone.
+        status = _format_progress_status(60, "正在生成文案...")  # 60 is the progress value passed alongside the message.
+
+        self.assertEqual("正在生成文案...", status)  # The message comes back unchanged.
+        self.assertNotIn("60%", status)  # No percentage built from the progress value appears in the text.
+
+    def test_invalid_json_does_not_create_default_fake_script(self):  # Non-JSON input must not produce a result.
+        self.assertIsNone(parse_and_fix_json("not a json response"))  # Expect None for text that is not JSON.
+
+    def test_json_code_block_is_parsed(self):  # JSON wrapped in a Markdown json code fence must still parse.
+        parsed = parse_and_fix_json(
+            """```json
+{"items": [{"_id": 1, "timestamp": "00:00:01,000-00:00:02,000"}]}
+```"""
+        )
+
+        self.assertEqual(1, parsed["items"][0]["_id"])  # The first item's _id is preserved by parsing.
+
+
+if __name__ == "__main__":  # Only when this file is executed directly, not when it is imported.
+    unittest.main()  # Run the test cases defined in this module.
 @ -1 +1 @@
 .7.9
 .8.1