From 342fc15f3bf3beb515655de540d9a259bc9cf337 Mon Sep 17 00:00:00 2001
From: viccy <linyqemail@gmail.com>
Date: Sun, 7 Jun 2026 01:24:32 +0800
Subject: [PATCH] =?UTF-8?q?feat(tts,search,video):=20=E6=96=B0=E5=A2=9EOmn?=
 =?UTF-8?q?iVoice=20TTS=E3=80=81=E8=81=94=E7=BD=91=E6=90=9C=E7=B4=A2?=
 =?UTF-8?q?=E4=B8=8E=E5=A4=9A=E8=A7=86=E9=A2=91=E5=89=AA=E8=BE=91=E6=94=AF?=
 =?UTF-8?q?=E6=8C=81?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

新增OmniVoice语音合成引擎全流程支持，包含配置项、WebUI界面与服务实现
集成Tavily联网搜索能力，支持短剧剧情分析前自动检索剧情背景信息
新增多视频源剪辑支持，完善脚本校验规则并重构剪辑逻辑适配多视频路径
重构LLM剧情分析Prompt，优化输出格式适配多场景与联网检索结果
调整streamlit版本至1.56.0修复兼容性问题
新增相关单元测试与多语言翻译，更新配置示例文件
---
 app/config/config.py                          |   5 +
 app/config/defaults.py                        |   3 +
 app/services/clip_video.py                    | 114 +++++++--
 app/services/jianying_task.py                 |   9 +-
 app/services/llm/unified_service.py           |  15 +-
 app/services/llm/validators.py                |  12 +
 .../short_drama_narration/plot_analysis.py    | 105 +++++----
 .../script_generation.py                      |  25 +-
 app/services/task.py                          |   2 +
 app/services/tavily_search.py                 | 116 +++++++++
 app/services/test_jianying_task_unittest.py   |  17 ++
 ...est_multi_video_script_sources_unittest.py |  84 +++++++
 app/services/voice.py                         | 165 ++++++++++++-
 app/utils/check_script.py                     |  17 ++
 config.example.toml                           |  32 ++-
 requirements.txt                              |   2 +-
 webui.py                                      |   7 +
 webui/components/audio_settings.py            | 192 ++++++++++++++-
 webui/components/basic_settings.py            |  27 +++
 webui/components/script_settings.py           | 170 +++++++++++--
 webui/components/subtitle_settings.py         |   2 +-
 webui/i18n/en.json                            |  42 ++++
 webui/i18n/zh.json                            |  42 ++++
 webui/tools/generate_short_summary.py         | 223 +++++++++++++++++-
 24 files changed, 1320 insertions(+), 108 deletions(-)
 create mode 100644 app/services/tavily_search.py
 create mode 100644 app/services/test_multi_video_script_sources_unittest.py

diff --git a/app/config/config.py b/app/config/config.py
index ae19945..de17645 100644
--- a/app/config/config.py
+++ b/app/config/config.py
@@ -13,8 +13,11 @@ INDEXTTS_ENGINE = "indextts"
 INDEXTTS_DISPLAY_NAME = "IndexTTS-1.5"
 INDEXTTS2_ENGINE = "indextts2"
 INDEXTTS2_DISPLAY_NAME = "IndexTTS-2"
+OMNIVOICE_ENGINE = "omnivoice"
+OMNIVOICE_DISPLAY_NAME = "OmniVoice"
 INDEXTTS_VOICE_PREFIX = f"{INDEXTTS_ENGINE}:"
 INDEXTTS2_VOICE_PREFIX = f"{INDEXTTS2_ENGINE}:"
+OMNIVOICE_VOICE_PREFIX = f"{OMNIVOICE_ENGINE}:"
 
 
 def normalize_tts_engine_name(tts_engine: str) -> str:
@@ -131,6 +134,7 @@ def save_config():
         _cfg["fun_asr"] = fun_asr
         _cfg["indextts"] = indextts
         _cfg["indextts2"] = indextts2
+        _cfg["omnivoice"] = omnivoice
         _cfg["doubaotts"] = doubaotts
         f.write(toml.dumps(_cfg))
 
@@ -148,6 +152,7 @@ tts_qwen = _cfg.get("tts_qwen", {})
 fun_asr = _cfg.get("fun_asr", {})
 indextts = _cfg.get("indextts", {})
 indextts2 = _cfg.get("indextts2", {})
+omnivoice = _cfg.get("omnivoice", {})
 doubaotts = _cfg.get("doubaotts", {})
 
 hostname = socket.gethostname()
diff --git a/app/config/defaults.py b/app/config/defaults.py
index a001978..9f648fa 100644
--- a/app/config/defaults.py
+++ b/app/config/defaults.py
@@ -35,6 +35,9 @@ DEFAULT_LLM_APP_CONFIG = {
     "text_openai_model_name": DEFAULT_TEXT_OPENAI_MODEL_NAME,
     "text_openai_api_key": "",
     "text_openai_base_url": DEFAULT_OPENAI_COMPATIBLE_BASE_URL,
+    "tavily_api_key": "",
+    "tavily_search_depth": "basic",
+    "tavily_max_results": 5,
 }
 DEFAULT_LLM_APP_CONFIG.update(DEFAULT_LLM_GENERATION_APP_CONFIG)
 
diff --git a/app/services/clip_video.py b/app/services/clip_video.py
index 8455703..93f9ddd 100644
--- a/app/services/clip_video.py
+++ b/app/services/clip_video.py
@@ -32,6 +32,107 @@ def parse_timestamp(timestamp: str) -> tuple:
     return start_time, end_time
 
 
+def _normalize_video_origin_paths(
+    video_origin_path: str,
+    video_origin_paths: Optional[List[str]] = None,
+) -> List[str]:
+    """Merge the primary path and the multi-video list into one ordered, de-duplicated list.
+
+    When given, ``video_origin_path`` is placed first, so it is both the fallback
+    source and position 1 for ``video_id`` lookups. If the caller's list does not
+    already start with it, the positions of the other entries shift accordingly.
+    Non-string, blank and repeated entries are dropped.
+    """
+    if isinstance(video_origin_paths, str):
+        # A bare string would otherwise be extended character by character.
+        video_origin_paths = [video_origin_paths]
+
+    paths = []
+    if video_origin_paths:
+        paths.extend(video_origin_paths)
+    if video_origin_path:
+        paths.insert(0, video_origin_path)
+
+    normalized_paths = []
+    seen = set()
+    for item in paths:
+        if not isinstance(item, str):
+            continue
+        item = item.strip()
+        if not item or item in seen:
+            continue
+        normalized_paths.append(item)
+        seen.add(item)
+    return normalized_paths
+
+
+def _coerce_video_id(value) -> Optional[int]:
+    """Return ``value`` as a positive integer video index, or None if it is not one."""
+    try:
+        video_id = int(value)
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError covers int(float("inf")), e.g. a JSON ``Infinity`` literal.
+        return None
+    return video_id if video_id > 0 else None
+
+
+def _match_video_id_by_name(video_name: str, video_origin_paths: List[str]) -> Optional[int]:
+    """Return the 1-based position of the first path whose basename equals ``video_name``'s.
+
+    Returns None when ``video_name`` is empty or nothing matches.
+    """
+    video_name = str(video_name or "").strip()
+    if not video_name:
+        return None
+
+    expected_name = os.path.basename(video_name)
+    for index, video_path in enumerate(video_origin_paths, start=1):
+        if os.path.basename(video_path) == expected_name:
+            return index
+    return None
+
+
+def _resolve_script_video_path(script_item: Dict, video_origin_paths: List[str]) -> str:
+    """Pick the source video for one script item.
+
+    Order of precedence: an explicit path field that exists on disk, a
+    ``video_name``/``source_video`` basename match, ``video_id``/``video_index``,
+    and finally the first entry of ``video_origin_paths`` (which must be non-empty).
+    """
+    explicit_path = (
+        script_item.get("source_video_path")
+        or script_item.get("video_origin_path")
+        or script_item.get("origin_video_path")
+    )
+    # os.path.exists() treats integers as file descriptors, so only accept strings.
+    if isinstance(explicit_path, str) and os.path.exists(explicit_path):
+        return explicit_path
+
+    video_id = _coerce_video_id(script_item.get("video_id") or script_item.get("video_index"))
+    matched_video_id = _match_video_id_by_name(
+        script_item.get("video_name") or script_item.get("source_video"),
+        video_origin_paths,
+    )
+    if matched_video_id:
+        video_id = matched_video_id
+
+    if video_id is not None:
+        if video_id <= len(video_origin_paths):
+            return video_origin_paths[video_id - 1]
+        logger.warning(
+            f"片段 {script_item.get('_id')} 的 video_id={video_id} 超出视频数量 "
+            f"{len(video_origin_paths)}，默认使用第一个视频"
+        )
+
+    return video_origin_paths[0]
+
+
+def _safe_output_id(value) -> str:
+    """Return ``value`` as text safe for a file name: non-alphanumerics other than -/_ become _."""
+    safe_value = str(value if value is not None else "unknown")
+    return "".join(char if char.isalnum() or char in ("-", "_") else "_" for char in safe_value)
+
+
 def calculate_end_time(start_time: str, duration: float, extra_seconds: float = 1.0) -> str:
     """
     根据开始时间和持续时间计算结束时间
@@ -579,7 +655,7 @@ def _process_narration_only_segment(
     # 生成输出文件名
     safe_start_time = start_time.replace(':', '-').replace(',', '-')
     safe_end_time = calculated_end_time.replace(':', '-').replace(',', '-')
-    output_filename = f"ost0_vid_{safe_start_time}@{safe_end_time}.mp4"
+    output_filename = f"ost0_{_safe_output_id(_id)}_vid_{safe_start_time}@{safe_end_time}.mp4"
     output_path = os.path.join(output_dir, output_filename)
 
     # 构建FFmpeg命令 - 移除音频
@@ -622,7 +698,7 @@ def _process_original_audio_segment(
     # 生成输出文件名
     safe_start_time = start_time.replace(':', '-').replace(',', '-')
     safe_end_time = end_time.replace(':', '-').replace(',', '-')
-    output_filename = f"ost1_vid_{safe_start_time}@{safe_end_time}.mp4"
+    output_filename = f"ost1_{_safe_output_id(_id)}_vid_{safe_start_time}@{safe_end_time}.mp4"
     output_path = os.path.join(output_dir, output_filename)
 
     # 构建FFmpeg命令 - 保持原声
@@ -674,7 +750,7 @@ def _process_mixed_segment(
     # 生成输出文件名
     safe_start_time = start_time.replace(':', '-').replace(',', '-')
     safe_end_time = calculated_end_time.replace(':', '-').replace(',', '-')
-    output_filename = f"ost2_vid_{safe_start_time}@{safe_end_time}.mp4"
+    output_filename = f"ost2_{_safe_output_id(_id)}_vid_{safe_start_time}@{safe_end_time}.mp4"
     output_path = os.path.join(output_dir, output_filename)
 
     # 构建FFmpeg命令 - 保持原声
@@ -782,28 +858,34 @@ def clip_video_unified(
         script_list: List[Dict],
         tts_results: List[Dict],
         output_dir: Optional[str] = None,
-        task_id: Optional[str] = None
+        task_id: Optional[str] = None,
+        video_origin_paths: Optional[List[str]] = None
 ) -> Dict[str, str]:
     """
     基于OST类型的统一视频裁剪策略 - 消除双重裁剪问题
 
     Args:
-        video_origin_path: 原始视频的路径
+        video_origin_path: 原始视频的路径；旧脚本或无 video_id 片段默认使用该视频
         script_list: 完整的脚本列表，包含所有片段信息
         tts_results: TTS结果列表，仅包含OST=0和OST=2的片段
         output_dir: 输出目录路径，默认为None时会自动生成
         task_id: 任务ID，用于生成唯一的输出目录，默认为None时会自动生成
+        video_origin_paths: 多个原始视频路径，脚本片段可用 video_id/video_name 指定来源
 
     Returns:
         Dict[str, str]: 片段ID到裁剪后视频路径的映射
     """
-    # 检查视频文件是否存在
-    if not os.path.exists(video_origin_path):
-        raise FileNotFoundError(f"视频文件不存在: {video_origin_path}")
+    video_source_paths = _normalize_video_origin_paths(video_origin_path, video_origin_paths)
+    if not video_source_paths:
+        raise FileNotFoundError("视频文件不存在: 未提供原始视频路径")
+
+    missing_video_paths = [item for item in video_source_paths if not os.path.exists(item)]
+    if missing_video_paths:
+        raise FileNotFoundError(f"视频文件不存在: {', '.join(missing_video_paths)}")
 
     # 如果未提供task_id，则根据输入生成一个唯一ID
     if task_id is None:
-        content_for_hash = f"{video_origin_path}_{json.dumps(script_list)}"
+        content_for_hash = f"{json.dumps(video_source_paths, ensure_ascii=False)}_{json.dumps(script_list, ensure_ascii=False)}"
         task_id = hashlib.md5(content_for_hash.encode()).hexdigest()
 
     # 设置输出目录
@@ -840,29 +922,33 @@ def clip_video_unified(
     failed_clips = []
     success_count = 0
 
-    logger.info(f"📹 开始统一视频裁剪，总共{total_clips}个片段")
+    logger.info(f"📹 开始统一视频裁剪，总共{total_clips}个片段，源视频{len(video_source_paths)}个")
 
     for i, script_item in enumerate(script_list, 1):
         _id = script_item.get("_id")
         ost = script_item.get("OST", 0)
         timestamp = script_item["timestamp"]
+        source_video_path = _resolve_script_video_path(script_item, video_source_paths)
 
-        logger.info(f"📹 [{i}/{total_clips}] 处理片段 ID:{_id}, OST:{ost}, 时间戳:{timestamp}")
+        logger.info(
+            f"📹 [{i}/{total_clips}] 处理片段 ID:{_id}, OST:{ost}, "
+            f"视频:{os.path.basename(source_video_path)}, 时间戳:{timestamp}"
+        )
 
         try:
             if ost == 0:  # 纯解说片段
                 output_path = _process_narration_only_segment(
-                    video_origin_path, script_item, tts_map, output_dir,
+                    source_video_path, script_item, tts_map, output_dir,
                     encoder_config, hwaccel_args
                 )
             elif ost == 1:  # 纯原声片段
                 output_path = _process_original_audio_segment(
-                    video_origin_path, script_item, output_dir,
+                    source_video_path, script_item, output_dir,
                     encoder_config, hwaccel_args
                 )
             elif ost == 2:  # 解说+原声混合片段
                 output_path = _process_mixed_segment(
-                    video_origin_path, script_item, tts_map, output_dir,
+                    source_video_path, script_item, tts_map, output_dir,
                     encoder_config, hwaccel_args
                 )
             else:
diff --git a/app/services/jianying_task.py b/app/services/jianying_task.py
index 345f6b7..a24304c 100644
--- a/app/services/jianying_task.py
+++ b/app/services/jianying_task.py
@@ -107,7 +107,7 @@ def _clamp_duration_to_media(
 
 
 def _normalize_indextts_reference_audio(params: VideoClipParams) -> None:
-    """Ensure IndexTTS engines use the configured reference audio instead of a stale UI voice."""
+    """Ensure local clone TTS engines use configured reference audio instead of a stale UI voice."""
     params.tts_engine = config.normalize_tts_engine_name(params.tts_engine)
     if params.tts_engine == config.INDEXTTS_ENGINE:
         tts_config = config.indextts
@@ -117,6 +117,12 @@ def _normalize_indextts_reference_audio(params: VideoClipParams) -> None:
         tts_config = config.indextts2
         voice_prefix = config.INDEXTTS2_VOICE_PREFIX
         display_name = "IndexTTS-2"
+    elif params.tts_engine == config.OMNIVOICE_ENGINE:
+        tts_config = config.omnivoice
+        if tts_config.get("mode", "auto") != "voice_clone":
+            return
+        voice_prefix = config.OMNIVOICE_VOICE_PREFIX
+        display_name = "OmniVoice"
     else:
         return
 
@@ -199,6 +205,7 @@ def start_export_jianying_draft(task_id: str, params: VideoClipParams):
     logger.info("\n\n## 3. 统一视频裁剪（基于OST类型）")
     video_clip_result = clip_video.clip_video_unified(
         video_origin_path=params.video_origin_path,
+        video_origin_paths=getattr(params, "video_origin_paths", []),
         script_list=list_script,
         tts_results=tts_results
     )
diff --git a/app/services/llm/unified_service.py b/app/services/llm/unified_service.py
index 0c31b5a..63cc48f 100644
--- a/app/services/llm/unified_service.py
+++ b/app/services/llm/unified_service.py
@@ -12,6 +12,7 @@ from loguru import logger
 from .manager import LLMServiceManager
 from .validators import OutputValidator
 from .exceptions import LLMServiceError
+from app.services.prompts import PromptManager
 
 # 提供商注册由 webui.py:main() 显式调用（见 LLM 提供商注册机制重构）
 # 这样更可靠，错误也更容易调试
@@ -181,12 +182,20 @@ class UnifiedLLMService:
             LLMServiceError: 服务调用失败时抛出
         """
         try:
-            # 构建分析提示词
-            system_prompt = "你是一位专业的剧本分析师和剧情概括助手。请仔细分析字幕内容，提取关键剧情信息。"
+            prompt = PromptManager.get_prompt(
+                category="short_drama_narration",
+                name="plot_analysis",
+                parameters={"subtitle_content": subtitle_content},
+            )
+            prompt_object = PromptManager.get_prompt_object(
+                category="short_drama_narration",
+                name="plot_analysis",
+            )
+            system_prompt = prompt_object.get_system_prompt()
             
             # 生成分析结果
             result = await UnifiedLLMService.generate_text(
-                prompt=subtitle_content,
+                prompt=prompt,
                 system_prompt=system_prompt,
                 provider=provider,
                 temperature=temperature,
diff --git a/app/services/llm/validators.py b/app/services/llm/validators.py
index 1614e14..1ef30e2 100644
--- a/app/services/llm/validators.py
+++ b/app/services/llm/validators.py
@@ -113,6 +113,8 @@ class OutputValidator:
                             "required": ["_id", "timestamp", "picture", "narration"],
                             "properties": {
                                 "_id": {"type": "number"},
+                                "video_id": {"type": "number"},
+                                "video_name": {"type": "string"},
                                 "timestamp": {"type": "string"},
                                 "picture": {"type": "string"},
                                 "narration": {"type": "string"},
@@ -161,6 +163,16 @@ class OutputValidator:
         item_id = item.get("_id")
         if not isinstance(item_id, (int, float)) or item_id <= 0:
             raise ValidationError(f"第{index+1}项ID必须为正整数: {item_id}", "invalid_id")
+
+        video_id = item.get("video_id")
+        if video_id not in (None, "") and (
+            not isinstance(video_id, (int, float)) or video_id <= 0
+        ):
+            raise ValidationError(f"第{index+1}项video_id必须为正整数: {video_id}", "invalid_video_id")
+
+        video_name = item.get("video_name")
+        if video_name not in (None, "") and not isinstance(video_name, str):
+            raise ValidationError(f"第{index+1}项video_name必须为字符串: {video_name}", "invalid_video_name")
     
     @staticmethod
     def validate_subtitle_analysis(output: str) -> str:
diff --git a/app/services/prompts/short_drama_narration/plot_analysis.py b/app/services/prompts/short_drama_narration/plot_analysis.py
index 0f8ffb1..a50dbe7 100644
--- a/app/services/prompts/short_drama_narration/plot_analysis.py
+++ b/app/services/prompts/short_drama_narration/plot_analysis.py
@@ -19,72 +19,79 @@ class PlotAnalysisPrompt(TextPrompt):
         metadata = PromptMetadata(
             name="plot_analysis",
             category="short_drama_narration",
-            version="v1.0",
-            description="分析短剧字幕内容，提供详细的剧情分析和分段解析",
+            version="v1.1",
+            description="结合字幕和可选联网检索上下文，输出适合短剧解说脚本生成的结构化剧情理解",
             model_type=ModelType.TEXT,
             output_format=OutputFormat.TEXT,
-            tags=["短剧", "剧情分析", "字幕解析", "分段分析"],
+            tags=["短剧", "剧情分析", "字幕解析", "分段分析", "联网检索", "解说脚本素材"],
             parameters=["subtitle_content"]
         )
         super().__init__(metadata)
         
-        self._system_prompt = "你是一位专业的剧本分析师和剧情概括助手。"
+        self._system_prompt = "你是一位专业的短剧解说策划和剧本分析师。请输出克制、结构化、可直接供下游解说脚本生成使用的剧情理解材料。"
         
     def get_template(self) -> str:
         return """# 角色
-你是一位专业的剧本分析师和剧情概括助手。
+你是一位专业的短剧解说策划和剧本分析师。你的输出不是给观众看的成片文案，而是给下游“短剧解说脚本生成器”使用的结构化剧情理解材料。
 
-# 任务
-我将为你提供一部短剧的完整字幕文本。请你基于这些字幕，完成以下任务：
-1.  **整体剧情分析**：简要概括整个短剧的核心剧情脉络、主要冲突和结局（如果有的话）。
-2.  **分段剧情解析与时间戳定位**：
-    *   将整个短剧划分为若干个关键的剧情段落（例如：开端、发展、转折、高潮、结局，或根据具体情节自然划分）。
-    *   段落数应该与字幕长度成正比。
-    *   对于每一个剧情段落：
-        *   **概括该段落的主要内容**：用简洁的语言描述这段剧情发生了什么。
-        *   **标注对应的时间戳范围**：明确指出该剧情段落对应的开始字幕时间戳和结束字幕时间戳。请直接从字幕中提取时间信息。
+# 输入说明
+下面的输入可能只包含一个视频的原始字幕，也可能包含多个视频文件的字幕；也可能同时包含 Tavily 联网检索结果和原始字幕。
+- 联网检索结果只能用于辅助识别短剧名称、人物关系、时代背景、公开剧情梗概。
+- 原始字幕是唯一可信的当前片段事实来源。
+- 如果联网检索结果与字幕冲突，必须以字幕为准。
+- 如果联网检索结果包含当前字幕尚未出现的后续剧情，只能放在“字幕未覆盖/需谨慎信息”中，不能写进当前剧情事实。
+- 多个视频字幕会以“视频 1: 文件名”“视频 2: 文件名”等标题分隔。时间戳均为对应视频内部时间，不是拼接后的累计时间。
 
-# 输入格式
-字幕内容通常包含时间戳和对话，例如：
-```
-00:00:05,000 --> 00:00:10,000
-[角色A]: 你好吗？
-00:00:10,500 --> 00:00:15,000
-[角色B]: 我很好，谢谢。发生了一些有趣的事情。
-... (更多字幕内容) ...
-```
-我将把实际字幕粘贴在下方。
+# 核心任务
+请基于输入完成剧情理解，目标是帮助后续生成高质量短剧解说脚本：
+1. 识别短剧名称、当前字幕范围、视频来源、联网检索辅助信息和字幕事实边界。
+2. 统一人物称呼，避免同一人物出现多个名字写法。
+3. 用 100-180 字概括当前字幕覆盖的剧情，不提前剧透字幕未出现的内容。
+4. 按视频来源和字幕时间顺序拆分关键剧情段落，并为每段标注准确 video_id / video_name / 时间戳。
+5. 提炼解说创作可用的钩子、冲突、爽点/泪点/悬念点和建议保留原声片段。
 
-# 输出格式要求
-请按照以下格式清晰地呈现分析结果：
+# 强制输出规则
+1. 禁止输出寒暄、解释身份或“好的，我将……”等聊天式开场。
+2. 禁止编造字幕中没有的具体事件、对白、关系进展或结局。
+3. 时间戳必须直接来自对应视频字幕；无法确定时写“字幕未明确”，不要猜测。
+4. 多视频场景下必须明确每段来自哪个视频文件，禁止把不同视频的同名时间戳混在一起。
+5. 人名必须统一：优先采用联网检索中的正式名称；如果字幕写法不同，在人物表中保留“字幕称呼”。
+6. 内容要简洁、客观、可复用，避免散文化长段落。
+7. 必须严格按照下面的 Markdown 格式输出，不要添加额外章节。
 
-**一、整体剧情概括：**
-[此处填写对整个短剧剧情的概括]
+# 输出格式
+## 一、基础识别
+- 短剧名称：[如输入可判断则填写，否则写“未知”]
+- 当前字幕范围：[开始时间戳] --> [结束时间戳]；无法确定则写“字幕未明确”
+- 视频来源：[列出视频编号、文件名和各自字幕时间范围；单视频也要写]
+- 联网检索确认：[仅写可辅助理解的公开信息；没有联网结果则写“未启用/未提供”]
+- 字幕内实际出现：[列出当前字幕真实出现的关键事实，2-4 条]
+- 字幕未覆盖/需谨慎信息：[列出联网结果提到但当前字幕未发生的内容；没有则写“无”]
 
-**二、分段剧情解析：**
+## 二、人物与关系
+| 统一称呼 | 字幕称呼 | 身份/关系 | 当前剧情作用 | 确定性 |
+|---|---|---|---|---|
+| [人物名] | [字幕原文称呼] | [身份或关系] | [在当前片段中的作用] | 字幕明确/联网辅助/合理推断 |
 
-**剧情段落 1：[段落主题/概括，例如：主角登场与背景介绍]**
-*   **时间戳：** [开始时间戳] --> [结束时间戳]
-*   **内容概要：** [对这段剧情的详细描述]
+## 三、整体剧情概括
+[100-180 字，只概括当前字幕覆盖的剧情。必须包含核心冲突、人物动机和当前悬念。]
 
-**剧情段落 2：[段落主题/概括，例如：第一个冲突出现]**
-*   **时间戳：** [开始时间戳] --> [结束时间戳]
-*   **内容概要：** [对这段剧情的详细描述]
+## 四、分段剧情解析
+| 视频 | 时间戳 | 段落主题 | 剧情事件 | 情绪/冲突功能 |
+|---|---|---|---|---|
+| [video_id + video_name] | [开始] --> [结束] | [简短主题] | [当前段落发生了什么] | [铺垫/冲突升级/人物塑造/反转/悬念/情绪爆发等] |
 
-... (根据实际剧情段落数量继续) ...
+## 五、解说创作重点
+- 开场钩子：[用一句话指出最适合开场抓人的冲突或疑问]
+- 核心冲突：[当前片段最主要的矛盾]
+- 爽点/泪点/情绪点：[列 1-3 条，没有则写“无明显”]
+- 悬念点：[当前片段留下的疑问或后续期待]
+- 建议保留原声片段：
+  1. [video_id + video_name + 时间戳]：[保留理由；如果没有合适原声，写“无明显”]
 
-**剧情段落 N：[段落主题/概括，例如：结局与反思]**
-*   **时间戳：** [开始时间戳] --> [结束时间戳]
-*   **内容概要：** [对这段剧情的详细描述]
+## 六、联网信息校验
+- 可用于辅助理解的信息：[联网结果中可帮助理解当前字幕的信息；没有则写“无”]
+- 与字幕不一致或字幕未覆盖的信息：[必须列出，不要混入当前剧情事实；没有则写“无”]
 
-# 注意事项
-*   请确保时间戳的准确性，直接引用字幕中的时间。
-*   剧情段落的划分应合乎逻辑，能够反映剧情的起承转合。
-*   语言表达应简洁、准确、客观。
-
-# 限制
-1. 严禁输出与分析结果无关的内容
-2. 时间戳必须严格按照字幕中的实际时间
-
-# 请处理以下字幕：
+# 输入内容
 ${subtitle_content}"""
diff --git a/app/services/prompts/short_drama_narration/script_generation.py b/app/services/prompts/short_drama_narration/script_generation.py
index 0184cb1..234fc98 100644
--- a/app/services/prompts/short_drama_narration/script_generation.py
+++ b/app/services/prompts/short_drama_narration/script_generation.py
@@ -43,11 +43,14 @@ class ScriptGenerationPrompt(ParameterizedPrompt):
 ${plot_analysis}
 </plot>
 
-### 原始字幕（含精确时间戳）
+### 原始字幕（含视频编号和精确时间戳）
 <subtitles>
 ${subtitle_content}
 </subtitles>
 
+字幕可能来自多个视频文件。每个字幕分段标题会以“视频 1: 文件名”“视频 2: 文件名”等形式标识来源。
+生成脚本时必须把每个片段绑定到对应视频来源，时间戳表示该视频文件内部的局部时间，不是把多个视频拼接后的全局时间。
+
 ## 短剧解说创作核心要素
 
 ### 1. 黄金开场（3秒法则）
@@ -137,11 +140,18 @@ ${subtitle_content}
 
 ### 时间戳管理（绝对不能违反）
 - **时间戳绝对不能重叠**，确保剪辑后无重复画面
-- **时间段必须连续且不交叉**，严格按时间顺序排列
-- **每个时间戳都必须在原始字幕中找到对应范围**
+- **同一个 video_id 内的时间段必须连续且不交叉**，严格按该视频内时间顺序排列
+- **跨视频可以切换 video_id**，但每个时间戳都必须来自对应视频字幕分段
+- **每个时间戳都必须在对应视频的原始字幕中找到对应范围**
 - 可以拆分原时间片段，但必须保持时间连续性
 - 时间戳的格式必须与原始字幕中的格式完全一致
 
+### 多视频来源规范（多集/多文件必须遵守）
+- **video_id**：必须填写，取字幕分段标题里的视频编号，例如“视频 3”就填 3
+- **video_name**：必须填写对应的视频文件名，例如“3_20260607002212.mp4”
+- **timestamp**：只填写对应 video_id 内部的时间范围，不要换算成多个视频拼接后的累计时间
+- 如果剧情跨多个视频推进，脚本可以按故事顺序在不同 video_id 之间切换，但不得把视频 2 的时间戳写到 video_id=1
+
 ### 时长控制（1/3原则）
 - **解说视频总长度 = 原视频长度的 1/3**
 - 精确控制节奏和密度，既不能过短也不能过长
@@ -159,6 +169,8 @@ ${subtitle_content}
 ```json
 {
   "_id": 序号,
+  "video_id": 视频编号,
+  "video_name": "视频文件名",
   "timestamp": "开始时间-结束时间",
   "picture": "画面内容描述",
   "narration": "播放原片+序号",
@@ -242,6 +254,8 @@ ${subtitle_content}
   "items": [
     {
         "_id": 1,
+        "video_id": 1,
+        "video_name": "1.mp4",
         "timestamp": "00:00:01,000-00:00:05,500",
         "picture": "女主角林小雨慌张地道歉，男主角沈墨轩冷漠地看着她",
         "narration": "一个普通女孩的命运即将因为一杯咖啡彻底改变！她撞到的这个男人，竟然是...",
@@ -249,6 +263,8 @@ ${subtitle_content}
     },
     {
         "_id": 2,
+        "video_id": 1,
+        "video_name": "1.mp4",
         "timestamp": "00:00:05,500-00:00:08,000",
         "picture": "沈墨轩质问林小雨，语气冷厉威严",
         "narration": "播放原片2",
@@ -256,6 +272,8 @@ ${subtitle_content}
     },
     {
         "_id": 3,
+        "video_id": 2,
+        "video_name": "2.mp4",
         "timestamp": "00:00:08,000-00:00:12,000",
         "picture": "林小雨惊慌失措，沈墨轩眼中闪过一丝兴趣",
         "narration": "霸道总裁的经典开场！一杯咖啡引发的爱情故事就这样开始了...",
@@ -281,6 +299,7 @@ ${subtitle_content}
 - **原声片段标识**：OST=1表示原声，OST=0表示解说
 - **原声格式规范**：narration字段必须使用"播放原片+序号"格式
 - **关键情绪点**：必须保留原片原声，增强观众代入感
+- **视频来源**：每个片段必须包含 video_id 和 video_name，用于定位多个上传视频中的源文件
 - **时间戳精度**：精确到毫秒级别，确保与字幕完美匹配
 - **逻辑连贯性**：严格遵循剧情发展顺序
 
diff --git a/app/services/task.py b/app/services/task.py
index bf8c45b..28b05ea 100644
--- a/app/services/task.py
+++ b/app/services/task.py
@@ -225,6 +225,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
     # 使用新的统一裁剪策略
     video_clip_result = clip_video.clip_video_unified(
         video_origin_path=params.video_origin_path,
+        video_origin_paths=getattr(params, "video_origin_paths", []),
         script_list=list_script,
         tts_results=tts_results
     )
@@ -477,6 +478,7 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
     # 使用新的统一裁剪策略
     video_clip_result = clip_video.clip_video_unified(
         video_origin_path=params.video_origin_path,
+        video_origin_paths=getattr(params, "video_origin_paths", []),
         script_list=list_script,
         tts_results=tts_results
     )
diff --git a/app/services/tavily_search.py b/app/services/tavily_search.py
new file mode 100644
index 0000000..586a7ee
--- /dev/null
+++ b/app/services/tavily_search.py
@@ -0,0 +1,150 @@
+"""Tavily-powered web search helpers for plot analysis."""
+
+from __future__ import annotations
+
+import os
+from typing import Any
+
+import requests
+from loguru import logger
+
+
+TAVILY_API_BASE_URL = "https://api.tavily.com"
+DEFAULT_SEARCH_DEPTH = "basic"
+DEFAULT_MAX_RESULTS = 5
+DEFAULT_TIMEOUT = 20
+
+
+class TavilySearchError(RuntimeError):
+    """Raised when Tavily search cannot be completed."""
+
+
+def _trim_text(value: Any, max_chars: int) -> str:
+    """Return the stripped text of ``value``, cut to ``max_chars`` plus "..." if longer.
+
+    Falsy values (``None``, ``""``, ``0``) yield an empty string.
+    """
+    text = str(value or "").strip()
+    if len(text) <= max_chars:
+        return text
+    return f"{text[:max_chars].rstrip()}..."
+
+
+def search_short_drama(
+    short_name: str,
+    api_key: str | None = None,
+    *,
+    search_depth: str = DEFAULT_SEARCH_DEPTH,
+    max_results: int = DEFAULT_MAX_RESULTS,
+    timeout: int = DEFAULT_TIMEOUT,
+) -> dict[str, Any]:
+    """Search web context for a short drama name with Tavily.
+
+    Args:
+        short_name: Drama title; surrounding whitespace is ignored.
+        api_key: Tavily key; falls back to the ``TAVILY_API_KEY`` environment variable.
+        search_depth: Value sent as ``search_depth``; empty means ``DEFAULT_SEARCH_DEPTH``.
+        max_results: Requested result count, clamped to 1..10. Values that cannot
+            be read as an integer (e.g. a hand-edited config string) fall back to
+            ``DEFAULT_MAX_RESULTS``.
+        timeout: Timeout in seconds passed to ``requests.post``.
+
+    Returns:
+        The decoded JSON object returned by the ``/search`` endpoint.
+
+    Raises:
+        TavilySearchError: On an empty name, a missing key, a transport error,
+            an HTTP status >= 400, or a body that is not a JSON object.
+    """
+    short_name = str(short_name or "").strip()
+    if not short_name:
+        raise TavilySearchError("短剧名称不能为空")
+
+    api_key = (api_key or os.getenv("TAVILY_API_KEY") or "").strip()
+    if not api_key:
+        raise TavilySearchError("Tavily API Key 未配置")
+
+    try:
+        result_limit = int(max_results or DEFAULT_MAX_RESULTS)
+    except (TypeError, ValueError, OverflowError):
+        # A malformed setting must not abort the search with a bare ValueError.
+        result_limit = DEFAULT_MAX_RESULTS
+
+    query = f"{short_name} 短剧 剧情 介绍 人物 结局"
+    payload = {
+        "query": query,
+        "search_depth": search_depth or DEFAULT_SEARCH_DEPTH,
+        "topic": "general",
+        "max_results": max(1, min(result_limit, 10)),
+        "include_answer": True,
+        "include_raw_content": False,
+        "include_images": False,
+    }
+
+    try:
+        response = requests.post(
+            f"{TAVILY_API_BASE_URL}/search",
+            headers={
+                "Authorization": f"Bearer {api_key}",
+                "Content-Type": "application/json",
+            },
+            json=payload,
+            timeout=timeout,
+        )
+    except requests.RequestException as exc:
+        raise TavilySearchError(f"Tavily 请求失败: {exc}") from exc
+
+    if response.status_code >= 400:
+        message = _trim_text(response.text, 500)
+        raise TavilySearchError(f"Tavily 请求失败: HTTP {response.status_code} {message}")
+
+    try:
+        data = response.json()
+    except ValueError as exc:
+        raise TavilySearchError("Tavily 返回内容不是有效 JSON") from exc
+    if not isinstance(data, dict):
+        # The log line below and the formatter use ``.get``; an array/scalar is unusable.
+        raise TavilySearchError("Tavily 返回内容不是 JSON 对象")
+
+    logger.info(
+        "Tavily 短剧检索完成: query={}, results={}",
+        query,
+        len(data.get("results") or []),
+    )
+    return data
+
+
+def format_search_context(search_data: dict[str, Any], *, max_chars: int = 6000) -> str:
+    """Format Tavily response into compact LLM context.
+
+    Returns an empty string for empty input. Entries in ``results`` that are
+    not mappings are skipped instead of raising ``AttributeError``.
+    """
+    if not search_data:
+        return ""
+
+    lines = [
+        "# Tavily 联网检索结果",
+        f"检索 query: {search_data.get('query', '')}",
+    ]
+
+    answer = _trim_text(search_data.get("answer"), 1200)
+    if answer:
+        lines.extend(["", "## 综合回答", answer])
+
+    results = [item for item in (search_data.get("results") or []) if isinstance(item, dict)]
+    if results:
+        lines.extend(["", "## 搜索来源"])
+    for index, result in enumerate(results, start=1):
+        title = _trim_text(result.get("title"), 120)
+        url = _trim_text(result.get("url"), 240)
+        content = _trim_text(result.get("content") or result.get("raw_content"), 700)
+        lines.extend(
+            [
+                f"{index}. 标题: {title}",
+                f"   来源: {url}",
+                f"   摘要: {content}",
+            ]
+        )
+
+    return _trim_text("\n".join(lines).strip(), max_chars)
diff --git a/app/services/test_jianying_task_unittest.py b/app/services/test_jianying_task_unittest.py
index 18897a4..0a1660f 100644
--- a/app/services/test_jianying_task_unittest.py
+++ b/app/services/test_jianying_task_unittest.py
@@ -51,6 +51,23 @@ class JianyingTaskTests(unittest.TestCase):
 
             self.assertEqual(f"indextts2:{ref_path}", params.voice_name)
 
+    def test_normalize_omnivoice_clone_uses_valid_param_reference(self):  # voice_clone mode: an existing reference file in voice_name is expected to be kept
+        with tempfile.NamedTemporaryFile(suffix=".wav") as ref:
+            params = VideoClipParams(tts_engine="omnivoice", voice_name=f"omnivoice:{ref.name}")
+
+            with patch.dict(jianying_task.config.omnivoice, {"mode": "voice_clone"}, clear=False):  # force clone mode for this call only
+                jianying_task._normalize_indextts_reference_audio(params)
+
+            self.assertEqual(f"omnivoice:{ref.name}", params.voice_name)
+
+    def test_normalize_omnivoice_auto_does_not_require_reference(self):  # non-clone mode: voice_name is expected to stay untouched
+        params = VideoClipParams(tts_engine="omnivoice", voice_name="omnivoice:auto")
+
+        with patch.dict(jianying_task.config.omnivoice, {"mode": "auto", "reference_audio": ""}, clear=False):  # no reference audio configured
+            jianying_task._normalize_indextts_reference_audio(params)
+
+        self.assertEqual("omnivoice:auto", params.voice_name)
+
     def test_normalize_indextts_requires_existing_reference_audio(self):
         params = VideoClipParams(tts_engine="indextts", voice_name="zh-CN-YunjianNeural")
 
diff --git a/app/services/test_multi_video_script_sources_unittest.py b/app/services/test_multi_video_script_sources_unittest.py
new file mode 100644
index 0000000..dd6fce8
--- /dev/null
+++ b/app/services/test_multi_video_script_sources_unittest.py
@@ -0,0 +1,84 @@
+import json
+import os
+import tempfile
+import unittest
+from unittest import mock
+
+from app.services import clip_video
+from app.utils import check_script
+
+
+class TestMultiVideoScriptSources(unittest.TestCase):  # script video_id/video_name fields and per-item source selection
+    def test_check_format_accepts_optional_video_source_fields(self):  # an item carrying video_id and video_name must pass check_format
+        script = [
+            {
+                "_id": 1,
+                "video_id": 2,
+                "video_name": "2.mp4",
+                "timestamp": "00:00:00,000-00:00:03,000",
+                "picture": "画面",
+                "narration": "解说",
+                "OST": 0,
+            }
+        ]
+
+        result = check_script.check_format(json.dumps(script, ensure_ascii=False))
+
+        self.assertTrue(result["success"])
+
+    def test_clip_video_unified_resolves_source_by_video_id_and_name(self):  # each item must be cut from the video it names
+        with tempfile.TemporaryDirectory() as temp_dir:
+            video_1 = os.path.join(temp_dir, "1.mp4")
+            video_2 = os.path.join(temp_dir, "2.mp4")
+            for video_path in [video_1, video_2]:
+                with open(video_path, "wb") as file:
+                    file.write(b"video")  # placeholder bytes; the file only has to exist
+
+            output_dir = os.path.join(temp_dir, "clips")
+            used_sources = []
+
+            def fake_process(source_video_path, script_item, output_dir_arg, *_args):  # stand-in for _process_original_audio_segment: records the source and writes a dummy clip
+                used_sources.append(source_video_path)
+                output_path = os.path.join(output_dir_arg, f"{script_item['_id']}.mp4")
+                with open(output_path, "wb") as file:
+                    file.write(b"clip")
+                return output_path
+
+            script_list = [
+                {
+                    "_id": 1,
+                    "video_id": 2,  # selects the second source by position
+                    "timestamp": "00:00:00,000-00:00:03,000",
+                    "picture": "视频2画面",
+                    "narration": "播放原片1",
+                    "OST": 1,
+                },
+                {
+                    "_id": 2,
+                    "video_name": "1.mp4",  # selects the first source by file name
+                    "timestamp": "00:00:03,000-00:00:06,000",
+                    "picture": "视频1画面",
+                    "narration": "播放原片2",
+                    "OST": 1,
+                },
+            ]
+
+            with (
+                mock.patch.object(clip_video, "check_hardware_acceleration", return_value=None),
+                mock.patch.object(clip_video, "_process_original_audio_segment", side_effect=fake_process),
+            ):
+                result = clip_video.clip_video_unified(
+                    video_origin_path=video_1,
+                    video_origin_paths=[video_1, video_2],
+                    script_list=script_list,
+                    tts_results=[],
+                    output_dir=output_dir,
+                    task_id="multi-video-test",
+                )
+
+            self.assertEqual([video_2, video_1], used_sources)  # sources follow script order, not upload order
+            self.assertEqual({1, 2}, set(result.keys()))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/app/services/voice.py b/app/services/voice.py
index 2be5c87..476c2fe 100644
--- a/app/services/voice.py
+++ b/app/services/voice.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import os
 import re
 import json
@@ -1298,6 +1300,10 @@ def tts(
     if tts_engine == config.INDEXTTS2_ENGINE:
         logger.info("分发到 IndexTTS-2")
         return indextts2_tts(text, voice_name, voice_file)
+
+    if tts_engine == config.OMNIVOICE_ENGINE:
+        logger.info("分发到 OmniVoice")
+        return omnivoice_tts(text, voice_name, voice_file, speed=voice_rate)
     
     if tts_engine == "doubaotts":
         logger.info("分发到豆包语音 TTS")
@@ -1783,7 +1789,11 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
     voice_name = config.normalize_indextts_voice_prefix(parse_voice_name(voice_name))
     output_dir = utils.task_dir(task_id)
     tts_results = []
-    audio_extension = ".wav" if tts_engine in (config.INDEXTTS_ENGINE, config.INDEXTTS2_ENGINE) else ".mp3"
+    audio_extension = ".wav" if tts_engine in (
+        config.INDEXTTS_ENGINE,
+        config.INDEXTTS2_ENGINE,
+        config.OMNIVOICE_ENGINE,
+    ) else ".mp3"
 
     for item in list_script:
         if item['OST'] != 1:
@@ -1809,11 +1819,11 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
                              f"或者使用其他 tts 引擎")
                 continue
             else:
-                # SoulVoice、Qwen3、IndexTTS、豆包语音 引擎不生成精确字幕文件
+                # SoulVoice、Qwen3、IndexTTS、OmniVoice、豆包语音 引擎不生成精确字幕文件
                 if (
                     is_soulvoice_voice(voice_name)
                     or is_qwen_engine(tts_engine)
-                    or tts_engine in (config.INDEXTTS_ENGINE, config.INDEXTTS2_ENGINE)
+                    or tts_engine in (config.INDEXTTS_ENGINE, config.INDEXTTS2_ENGINE, config.OMNIVOICE_ENGINE)
                     or tts_engine == "doubaotts"
                 ):
                     # 获取实际音频文件的时长
@@ -2256,6 +2266,17 @@ def parse_indextts2_voice(voice_name: str) -> str:
     return voice_name
 
 
+def parse_omnivoice_voice(voice_name: str) -> str:
+    """
+    Strip the OmniVoice prefix (format: omnivoice:reference_audio_path) from a voice name.
+    Returns the reference audio path or mode name; any other input is returned unchanged.
+    """
+    if not isinstance(voice_name, str):
+        return voice_name
+    prefix = config.OMNIVOICE_VOICE_PREFIX
+    return voice_name[len(prefix):] if voice_name.startswith(prefix) else voice_name
+
+
 def indextts_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]:
     """
     使用 IndexTTS-1.5 API 进行零样本语音克隆
@@ -2493,3 +2514,141 @@ def indextts2_tts(text: str, voice_name: str, voice_file: str) -> Union[SubMaker
 
     logger.error("IndexTTS-2 TTS 生成失败，已达到最大重试次数")
     return None
+
+
+def _normalize_omnivoice_api_url(api_url: str) -> str:
+    """Return the OmniVoice POST /tts endpoint for a configured URL (blank input -> local default)."""
+    # Strip before defaulting and drop trailing "/" so "  " and ".../tts/" no longer yield "/tts" or ".../tts/tts".
+    api_url = (api_url or "").strip().rstrip("/") or "http://127.0.0.1:7866/tts"
+    if api_url.endswith("/tts/json"):
+        return f"{api_url[:-len('/tts/json')]}/tts"
+    return api_url if api_url.endswith("/tts") else f"{api_url}/tts"
+
+
+def _download_omnivoice_audio(response: requests.Response, api_url: str, voice_file: str, proxies: dict) -> bool:
+    """Write the OmniVoice audio to voice_file and report whether the saved file is non-empty."""
+    is_json = "application/json" in response.headers.get("content-type", "").lower()
+    # Non-JSON replies are treated as the audio itself.
+    audio_source = response
+    if is_json:
+        # JSON replies carry an "audio_url" (resolved against api_url) instead of the audio bytes.
+        payload = response.json()
+        audio_url = payload.get("audio_url") if isinstance(payload, dict) else ""
+        if not audio_url:
+            logger.error(f"OmniVoice API 响应中没有音频下载地址: {payload}")
+            return False
+        audio_source = requests.get(urljoin(api_url, audio_url), proxies=proxies, timeout=180)
+        if audio_source.status_code != 200:
+            logger.error(f"OmniVoice 音频下载失败: {audio_source.status_code} - {audio_source.text}")
+            return False
+
+    # Either the original response body or the downloaded body is written verbatim.
+    with open(voice_file, "wb") as f:
+        f.write(audio_source.content)
+    return os.path.getsize(voice_file) > 0
+
+
+def _optional_omnivoice_generation_data(voice_speed: float) -> dict:
+    """Build the optional OmniVoice form fields from the caller's speed and config.omnivoice."""
+    settings = getattr(config, "omnivoice", {}) or {}
+    # A falsy voice_speed (0 / None) falls back to the configured speed, then to 1.0.
+    payload = {"speed": voice_speed or settings.get("speed", 1.0)}
+
+    # Tuning values are forwarded only when configured (None / "" means unset).
+    for name in ("num_step", "guidance_scale", "duration"):
+        configured = settings.get(name)
+        if configured not in (None, ""):
+            payload[name] = configured
+
+    # Switches present in the config are sent as lowercase "true" / "false" strings.
+    payload.update(
+        (flag, str(bool(settings.get(flag))).lower())
+        for flag in ("denoise", "postprocess_output", "preprocess_prompt")
+        if flag in settings
+    )
+
+    return payload
+
+
+def omnivoice_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]:
+    """
+    Synthesize speech through an OmniVoice-Pack FastAPI service and write the audio to voice_file.
+    Supports auto, voice_design and voice_clone modes; returns the sub maker on success, otherwise None.
+    """
+    omnivoice_config = getattr(config, "omnivoice", {}) or {}
+    api_url = _normalize_omnivoice_api_url(omnivoice_config.get("api_url", "http://127.0.0.1:7866/tts"))
+    mode = omnivoice_config.get("mode", "auto")
+    language = (omnivoice_config.get("language", "zh") or "").strip()
+    instruct = (omnivoice_config.get("instruct", "") or "").strip()
+    ref_text = (omnivoice_config.get("ref_text", "") or "").strip()
+    parsed_voice = parse_omnivoice_voice(voice_name)
+    if mode != "voice_clone" and parsed_voice and os.path.isfile(parsed_voice):
+        mode = "voice_clone"  # a voice name that is an existing file overrides the configured mode
+    # Resolve the reference audio: prefer the voice-name path, else the configured "reference_audio".
+    reference_audio_path = ""
+    if mode == "voice_clone":
+        candidate = parsed_voice
+        if candidate and os.path.isfile(candidate):
+            reference_audio_path = candidate
+        else:
+            reference_audio_path = parse_omnivoice_voice(omnivoice_config.get("reference_audio", "") or "")
+        # Clone mode gives up (returns None) when no existing reference file was found.
+        if not reference_audio_path or not os.path.exists(reference_audio_path):
+            logger.error(f"OmniVoice 参考音频文件不存在: {reference_audio_path}")
+            return None
+    elif mode != "voice_design":
+        instruct = ""
+    # Form fields shared by every mode; instruct / ref_text are added only for their own mode.
+    data = {
+        "text": text.strip(),
+        "language": language,
+        **_optional_omnivoice_generation_data(speed),
+    }
+    if mode == "voice_design" and instruct:
+        data["instruct"] = instruct
+    if mode == "voice_clone" and ref_text:
+        data["ref_text"] = ref_text
+    # Up to 3 attempts; the reference file is reopened for each attempt and closed in the finally block.
+    proxies = _get_configured_proxies()
+    for attempt in range(3):
+        files = {}
+        try:
+            if reference_audio_path:
+                files["ref_audio"] = open(reference_audio_path, "rb")
+
+            logger.info(f"第 {attempt + 1} 次调用 OmniVoice API: {api_url}, mode={mode}")
+            response = requests.post(
+                api_url,
+                files=files or None,
+                data=data,
+                proxies=proxies,
+                timeout=240,
+            )
+            # Success needs HTTP 200 and a non-empty audio file saved by _download_omnivoice_audio.
+            if response.status_code == 200 and _download_omnivoice_audio(response, api_url, voice_file, proxies):
+                logger.info(f"OmniVoice 成功生成音频: {voice_file}, 大小: {os.path.getsize(voice_file)} 字节")
+                sub_maker = new_sub_maker()
+                duration = get_audio_duration_from_file(voice_file)
+                duration_ms = int(duration * 1000) if duration > 0 else max(1000, int(len(text) * 200))  # fallback: 200 ms per character, at least 1 s
+                add_subtitle_event(sub_maker, 0, duration_ms * 10000, text)  # presumably ms -> 100 ns ticks; confirm against add_subtitle_event
+                return sub_maker
+
+            logger.error(f"OmniVoice API 调用失败: {response.status_code} - {response.text}")
+        except requests.exceptions.Timeout:
+            logger.error(f"OmniVoice API 调用超时 (尝试 {attempt + 1}/3)")
+        except requests.exceptions.RequestException as e:
+            logger.error(f"OmniVoice API 网络错误: {str(e)} (尝试 {attempt + 1}/3)")
+        except Exception as e:
+            logger.error(f"OmniVoice TTS 处理错误: {str(e)} (尝试 {attempt + 1}/3)")
+        finally:
+            for file_obj in files.values():
+                try:
+                    file_obj.close()
+                except Exception:
+                    pass
+        # Pause 2 seconds between attempts, but not after the last one.
+        if attempt < 2:
+            time.sleep(2)
+
+    logger.error("OmniVoice TTS 生成失败，已达到最大重试次数")
+    return None
diff --git a/app/utils/check_script.py b/app/utils/check_script.py
index 9c745e6..0e6f692 100644
--- a/app/utils/check_script.py
+++ b/app/utils/check_script.py
@@ -57,6 +57,23 @@ def check_format(script_content: str) -> Dict[str, Any]:
                     'details': f'当前值: {clip["_id"]} (类型: {type(clip["_id"]).__name__})'
                 }
 
+            # 验证可选视频来源字段。旧脚本可以不包含，新脚本用于多视频定位。
+            if 'video_id' in clip and clip['video_id'] not in ("", None):
+                if not isinstance(clip['video_id'], int) or clip['video_id'] <= 0:
+                    return {
+                        'success': False,
+                        'message': f'第{i+1}个片段的video_id必须是正整数',
+                        'details': f'当前值: {clip["video_id"]} (类型: {type(clip["video_id"]).__name__})'
+                    }
+
+            if 'video_name' in clip and clip['video_name'] not in ("", None):
+                if not isinstance(clip['video_name'], str):
+                    return {
+                        'success': False,
+                        'message': f'第{i+1}个片段的video_name必须是字符串',
+                        'details': f'当前值: {clip["video_name"]} (类型: {type(clip["video_name"]).__name__})'
+                    }
+
             # 验证 timestamp 字段格式
             timestamp_pattern = r'^\d{2}:\d{2}:\d{2},\d{3}-\d{2}:\d{2}:\d{2},\d{3}$'
             if not isinstance(clip['timestamp'], str) or not re.match(timestamp_pattern, clip['timestamp']):
diff --git a/config.example.toml b/config.example.toml
index 0b807e3..547724e 100644
--- a/config.example.toml
+++ b/config.example.toml
@@ -49,6 +49,12 @@
     text_openai_max_tokens = 65536
     text_openai_thinking_level = "auto"  # auto/off/low/medium/high
 
+    # ===== Tavily 联网搜索配置 =====
+    # 用于短剧剧情理解前，按短剧名称检索公开剧情/人物/分集信息
+    tavily_api_key = ""  # 获取地址：https://app.tavily.com
+    tavily_search_depth = "basic"  # basic / advanced / fast / ultra-fast
+    tavily_max_results = 5
+
     # ===== API Keys 参考 =====
     # 主流 LLM Providers API Key 获取地址：
     #
@@ -171,6 +177,30 @@
     repetition_penalty = 10.0
     max_mel_tokens = 1500
 
+[omnivoice]
+    # OmniVoice-Pack 语音合成配置
+    # 支持 OmniVoice-Pack FastAPI 接口：POST /tts
+    api_url = "http://127.0.0.1:7866/tts"
+    language = "zh"
+
+    # 生成模式：auto / voice_design / voice_clone
+    mode = "auto"
+    instruct = ""
+
+    # voice_clone 模式下使用，音色列表复用 IndexTTS-1.5 的资源目录
+    reference_audio_source = "resource"
+    reference_audio = ""
+    ref_text = ""
+
+    # 高级生成参数
+    num_step = 32
+    guidance_scale = 2.0
+    speed = 1.0
+    duration = ""
+    denoise = true
+    postprocess_output = true
+    preprocess_prompt = true
+
 [doubaotts]
     # 豆包语音 TTS 配置
     # 申请流程：
@@ -189,7 +219,7 @@
     silence_duration = 0.125
 
 [ui]
-    # TTS引擎选择 (indextts, indextts2, edge_tts, qwen3_tts, tencent_tts, doubaotts, azure_speech)
+    # TTS引擎选择 (indextts, indextts2, omnivoice, edge_tts, qwen3_tts, tencent_tts, doubaotts, azure_speech)
     tts_engine = "indextts"
 
     # Edge TTS 配置
diff --git a/requirements.txt b/requirements.txt
index c6011de..be125ac 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,7 +2,7 @@
 requests>=2.32.0
 moviepy==2.1.1
 edge-tts==7.2.7
-streamlit>=1.57.0
+streamlit==1.56.0
 watchdog==6.0.0
 loguru>=0.7.3
 tomli>=2.2.1
diff --git a/webui.py b/webui.py
index 68c24a7..5ba26a3 100644
--- a/webui.py
+++ b/webui.py
@@ -243,6 +243,12 @@ def get_voice_name_for_tts_engine(tts_engine: str) -> str:
         if reference_audio:
             return f"{config.INDEXTTS_VOICE_PREFIX}{reference_audio}"
         return config.ui.get('voice_name', '')
+    if tts_engine == config.OMNIVOICE_ENGINE:
+        mode = config.omnivoice.get('mode', 'auto')
+        reference_audio = config.omnivoice.get('reference_audio', '')
+        if mode == 'voice_clone' and reference_audio:
+            return f"{config.OMNIVOICE_VOICE_PREFIX}{reference_audio}"
+        return f"{config.OMNIVOICE_VOICE_PREFIX}{mode}"
     if tts_engine == 'doubaotts':
         return config.ui.get('doubaotts_voice_type', 'BV700_streaming')
     if tts_engine == 'soulvoice':
@@ -263,6 +269,7 @@ def get_jianying_export_params(draft_name=None) -> VideoClipParams:
     return VideoClipParams(
         video_clip_json_path=st.session_state['video_clip_json_path'],
         video_origin_path=st.session_state['video_origin_path'],
+        video_origin_paths=st.session_state.get('video_origin_paths', []),
         tts_engine=tts_engine,
         voice_name=voice_name,
         voice_rate=voice_rate,
diff --git a/webui/components/audio_settings.py b/webui/components/audio_settings.py
index c5ec08c..cab5413 100644
--- a/webui/components/audio_settings.py
+++ b/webui/components/audio_settings.py
@@ -40,6 +40,11 @@ BGM_RESOURCE_DIR = "/Users/viccy/Downloads/tts-mp3-clone/bgms-safe"
 BGM_TRACKS_JSON = os.path.join(BGM_RESOURCE_DIR, "tracks.json")
 BGM_UPLOAD_SUBDIR = "uploaded_bgms"
 BGM_AUDIO_EXTENSIONS = (".mp3", ".wav", ".flac", ".m4a", ".aac", ".ogg")
+LOCAL_TTS_ENGINES = {
+    config.INDEXTTS_ENGINE,
+    config.INDEXTTS2_ENGINE,
+    config.OMNIVOICE_ENGINE,
+}
 
 
 def get_soulvoice_voices():
@@ -55,9 +60,10 @@ def get_soulvoice_voices():
 
 def get_tts_engine_options(tr=lambda key: key):
     """获取TTS引擎选项"""
-    return {
+    engine_options = {
         config.INDEXTTS_ENGINE: config.INDEXTTS_DISPLAY_NAME,
         config.INDEXTTS2_ENGINE: config.INDEXTTS2_DISPLAY_NAME,
+        config.OMNIVOICE_ENGINE: config.OMNIVOICE_DISPLAY_NAME,
         "edge_tts": "Edge TTS",
         "qwen3_tts": tr("Tongyi Qwen3 TTS"),
         "tencent_tts": tr("Tencent Cloud TTS"),
@@ -65,6 +71,25 @@ def get_tts_engine_options(tr=lambda key: key):
         "azure_speech": "Azure Speech Services"
     }
 
+    return {
+        engine: format_tts_engine_option(engine, display_name, tr)
+        for engine, display_name in engine_options.items()
+    }
+
+
+def get_tts_engine_deployment_label(tts_engine, tr=lambda key: key):
+    """Return the translated deployment-type label for a TTS engine."""
+    # Engines in LOCAL_TTS_ENGINES get "Local Deployment"; every other
+    # engine gets "Cloud Service".
+    label_key = "Local Deployment" if tts_engine in LOCAL_TTS_ENGINES else "Cloud Service"
+    return tr(label_key)
+
+
+def format_tts_engine_option(tts_engine, display_name, tr=lambda key: key):
+    """Format a TTS engine's dropdown text as "<display name> [<deployment label>]"."""
+    suffix = get_tts_engine_deployment_label(tts_engine, tr)
+    return "{} [{}]".format(display_name, suffix)
+
 
 def get_tts_engine_descriptions(tr=lambda key: key):
     """获取TTS引擎详细描述"""
@@ -105,6 +130,12 @@ def get_tts_engine_descriptions(tr=lambda key: key):
             "use_case": tr("IndexTTS2 use case"),
             "registration": None
         },
+        config.OMNIVOICE_ENGINE: {
+            "title": config.OMNIVOICE_DISPLAY_NAME,
+            "features": tr("OmniVoice features"),
+            "use_case": tr("OmniVoice use case"),
+            "registration": None
+        },
         "doubaotts": {
             "title": tr("Doubao TTS"),
             "features": tr("Doubao TTS features"),
@@ -546,6 +577,8 @@ def render_tts_settings(tr):
         render_indextts_tts_settings(tr)
     elif selected_engine == config.INDEXTTS2_ENGINE:
         render_indextts2_tts_settings(tr)
+    elif selected_engine == config.OMNIVOICE_ENGINE:
+        render_omnivoice_tts_settings(tr)
     elif selected_engine == "doubaotts":
         render_doubaotts_settings(tr)
 
@@ -1274,6 +1307,148 @@ def render_indextts2_tts_settings(tr):
     st.session_state['voice_pitch'] = 1.0
 
 
+def render_omnivoice_tts_settings(tr):
+    """Render the OmniVoice TTS settings and copy the chosen values into config.omnivoice."""
+    omnivoice_config = config.omnivoice
+
+    api_url = st.text_input(
+        tr("API URL"),
+        value=omnivoice_config.get("api_url", "http://127.0.0.1:7866/tts"),
+        help=tr("OmniVoice API URL Help"),
+    )
+
+    language = st.text_input(
+        tr("OmniVoice Language Code"),
+        value=omnivoice_config.get("language", "zh"),
+        help=tr("OmniVoice Language Code Help"),
+        placeholder="zh",
+    )
+
+    mode_options = [
+        ("auto", tr("OmniVoice Mode Auto")),
+        ("voice_design", tr("OmniVoice Mode Voice Design")),
+        ("voice_clone", tr("OmniVoice Mode Voice Clone")),
+    ]
+    mode_values = [item[0] for item in mode_options]
+    saved_mode = omnivoice_config.get("mode", "auto")
+    if saved_mode not in mode_values:
+        saved_mode = "auto"
+    # The selectbox works on indices: the translated label is displayed, the raw mode value is kept.
+    mode = mode_options[st.selectbox(
+        tr("OmniVoice Generation Mode"),
+        options=range(len(mode_options)),
+        index=mode_values.index(saved_mode),
+        format_func=lambda x: mode_options[x][1],
+        help=tr("OmniVoice Generation Mode Help"),
+    )][0]
+    # Start from the saved values; only the widgets of the selected mode override them below.
+    instruct = omnivoice_config.get("instruct", "")
+    reference_audio_source = omnivoice_config.get("reference_audio_source", "resource")
+    reference_audio = omnivoice_config.get("reference_audio", "")
+    ref_text = omnivoice_config.get("ref_text", "")
+
+    if mode == "voice_design":
+        instruct = st.text_area(
+            tr("OmniVoice Instruct"),
+            value=instruct,
+            help=tr("OmniVoice Instruct Help"),
+            placeholder=tr("OmniVoice Instruct Placeholder"),
+            height=80,
+        )
+    elif mode == "voice_clone":
+        reference_audio_source, reference_audio = render_indextts_reference_audio_selector(
+            tr,
+            omnivoice_config,
+            "omnivoice",
+        )
+        ref_text = st.text_area(
+            tr("OmniVoice Reference Text"),
+            value=ref_text,
+            help=tr("OmniVoice Reference Text Help"),
+            placeholder=tr("OmniVoice Reference Text Placeholder"),
+            height=90,
+        )
+
+    with st.expander(tr("Advanced Parameters"), expanded=False):
+        col1, col2 = st.columns(2)
+        with col1:
+            num_step = st.slider(
+                "Num Step",
+                min_value=4,
+                max_value=64,
+                value=int(omnivoice_config.get("num_step", 32)),
+                step=1,
+                help=tr("OmniVoice Num Step Help"),
+            )
+            guidance_scale = st.slider(
+                "Guidance Scale",
+                min_value=0.1,
+                max_value=10.0,
+                value=float(omnivoice_config.get("guidance_scale", 2.0)),
+                step=0.1,
+                help=tr("OmniVoice Guidance Scale Help"),
+            )
+            voice_rate = st.slider(
+                tr("Voice Rate"),
+                min_value=0.5,
+                max_value=2.0,
+                value=float(omnivoice_config.get("speed", 1.0)),
+                step=0.1,
+                help=tr("Voice Rate Help 0.5-2.0"),
+            )
+        with col2:
+            saved_duration = omnivoice_config.get("duration", "")
+            duration_value = float(saved_duration) if saved_duration not in (None, "") else 0.0  # unset duration is shown as 0
+            duration = st.number_input(
+                tr("OmniVoice Duration"),
+                min_value=0.0,
+                max_value=120.0,
+                value=duration_value,
+                step=0.5,
+                help=tr("OmniVoice Duration Help"),
+            )
+            denoise = st.checkbox(
+                tr("OmniVoice Denoise"),
+                value=bool(omnivoice_config.get("denoise", True)),
+                help=tr("OmniVoice Denoise Help"),
+            )
+            postprocess_output = st.checkbox(
+                tr("OmniVoice Postprocess Output"),
+                value=bool(omnivoice_config.get("postprocess_output", True)),
+                help=tr("OmniVoice Postprocess Output Help"),
+            )
+            preprocess_prompt = st.checkbox(
+                tr("OmniVoice Preprocess Prompt"),
+                value=bool(omnivoice_config.get("preprocess_prompt", True)),
+                help=tr("OmniVoice Preprocess Prompt Help"),
+            )
+
+    with st.expander(tr("OmniVoice Usage Instructions Title"), expanded=False):
+        st.markdown(tr("OmniVoice Usage Instructions"))
+    # Copy every widget value back into config.omnivoice.
+    config.omnivoice["api_url"] = api_url
+    config.omnivoice["language"] = language
+    config.omnivoice["mode"] = mode
+    config.omnivoice["instruct"] = instruct
+    config.omnivoice["reference_audio_source"] = reference_audio_source
+    config.omnivoice["reference_audio"] = reference_audio
+    config.omnivoice["ref_text"] = ref_text
+    config.omnivoice["num_step"] = num_step
+    config.omnivoice["guidance_scale"] = guidance_scale
+    config.omnivoice["speed"] = voice_rate
+    config.omnivoice["duration"] = duration if duration > 0 else ""  # 0 means unset and is stored as ""
+    config.omnivoice["denoise"] = denoise
+    config.omnivoice["postprocess_output"] = postprocess_output
+    config.omnivoice["preprocess_prompt"] = preprocess_prompt
+    # The voice name carries the reference audio path in clone mode, otherwise the mode name.
+    if mode == "voice_clone" and reference_audio:
+        config.ui["voice_name"] = f"{config.OMNIVOICE_VOICE_PREFIX}{reference_audio}"
+    else:
+        config.ui["voice_name"] = f"{config.OMNIVOICE_VOICE_PREFIX}{mode}"
+    st.session_state["voice_rate"] = voice_rate
+    st.session_state["voice_pitch"] = 1.0
+
+
 def render_doubaotts_settings(tr):
     """渲染豆包语音 TTS 设置"""
     # AK 输入
@@ -1567,6 +1742,15 @@ def render_voice_preview_new(tr, selected_engine):
                 voice_name = f"{config.INDEXTTS2_VOICE_PREFIX}{reference_audio}"
             voice_rate = 1.0  # IndexTTS-2 使用自身生成参数
             voice_pitch = 1.0
+        elif selected_engine == config.OMNIVOICE_ENGINE:
+            mode = config.omnivoice.get("mode", "auto")
+            reference_audio = config.omnivoice.get("reference_audio", "")
+            if mode == "voice_clone" and reference_audio:
+                voice_name = f"{config.OMNIVOICE_VOICE_PREFIX}{reference_audio}"
+            else:
+                voice_name = f"{config.OMNIVOICE_VOICE_PREFIX}{mode}"
+            voice_rate = config.omnivoice.get("speed", 1.0)
+            voice_pitch = 1.0
         elif selected_engine == "doubaotts":
             voice_type = config.ui.get("doubaotts_voice_type", "BV700_streaming")
             voice_name = voice_type
@@ -1579,7 +1763,11 @@ def render_voice_preview_new(tr, selected_engine):
 
         with st.spinner(tr("Synthesizing Voice")):
             temp_dir = utils.storage_dir("temp", create=True)
-            audio_format = "audio/wav" if selected_engine in (config.INDEXTTS_ENGINE, config.INDEXTTS2_ENGINE) else "audio/mp3"
+            audio_format = "audio/wav" if selected_engine in (
+                config.INDEXTTS_ENGINE,
+                config.INDEXTTS2_ENGINE,
+                config.OMNIVOICE_ENGINE,
+            ) else "audio/mp3"
             audio_extension = ".wav" if audio_format == "audio/wav" else ".mp3"
             audio_file = os.path.join(temp_dir, f"tmp-voice-{str(uuid4())}{audio_extension}")
 
diff --git a/webui/components/basic_settings.py b/webui/components/basic_settings.py
index a8185bc..1ea746c 100644
--- a/webui/components/basic_settings.py
+++ b/webui/components/basic_settings.py
@@ -260,6 +260,7 @@ def render_basic_settings(tr):
         with left_config_panel:
             render_language_settings(tr)
             render_proxy_settings(tr)
+            render_tavily_search_settings(tr)
 
         with middle_config_panel:
             render_vision_llm_settings(tr)  # 视觉分析模型设置
@@ -345,6 +346,32 @@ def render_proxy_settings(tr):
     config.ui["jianying_draft_path"] = jianying_draft_path
 
 
+def render_tavily_search_settings(tr):
+    """Render the Tavily API key input used by short drama web search."""
+    st.subheader(tr("Tavily Search Settings"))
+    key_url_label = tr('API Key URL')
+    st.markdown(f"{key_url_label}: [https://app.tavily.com](https://app.tavily.com)")
+
+    entered_key = st.text_input(
+        tr("Tavily API Key"),
+        value=config.app.get("tavily_api_key", ""),
+        type="password",
+        help=tr("Tavily API Key Help"),
+        key="tavily_api_key_input",
+    )
+    cleaned_key = str(entered_key or "").strip()
+    # Save only when update_app_config_if_changed(...) returns a truthy value.
+    if not update_app_config_if_changed("tavily_api_key", cleaned_key):
+        return
+    try:
+        config.save_config()
+        st.session_state["tavily_api_key"] = cleaned_key
+        st.success(tr("Tavily config saved"))
+    except Exception as e:
+        st.error(f"{tr('Failed to save config')}: {str(e)}")
+        logger.error(f"保存 Tavily 配置失败: {str(e)}")
+
+
 def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
     """测试视觉模型连接
 
diff --git a/webui/components/script_settings.py b/webui/components/script_settings.py
index 9b03457..d8b296e 100644
--- a/webui/components/script_settings.py
+++ b/webui/components/script_settings.py
@@ -17,7 +17,7 @@ from webui.tools.generate_script_short import generate_script_short
 from webui.tools.generate_short_summary import analyze_short_drama_plot, generate_script_short_sunmmary
 
 
-SCRIPT_TABLE_BASE_COLUMNS = ["_id", "timestamp", "picture", "narration", "OST"]
+SCRIPT_TABLE_BASE_COLUMNS = ["_id", "video_id", "video_name", "timestamp", "picture", "narration", "OST"]
 VIDEO_UPLOAD_TYPES = ["mp4", "mov", "avi", "flv", "mkv", "mpeg4"]
 VIDEO_GLOB_PATTERNS = [f"*.{suffix}" for suffix in VIDEO_UPLOAD_TYPES]
 
@@ -99,15 +99,24 @@ def _read_subtitle_file(path):
             return f.read()
 
 
-def _build_combined_subtitle_content(subtitle_paths):
+def _build_combined_subtitle_content(subtitle_paths, video_paths=None):
     sections = []
     subtitle_contents = {}
-    for subtitle_path in subtitle_paths:
+    video_paths = _normalize_video_paths(video_paths)
+    for index, subtitle_path in enumerate(subtitle_paths, start=1):
         if not subtitle_path or not os.path.exists(subtitle_path):
             continue
         content = _read_subtitle_file(subtitle_path)
         subtitle_contents[subtitle_path] = content
-        sections.append(f"# {os.path.basename(subtitle_path)}\n{content}".strip())
+        video_path = video_paths[index - 1] if index <= len(video_paths) else ""
+        if video_path:
+            header = (
+                f"# 视频 {index}: {os.path.basename(video_path)}\n"
+                f"字幕文件: {os.path.basename(subtitle_path)}"
+            )
+        else:
+            header = f"# 视频 {index}\n字幕文件: {os.path.basename(subtitle_path)}"
+        sections.append(f"{header}\n{content}".strip())
     return "\n\n".join(sections), subtitle_contents
 
 
@@ -120,7 +129,10 @@ def _selected_subtitle_paths():
 
 def _set_subtitle_state(subtitle_paths):
     subtitle_paths = _normalize_video_paths(subtitle_paths)
-    subtitle_content, subtitle_contents = _build_combined_subtitle_content(subtitle_paths)
+    subtitle_content, subtitle_contents = _build_combined_subtitle_content(
+        subtitle_paths,
+        _selected_video_paths(),
+    )
     st.session_state['subtitle_path'] = subtitle_paths[0] if subtitle_paths else None
     st.session_state['subtitle_paths'] = subtitle_paths
     st.session_state['subtitle_content'] = subtitle_content if subtitle_content else None
@@ -128,6 +140,20 @@ def _set_subtitle_state(subtitle_paths):
     st.session_state['subtitle_file_processed'] = bool(subtitle_paths)
 
 
+def _short_drama_plot_analysis_signature(subtitle_paths, video_theme, web_search_enabled, video_paths=None):
+    """Serialise the plot-analysis inputs into a deterministic JSON string used to detect changes."""
+    search_on = bool(web_search_enabled)
+    # The drama title only takes part in the signature while web search is enabled.
+    title = str(video_theme or "").strip() if search_on else ""
+    signature = {
+        "subtitle_paths": _normalize_video_paths(subtitle_paths),
+        "video_paths": _normalize_video_paths(video_paths),
+        "video_theme": title,
+        "web_search_enabled": search_on,
+    }
+    return json.dumps(signature, ensure_ascii=False, sort_keys=True)
+
+
 def render_script_panel(tr):
     """渲染脚本配置面板"""
     with st.container(border=True):
@@ -525,16 +551,71 @@ def short_drama_summary(tr):
     render_fun_asr_transcription(tr)
     render_subtitle_preview(tr)
 
-    current_subtitle_path = st.session_state.get('subtitle_path', '')
-    plot_analysis_source = st.session_state.get('short_drama_plot_analysis_subtitle_path')
-    if plot_analysis_source and plot_analysis_source != current_subtitle_path:
-        st.session_state['short_drama_plot_analysis'] = ""
-        st.session_state['short_drama_plot_analysis_subtitle_path'] = ""
+    current_subtitle_paths = _selected_subtitle_paths()
+    current_subtitle_path = current_subtitle_paths[0] if current_subtitle_paths else ''
 
-    name_cols = st.columns([4, 1.2], vertical_alignment="bottom")
+    st.markdown(
+        """
+        <style>
+        .st-key-short_drama_web_search_enabled [data-testid="stMarkdownContainer"] {
+            display: none;
+        }
+        .st-key-short_drama_web_search_enabled [data-testid="stWidgetLabel"] {
+            min-width: 0;
+            transform: translateX(-1.2rem);
+        }
+        .st-key-short_drama_web_search_enabled label {
+            align-items: center;
+            gap: 0.45rem;
+        }
+        .st-key-short_drama_web_search_enabled label > div:first-child {
+            width: 3rem !important;
+            min-width: 3rem !important;
+            height: 1.55rem !important;
+            border-radius: 999px !important;
+            border: 1px solid #d1d5db !important;
+            background: #e5e7eb !important;
+            box-shadow: inset 0 1px 2px rgba(15, 23, 42, 0.08) !important;
+            transition: background 160ms ease, border-color 160ms ease, box-shadow 160ms ease !important;
+        }
+        .st-key-short_drama_web_search_enabled label:hover > div:first-child {
+            background: #dbe3ef !important;
+            border-color: #b8c2d3 !important;
+        }
+        .st-key-short_drama_web_search_enabled label:has(input[aria-checked="true"]) > div:first-child {
+            border-color: transparent !important;
+            background: linear-gradient(135deg, #2563eb, #14b8a6) !important;
+            box-shadow: 0 6px 14px rgba(37, 99, 235, 0.22) !important;
+        }
+        .st-key-short_drama_web_search_enabled label > div:first-child > div {
+            width: 1.05rem !important;
+            height: 1.05rem !important;
+            border-radius: 999px !important;
+            background: #ffffff !important;
+            box-shadow: 0 2px 6px rgba(15, 23, 42, 0.24) !important;
+        }
+        .st-key-short_drama_web_search_enabled button[aria-label^="Help for"] {
+            color: #6b7280 !important;
+        }
+        .st-key-short_drama_web_search_enabled button[aria-label^="Help for"]:hover {
+            color: #2563eb !important;
+        }
+        </style>
+        """,
+        unsafe_allow_html=True,
+    )
+
+    name_cols = st.columns([3.4, 1.1, 2], vertical_alignment="bottom")
     with name_cols[0]:
         video_theme = st.text_input(tr("短剧名称"))
     with name_cols[1]:
+        web_search_enabled = st.toggle(
+            tr("联网搜索"),
+            key="short_drama_web_search_enabled",
+            help=tr("Enable Web Search Help"),
+            disabled=not current_subtitle_path,
+        )
+    with name_cols[2]:
         analyze_plot_clicked = st.button(
             tr("剧情理解"),
             key="short_drama_plot_analysis_button",
@@ -543,17 +624,37 @@ def short_drama_summary(tr):
         )
     st.session_state['video_theme'] = video_theme
 
+    current_signature = _short_drama_plot_analysis_signature(
+        current_subtitle_paths,
+        video_theme,
+        web_search_enabled,
+        _selected_video_paths(),
+    )
+    saved_signature = st.session_state.get('short_drama_plot_analysis_signature')
+    legacy_source = st.session_state.get('short_drama_plot_analysis_subtitle_path')
+    if (
+        (saved_signature and saved_signature != current_signature)
+        or (legacy_source and legacy_source != current_subtitle_path)
+    ):
+        st.session_state['short_drama_plot_analysis'] = ""
+        st.session_state['short_drama_plot_analysis_subtitle_path'] = ""
+        st.session_state['short_drama_plot_analysis_signature'] = ""
+
     if analyze_plot_clicked:
         with st.spinner(tr("Analyzing plot...")):
             plot_analysis = analyze_short_drama_plot(
-                current_subtitle_path,
+                current_subtitle_paths,
                 st.session_state.get('temperature', 0.7),
                 tr,
                 subtitle_content=st.session_state.get('subtitle_content', ''),
+                short_name=video_theme,
+                enable_web_search=web_search_enabled,
+                video_paths=_selected_video_paths(),
             )
         if plot_analysis:
             st.session_state['short_drama_plot_analysis'] = plot_analysis
             st.session_state['short_drama_plot_analysis_subtitle_path'] = current_subtitle_path
+            st.session_state['short_drama_plot_analysis_signature'] = current_signature
             st.success(tr("Plot analysis completed"))
 
     if st.session_state.get('short_drama_plot_analysis'):
@@ -575,7 +676,10 @@ def render_subtitle_preview(tr):
         subtitle_contents = {}
 
     if subtitle_paths and (not subtitle_content or not subtitle_contents):
-        subtitle_content, subtitle_contents = _build_combined_subtitle_content(subtitle_paths)
+        subtitle_content, subtitle_contents = _build_combined_subtitle_content(
+            subtitle_paths,
+            _selected_video_paths(),
+        )
         st.session_state['subtitle_content'] = subtitle_content
         st.session_state['subtitle_contents'] = subtitle_contents
 
@@ -724,7 +828,7 @@ def _normalize_script_table_value(column, value):
     if _is_blank_table_value(value):
         return ""
 
-    if column in {"_id", "OST"}:
+    if column in {"_id", "video_id", "OST"}:
         try:
             return int(value)
         except (TypeError, ValueError):
@@ -783,6 +887,14 @@ def render_video_script_editor(tr):
             column_order=column_order,
             column_config={
                 "_id": st.column_config.NumberColumn(tr("Script Column ID"), step=1, format="%d", width=52),
+                "video_id": st.column_config.NumberColumn(
+                    tr("Script Column Video ID"),
+                    min_value=1,
+                    step=1,
+                    format="%d",
+                    width=80,
+                ),
+                "video_name": st.column_config.TextColumn(tr("Script Column Video Name"), width=180),
                 "timestamp": st.column_config.TextColumn(tr("Script Column Timestamp"), width=200),
                 "picture": st.column_config.TextColumn(tr("Script Column Picture"), width=320),
                 "narration": st.column_config.TextColumn(tr("Script Column Narration"), width=480),
@@ -1057,7 +1169,10 @@ def render_fun_asr_transcription(tr):
             st.error(tr("Fun-ASR failed without subtitle file"))
             return
 
-        subtitle_content, subtitle_contents = _build_combined_subtitle_content(generated_paths)
+        subtitle_content, subtitle_contents = _build_combined_subtitle_content(
+            generated_paths,
+            media_paths,
+        )
         if not subtitle_content.strip():
             clear_fun_asr_subtitle_state()
             st.error(tr("Fun-ASR failed without subtitle file"))
@@ -1112,20 +1227,35 @@ def render_script_buttons(tr, params):
             generate_script_short(tr, params, custom_clips)
         elif script_path == "summary":
             # 执行 短剧解说 脚本生成
-            subtitle_path = st.session_state.get('subtitle_path')
+            subtitle_paths = _selected_subtitle_paths()
+            subtitle_path = subtitle_paths[0] if subtitle_paths else None
             video_theme = st.session_state.get('video_theme')
             temperature = st.session_state.get('temperature')
+            web_search_enabled = bool(st.session_state.get('short_drama_web_search_enabled', False))
+            current_signature = _short_drama_plot_analysis_signature(
+                subtitle_paths,
+                video_theme,
+                web_search_enabled,
+                _selected_video_paths(),
+            )
             plot_analysis = ""
-            if st.session_state.get('short_drama_plot_analysis_subtitle_path') == subtitle_path:
+            if st.session_state.get('short_drama_plot_analysis_signature') == current_signature:
+                plot_analysis = st.session_state.get('short_drama_plot_analysis', '')
+            elif (
+                not web_search_enabled
+                and st.session_state.get('short_drama_plot_analysis_subtitle_path') == subtitle_path
+            ):
                 plot_analysis = st.session_state.get('short_drama_plot_analysis', '')
             generate_script_short_sunmmary(
                 params,
-                subtitle_path,
+                subtitle_paths,
                 video_theme,
                 temperature,
                 tr,
                 plot_analysis=plot_analysis,
                 subtitle_content=st.session_state.get('subtitle_content', ''),
+                enable_web_search=web_search_enabled,
+                video_paths=_selected_video_paths(),
             )
         else:
             load_script(tr, script_path)
@@ -1172,6 +1302,8 @@ def save_script_with_validation(tr, video_clip_json_details):
                 example_script = [
                     {
                         "_id": 1,
+                        "video_id": 1,
+                        "video_name": "1.mp4",
                         "timestamp": "00:00:00,600-00:00:07,559",
                         "picture": "工地上，蔡晓艳奋力救人，场面混乱",
                         "narration": "灾后重建，工地上险象环生！泼辣女工蔡晓艳挺身而出，救人第一！",
@@ -1179,6 +1311,8 @@ def save_script_with_validation(tr, video_clip_json_details):
                     },
                     {
                         "_id": 2,
+                        "video_id": 2,
+                        "video_name": "2.mp4",
                         "timestamp": "00:00:08,240-00:00:12,359",
                         "picture": "领导视察，蔡晓艳不屑一顾",
                         "narration": "播放原片4",
diff --git a/webui/components/subtitle_settings.py b/webui/components/subtitle_settings.py
index f719d5e..96a7a7a 100644
--- a/webui/components/subtitle_settings.py
+++ b/webui/components/subtitle_settings.py
@@ -604,7 +604,7 @@ def render_font_settings(tr):
 
 def is_disabled_subtitle_settings(tts_engine:str)->bool:
     """是否禁用字幕设置"""
-    return tts_engine=="soulvoice" or tts_engine=="qwen3_tts"
+    return tts_engine=="soulvoice" or tts_engine=="qwen3_tts" or tts_engine==config.OMNIVOICE_ENGINE  # True for the TTS engines that disable subtitle settings
 
 def render_position_settings(tr):
     """渲染位置设置"""
diff --git a/webui/i18n/en.json b/webui/i18n/en.json
index 8e3356c..284d9a6 100644
--- a/webui/i18n/en.json
+++ b/webui/i18n/en.json
@@ -15,6 +15,8 @@
     "Video script table help": "Edit the full script JSON as a table. You can add or delete rows; saving will validate and write the script file again.",
     "Raw JSON Preview": "Raw JSON Preview",
     "Script Column ID": "ID",
+    "Script Column Video ID": "Video",
+    "Script Column Video Name": "Video Name",
     "Script Column Timestamp": "Timestamp",
     "Script Column Picture": "Picture",
     "Script Column Narration": "Narration",
@@ -286,7 +288,11 @@
     "IndexTTS download link": "Download link: https://pan.quark.cn/s/0767c9bcefd5",
     "IndexTTS2 features": "A locally or privately deployed IndexTTS-2 voice-cloning engine with emotion control and fuller generation parameters.",
     "IndexTTS2 use case": "Best for fixed voices, emotional narration, and local speech synthesis workflows that need finer sampling controls. Start the IndexTTS-2 API service before use.",
+    "OmniVoice features": "A locally or privately deployed OmniVoice-Pack multilingual TTS engine with automatic voice generation, voice design, and reference-audio cloning.",
+    "OmniVoice use case": "Best for local controllable multilingual narration, voice design, or reference-audio cloning. Start the OmniVoice-Pack API service before use.",
     "Doubao TTS features": "Volcengine Doubao speech synthesis with multiple voices and emotions, plus fast access in mainland China.",
+    "Local Deployment": "Local Deployment",
+    "Cloud Service": "Cloud Service",
     "Select TTS Engine": "Select TTS Engine",
     "Select TTS Engine Help": "Choose the text-to-speech engine you want to use.",
     "TTS Engine Details": "📋 {engine} Details",
@@ -413,6 +419,16 @@
     "Subtitle calibration succeeded for multiple files": "Subtitle calibration succeeded for {count} files: {files}",
     "Subtitle calibration failed": "Subtitle calibration failed",
     "Transcribed subtitles storage hint": "Previously transcribed subtitles are saved in {path}; drag a file from that folder to upload",
+    "Tavily Search Settings": "Tavily Web Search",
+    "Tavily API Key": "Tavily API Key",
+    "Tavily API Key Help": "Used for web search before short drama plot analysis. When Web Search is enabled, the app searches plot, character, and episode context by drama name, then combines it with subtitles.",
+    "Tavily config saved": "Tavily configuration saved",
+    "联网搜索": "Web Search",
+    "Enable Web Search Help": "When enabled, plot analysis searches the web with Tavily by short drama name before combining those results with subtitles.",
+    "Please configure Tavily API Key in Basic Settings": "Please configure the Tavily API Key in Basic Settings first",
+    "Please enter short drama name before web search": "Please enter the short drama name before enabling web search",
+    "Searching short drama with Tavily...": "Searching short drama context with Tavily...",
+    "Tavily search failed": "Tavily search failed",
     "剧情理解": "Plot Analysis",
     "剧情理解结果": "Plot Analysis Result",
     "Analyzing plot...": "Analyzing plot...",
@@ -443,6 +459,30 @@
     "API URL": "API URL",
     "IndexTTS API URL Help": "IndexTTS-1.5 API service URL",
     "IndexTTS2 API URL Help": "IndexTTS-2 API service URL. You can enter the service root or the full /tts endpoint.",
+    "OmniVoice API URL Help": "OmniVoice-Pack API service URL. You can enter the service root or the full /tts endpoint.",
+    "OmniVoice Language Code": "Synthesis Language",
+    "OmniVoice Language Code Help": "The language parameter sent to OmniVoice-Pack, such as zh or en.",
+    "OmniVoice Generation Mode": "Generation Mode",
+    "OmniVoice Generation Mode Help": "Automatic voice needs no extra fields; voice design uses an instruction; reference-audio cloning needs reference audio and matching text.",
+    "OmniVoice Mode Auto": "Automatic Voice",
+    "OmniVoice Mode Voice Design": "Voice Design",
+    "OmniVoice Mode Voice Clone": "Reference Audio Clone",
+    "OmniVoice Instruct": "Voice Instruction",
+    "OmniVoice Instruct Help": "Describe the desired voice, such as gender, pitch, accent, or style.",
+    "OmniVoice Instruct Placeholder": "e.g. female, low pitch, british accent",
+    "OmniVoice Reference Text": "Reference Audio Text",
+    "OmniVoice Reference Text Help": "The exact transcript of the reference audio. Required when the deployed service has ASR disabled.",
+    "OmniVoice Reference Text Placeholder": "Enter the text spoken in the reference audio",
+    "OmniVoice Num Step Help": "Diffusion generation steps. Higher values usually improve quality but slow generation.",
+    "OmniVoice Guidance Scale Help": "Controls how strongly text conditions guide generation.",
+    "OmniVoice Duration": "Target Duration (seconds)",
+    "OmniVoice Duration Help": "0 lets the model decide the duration automatically.",
+    "OmniVoice Denoise": "Enable Denoise",
+    "OmniVoice Denoise Help": "Ask OmniVoice-Pack to denoise the generated output.",
+    "OmniVoice Postprocess Output": "Postprocess Output",
+    "OmniVoice Postprocess Output Help": "Enable OmniVoice-Pack output post-processing.",
+    "OmniVoice Preprocess Prompt": "Preprocess Text",
+    "OmniVoice Preprocess Prompt Help": "Enable OmniVoice-Pack text preprocessing.",
     "Reference Audio Source": "Reference Audio Source",
     "Reference Audio Source Help": "Choose a reference audio from the resource directory or upload a new one.",
     "Select from Resource Directory": "Select from Resource Directory",
@@ -502,6 +542,8 @@
     "Max Mel Tokens Help": "Controls the maximum mel tokens generated in one request. Higher values can produce longer audio.",
     "IndexTTS2 Usage Instructions Title": "💡 IndexTTS-2 Usage Instructions",
     "IndexTTS2 Usage Instructions": "**IndexTTS-2 voice cloning**\n\n1. **Choose a voice**: reuse IndexTTS-1.5 resource audio or upload a reference audio file\n2. **Set API URL**: for example http://192.168.3.6:7863/tts, or enter the service root\n3. **Tune emotion**: speaker is the default; switch to audio, vector, or text when needed\n4. **Tune generation**: temperature, top_p, top_k, num_beams, repetition_penalty, and max_mel_tokens are sent directly to the IndexTTS-2 API\n\n**Notes**:\n- Reference audio quality directly affects cloning quality\n- The first request may load the model and take longer\n- CPU deployments are much slower than GPU deployments",
+    "OmniVoice Usage Instructions Title": "OmniVoice Usage Instructions",
+    "OmniVoice Usage Instructions": "**OmniVoice-Pack speech synthesis**\n\n1. **Automatic voice**: set the API URL and language, then synthesize directly.\n2. **Voice design**: fill instruct with the desired gender, pitch, accent, or style.\n3. **Reference-audio clone**: upload or choose reference audio and fill its matching transcript.\n\n**Notes**:\n- The default service URL is http://127.0.0.1:7866/tts\n- Reference-audio cloning requires reference text when the service has no ASR model loaded\n- OmniVoice returns WAV audio, and NarratoAI estimates subtitle segment timing from the audio duration",
     "Volcengine Access Key Help": "Volcengine Access Key",
     "Volcengine Secret Key Help": "Volcengine Secret Key",
     "Doubao AppID Help": "Doubao TTS application AppID",
diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json
index 4b16d7e..76872eb 100644
--- a/webui/i18n/zh.json
+++ b/webui/i18n/zh.json
@@ -159,6 +159,8 @@
     "Video script table help": "在表格中编辑完整脚本 JSON。可新增、删除行；保存时会重新校验并写入脚本文件。",
     "Raw JSON Preview": "原始 JSON 预览",
     "Script Column ID": "序号",
+    "Script Column Video ID": "视频",
+    "Script Column Video Name": "视频文件",
     "Script Column Timestamp": "时间戳",
     "Script Column Picture": "画面描述",
     "Script Column Narration": "解说台词",
@@ -267,7 +269,11 @@
     "IndexTTS download link": "下载地址：https://pan.quark.cn/s/0767c9bcefd5",
     "IndexTTS2 features": "本地/私有部署的 IndexTTS-2 语音克隆引擎，支持情感控制和更完整的生成参数。",
     "IndexTTS2 use case": "适合需要固定音色、情绪化旁白或更细致采样控制的本地语音合成场景。使用前请先启动 IndexTTS-2 API 服务。",
+    "OmniVoice features": "本地/私有部署的 OmniVoice-Pack 多语种语音合成引擎，支持自动音色、指令音色和参考音频克隆。",
+    "OmniVoice use case": "适合需要本地可控、多语言旁白、音色设计或参考音频克隆的场景。使用前请先启动 OmniVoice-Pack API 服务。",
     "Doubao TTS features": "火山引擎豆包语音合成，支持多种音色和情感，国内访问速度快",
+    "Local Deployment": "本地部署",
+    "Cloud Service": "云端服务",
     "Select TTS Engine": "选择 TTS 引擎",
     "Select TTS Engine Help": "选择您要使用的文本转语音引擎",
     "TTS Engine Details": "📋 {engine} 详细说明",
@@ -395,6 +401,16 @@
     "Subtitle calibration succeeded for multiple files": "字幕校准成功，共 {count} 个文件: {files}",
     "Subtitle calibration failed": "字幕校准失败",
     "Transcribed subtitles storage hint": "之前转录生成的字幕保存在 {path}，可从该目录拖入上传",
+    "Tavily Search Settings": "Tavily 联网搜索",
+    "Tavily API Key": "Tavily API Key",
+    "Tavily API Key Help": "用于短剧剧情理解前的联网检索。开启“联网搜索”后，会先按短剧名称检索剧情、人物和分集信息，再结合字幕分析。",
+    "Tavily config saved": "Tavily 配置已保存",
+    "联网搜索": "联网搜索",
+    "Enable Web Search Help": "开启后，剧情理解会先使用 Tavily 按短剧名称联网检索，再结合检索结果和字幕分析剧情。",
+    "Please configure Tavily API Key in Basic Settings": "请先在基础设置中配置 Tavily API Key",
+    "Please enter short drama name before web search": "开启联网搜索前，请先填写短剧名称",
+    "Searching short drama with Tavily...": "正在使用 Tavily 检索短剧信息...",
+    "Tavily search failed": "Tavily 检索失败",
     "剧情理解": "剧情理解",
     "剧情理解结果": "剧情理解结果",
     "Analyzing plot...": "正在理解剧情...",
@@ -425,6 +441,30 @@
     "API URL": "API 地址",
     "IndexTTS API URL Help": "IndexTTS-1.5 API 服务地址",
     "IndexTTS2 API URL Help": "IndexTTS-2 API 服务地址，可填写服务根地址或完整 /tts 地址",
+    "OmniVoice API URL Help": "OmniVoice-Pack API 服务地址，可填写服务根地址或完整 /tts 地址",
+    "OmniVoice Language Code": "合成语言",
+    "OmniVoice Language Code Help": "传给 OmniVoice-Pack 的 language 参数，例如 zh、en。",
+    "OmniVoice Generation Mode": "生成模式",
+    "OmniVoice Generation Mode Help": "自动音色无需额外参数；指令音色使用描述词；参考音频克隆需要参考音频和对应文本。",
+    "OmniVoice Mode Auto": "自动音色",
+    "OmniVoice Mode Voice Design": "指令音色",
+    "OmniVoice Mode Voice Clone": "参考音频克隆",
+    "OmniVoice Instruct": "音色指令",
+    "OmniVoice Instruct Help": "描述希望生成的音色，例如性别、音高、口音或风格。",
+    "OmniVoice Instruct Placeholder": "例如：female, low pitch, british accent",
+    "OmniVoice Reference Text": "参考音频文本",
+    "OmniVoice Reference Text Help": "参考音频对应的逐字文本；当前部署未启用 ASR 时必须填写。",
+    "OmniVoice Reference Text Placeholder": "请输入参考音频中实际朗读的内容",
+    "OmniVoice Num Step Help": "扩散生成步数，值越大通常质量更高但速度更慢。",
+    "OmniVoice Guidance Scale Help": "控制文本条件的引导强度。",
+    "OmniVoice Duration": "目标时长（秒）",
+    "OmniVoice Duration Help": "0 表示由模型自动决定时长。",
+    "OmniVoice Denoise": "启用降噪",
+    "OmniVoice Denoise Help": "让 OmniVoice-Pack 对生成结果执行降噪处理。",
+    "OmniVoice Postprocess Output": "后处理输出",
+    "OmniVoice Postprocess Output Help": "启用 OmniVoice-Pack 的输出后处理。",
+    "OmniVoice Preprocess Prompt": "预处理文本",
+    "OmniVoice Preprocess Prompt Help": "启用 OmniVoice-Pack 的文本预处理。",
     "Reference Audio Source": "参考音频来源",
     "Reference Audio Source Help": "选择从资源目录选择参考音频，或上传新的参考音频",
     "Select from Resource Directory": "从资源目录选择",
@@ -484,6 +524,8 @@
     "Max Mel Tokens Help": "控制单次生成的最大 mel token 数，值越大可生成更长音频",
     "IndexTTS2 Usage Instructions Title": "💡 IndexTTS-2 使用说明",
     "IndexTTS2 Usage Instructions": "**IndexTTS-2 语音克隆**\n\n1. **选择音色**：复用 IndexTTS-1.5 的资源音频或上传参考音频\n2. **设置 API 地址**：例如 http://192.168.3.6:7863/tts，也可以填写服务根地址\n3. **调整情感参数**：默认使用 speaker，可按需切换到 audio、vector 或 text\n4. **调整生成参数**：temperature、top_p、top_k、num_beams、repetition_penalty 和 max_mel_tokens 会直接传给 IndexTTS-2 接口\n\n**注意事项**：\n- 参考音频质量会直接影响克隆效果\n- 首次请求可能需要加载模型，耗时更长\n- CPU 部署生成速度会明显慢于 GPU",
+    "OmniVoice Usage Instructions Title": "OmniVoice 使用说明",
+    "OmniVoice Usage Instructions": "**OmniVoice-Pack 语音合成**\n\n1. **自动音色**：只需要设置 API 地址和语言，可直接合成。\n2. **指令音色**：填写 instruct 描述想要的性别、音高、口音或风格。\n3. **参考音频克隆**：上传或选择参考音频，并填写该音频对应文本。\n\n**注意事项**：\n- 当前默认服务地址为 http://127.0.0.1:7866/tts\n- 参考音频克隆在服务未加载 ASR 模型时必须填写参考文本\n- OmniVoice 返回 WAV 音频，系统会按音频时长估算字幕段落",
     "Volcengine Access Key Help": "火山引擎 Access Key",
     "Volcengine Secret Key Help": "火山引擎 Secret Key",
     "Doubao AppID Help": "豆包语音应用 AppID",
diff --git a/webui/tools/generate_short_summary.py b/webui/tools/generate_short_summary.py
index eb42361..d06431c 100644
--- a/webui/tools/generate_short_summary.py
+++ b/webui/tools/generate_short_summary.py
@@ -17,12 +17,101 @@ from loguru import logger
 from app.config import config
 from app.services.SDE.short_drama_explanation import analyze_subtitle, generate_narration_script
 from app.services.subtitle_text import read_subtitle_text
+from app.services.tavily_search import TavilySearchError, format_search_context, search_short_drama
 # 导入新的LLM服务模块 - 确保提供商被注册
 import app.services.llm  # 这会触发提供商注册
 from app.services.llm.migration_adapter import SubtitleAnalyzerAdapter
 import re
 
 
+def _normalize_paths(paths):
+    """Return the unique, non-blank path strings in *paths*, in first-seen order.
+
+    A lone string counts as a one-item list; non-string entries are ignored.
+    """
+    if isinstance(paths, str):
+        paths = [paths]
+    if not paths:
+        return []
+
+    # Dict keys keep insertion order, giving an ordered de-duplication.
+    unique = {}
+    for raw in paths:
+        cleaned = raw.strip() if isinstance(raw, str) else ""
+        if cleaned:
+            unique.setdefault(cleaned, None)
+    return list(unique)
+
+
+def _build_combined_subtitle_content(subtitle_paths, video_paths=None):
+    """Join the text of every existing subtitle file under a per-video header.
+
+    Subtitles pair with videos by position; missing files are skipped but keep their number.
+    """
+    videos = _normalize_paths(video_paths)
+    chunks = []
+
+    for number, srt_path in enumerate(_normalize_paths(subtitle_paths), start=1):
+        if os.path.exists(srt_path):
+            title = f"# 视频 {number}"
+            if number <= len(videos):
+                # Only name the video when one exists at this position.
+                title = f"{title}: {os.path.basename(videos[number - 1])}"
+            title = f"{title}\n字幕文件: {os.path.basename(srt_path)}"
+            chunks.append(f"{title}\n{read_subtitle_text(srt_path).text}".strip())
+
+    return "\n\n".join(chunks)
+
+
+def _coerce_video_id(value):
+    """Return *value* as a positive int, else None (OverflowError covers infinite floats)."""
+    try:
+        return max(int(value), 0) or None  # zero and negatives collapse to None
+    except (TypeError, ValueError, OverflowError):
+        return None
+
+
+def _match_video_id_by_name(video_name, video_paths):
+    """Return the 1-based position of the first path sharing *video_name*'s basename, or None."""
+    wanted = str(video_name or "").strip()
+    if not wanted:
+        return None
+
+    wanted = os.path.basename(wanted)
+    names = (os.path.basename(path) for path in video_paths)
+    return next((pos for pos, name in enumerate(names, start=1) if name == wanted), None)
+
+
+def _normalize_narration_items_video_sources(items, video_paths):
+    """Give every dict item a valid 1-based video_id plus the matching video_name.
+
+    A basename match on video_name/source_video wins over video_id/video_index;
+    a missing or out-of-range id falls back to video 1 with a warning. Non-dict
+    items pass through unchanged, and *items* is returned as-is without video paths.
+    """
+    sources = _normalize_paths(video_paths)
+    if not sources:
+        return items
+
+    result = []
+    for entry in items:
+        if not isinstance(entry, dict):
+            result.append(entry)
+            continue
+        patched = entry.copy()  # shallow copy: the caller's dicts are never mutated
+        declared = _coerce_video_id(patched.get("video_id") or patched.get("video_index"))
+        by_name = _match_video_id_by_name(
+            patched.get("video_name") or patched.get("source_video"), sources
+        )
+        chosen = by_name or declared
+        if chosen is None or chosen > len(sources):
+            logger.warning(f"片段 {patched.get('_id')} 未提供有效 video_id，默认使用视频 1")
+            chosen = 1
+        patched.update(video_id=chosen, video_name=os.path.basename(sources[chosen - 1]))
+        result.append(patched)
+    return result
+
+
 def parse_and_fix_json(json_string):
     """
     解析并修复JSON字符串
@@ -135,12 +224,83 @@ def parse_and_fix_json(json_string):
         return None
 
 
-def analyze_short_drama_plot(subtitle_path, temperature, tr=lambda key: key, subtitle_content=None):
+def _get_tavily_api_key() -> str:
+    """Return the stripped Tavily API key: session state first, then app config, else empty."""
+    session_key = st.session_state.get("tavily_api_key")
+    if session_key:
+        return session_key.strip()
+    return (config.app.get("tavily_api_key") or "").strip()
+
+
+def _build_tavily_context(short_name: str, tr=lambda key: key) -> str | None:
+    """Search Tavily for the drama and return the formatted context; None after any st.error."""
+    drama_name = str(short_name or "").strip()
+    if not drama_name:
+        st.error(tr("Please enter short drama name before web search"))
+        return None
+    tavily_key = _get_tavily_api_key()
+    if not tavily_key:
+        st.error(tr("Please configure Tavily API Key in Basic Settings"))
+        return None
+    try:
+        return format_search_context(
+            search_short_drama(
+                drama_name,
+                tavily_key,
+                search_depth=config.app.get("tavily_search_depth", "basic"),
+                max_results=config.app.get("tavily_max_results", 5),
+            )
+        )
+    except TavilySearchError as e:
+        failure = str(e)
+        logger.error(f"Tavily 短剧检索失败: {failure}")
+    except Exception as e:
+        failure = str(e)
+        logger.error(f"Tavily 短剧检索异常: {traceback.format_exc()}")
+    st.error(f"{tr('Tavily search failed')}: {failure}")
+    return None
+
+
+def _build_plot_analysis_input(
+    subtitle_content: str,
+    short_name: str = "",
+    enable_web_search: bool = False,
+    tr=lambda key: key,
+) -> str | None:
+    """Return the text for plot analysis: subtitles alone, or Tavily context plus subtitles."""
+    subtitles = str(subtitle_content or "").strip()
+    web_context = _build_tavily_context(short_name, tr) if enable_web_search else None
+    if web_context is None:
+        # Search disabled -> the plain subtitles are the whole input.
+        # Search enabled but no context was built -> None, so the caller can abort.
+        return None if enable_web_search else subtitles
+
+    return f"""# 分析补充说明
+请先参考 Tavily 联网检索结果理解短剧名称、人物关系、剧情背景和公开剧情梗概，再结合原始字幕完成剧情理解。
+如果联网检索结果与字幕内容冲突，请以字幕内容为准；时间戳必须只从字幕内容中提取。
+
+{web_context}
+
+# 原始字幕
+{subtitles}"""
+
+
+def analyze_short_drama_plot(
+    subtitle_path,
+    temperature,
+    tr=lambda key: key,
+    subtitle_content=None,
+    short_name: str = "",
+    enable_web_search: bool = False,
+    video_paths=None,
+):
     """仅执行短剧字幕剧情理解，返回可编辑的剧情分析文本。"""
-    if not subtitle_path:
+    subtitle_paths = _normalize_paths(subtitle_path)
+    if not subtitle_paths:
         st.error(tr("Please generate or upload subtitles first"))
         return None
-    if not os.path.exists(subtitle_path):
+    missing_subtitle_paths = [path for path in subtitle_paths if not os.path.exists(path)]
+    if missing_subtitle_paths:
         st.error(tr("Subtitle file does not exist"))
         return None
 
@@ -149,19 +309,31 @@ def analyze_short_drama_plot(subtitle_path, temperature, tr=lambda key: key, sub
     text_model = config.app.get(f'text_{text_provider}_model_name')
     text_base_url = config.app.get(f'text_{text_provider}_base_url')
 
-    subtitle_content = str(subtitle_content or "").strip() or read_subtitle_text(subtitle_path).text
+    subtitle_content = str(subtitle_content or "").strip() or _build_combined_subtitle_content(
+        subtitle_paths,
+        video_paths,
+    )
     if not subtitle_content:
         st.error(tr("Subtitle file is empty or unreadable"))
         return None
 
+    plot_analysis_input = _build_plot_analysis_input(
+        subtitle_content,
+        short_name=short_name,
+        enable_web_search=enable_web_search,
+        tr=tr,
+    )
+    if plot_analysis_input is None:
+        return None
+
     try:
         logger.info("使用新的LLM服务架构进行字幕分析")
         analyzer = SubtitleAnalyzerAdapter(text_api_key, text_model, text_base_url, text_provider)
-        analysis_result = analyzer.analyze_subtitle(subtitle_content)
+        analysis_result = analyzer.analyze_subtitle(plot_analysis_input)
     except Exception as e:
         logger.warning(f"使用新LLM服务失败，回退到旧实现: {str(e)}")
         analysis_result = analyze_subtitle(
-            subtitle_content=subtitle_content,
+            subtitle_content=plot_analysis_input,
             api_key=text_api_key,
             model=text_model,
             base_url=text_base_url,
@@ -186,6 +358,8 @@ def generate_script_short_sunmmary(
     tr=lambda key: key,
     plot_analysis=None,
     subtitle_content=None,
+    enable_web_search: bool = False,
+    video_paths=None,
 ):
     """
     生成 短剧解说 视频脚本
@@ -204,7 +378,12 @@ def generate_script_short_sunmmary(
 
     try:
         with st.spinner(tr("Generating script...")):
-            if not params.video_origin_path:
+            selected_video_paths = _normalize_paths(
+                video_paths
+                or getattr(params, "video_origin_paths", [])
+                or getattr(params, "video_origin_path", "")
+            )
+            if not selected_video_paths:
                 st.error(tr("Please select video file first"))
                 return
             """
@@ -212,7 +391,9 @@ def generate_script_short_sunmmary(
             """
             update_progress(30, tr("Parsing subtitles..."))
             # 判断字幕文件是否存在
-            if not os.path.exists(subtitle_path):
+            subtitle_paths = _normalize_paths(subtitle_path)
+            missing_subtitle_paths = [path for path in subtitle_paths if not os.path.exists(path)]
+            if not subtitle_paths or missing_subtitle_paths:
                 st.error(tr("Subtitle file does not exist"))
                 return
 
@@ -225,7 +406,10 @@ def generate_script_short_sunmmary(
             text_base_url = config.app.get(f'text_{text_provider}_base_url')
 
             # 读取字幕文件内容（无论使用哪种实现都需要）
-            subtitle_content = str(subtitle_content or "").strip() or read_subtitle_text(subtitle_path).text
+            subtitle_content = str(subtitle_content or "").strip() or _build_combined_subtitle_content(
+                subtitle_paths,
+                selected_video_paths,
+            )
             if not subtitle_content:
                 st.error(tr("Subtitle file is empty or unreadable"))
                 return
@@ -238,16 +422,27 @@ def generate_script_short_sunmmary(
                     "analysis": str(plot_analysis).strip(),
                 }
             else:
+                plot_analysis_input = subtitle_content
+                if enable_web_search:
+                    update_progress(40, tr("Searching short drama with Tavily..."))
+                    plot_analysis_input = _build_plot_analysis_input(
+                        subtitle_content,
+                        short_name=video_theme,
+                        enable_web_search=True,
+                        tr=tr,
+                    )
+                    if plot_analysis_input is None:
+                        return
                 try:
                     # 优先使用新的LLM服务架构
                     logger.info("使用新的LLM服务架构进行字幕分析")
-                    analysis_result = analyzer.analyze_subtitle(subtitle_content)
+                    analysis_result = analyzer.analyze_subtitle(plot_analysis_input)
 
                 except Exception as e:
                     logger.warning(f"使用新LLM服务失败，回退到旧实现: {str(e)}")
                     # 回退到旧的实现
                     analysis_result = analyze_subtitle(
-                        subtitle_content=subtitle_content,
+                        subtitle_content=plot_analysis_input,
                         api_key=text_api_key,
                         model=text_model,
                         base_url=text_base_url,
@@ -320,7 +515,11 @@ def generate_script_short_sunmmary(
                 logger.error(f"JSON结构错误，缺少items字段: {narration_dict}")
                 st.stop()
 
-            script = json.dumps(narration_dict['items'], ensure_ascii=False, indent=2)
+            narration_items = _normalize_narration_items_video_sources(
+                narration_dict['items'],
+                selected_video_paths,
+            )
+            script = json.dumps(narration_items, ensure_ascii=False, indent=2)
 
             if script is None:
                 st.error(tr("Script generation failed check logs"))