feat(tts,search,video): 新增OmniVoice TTS、联网搜索与多视频剪辑支持

新增OmniVoice语音合成引擎全流程支持，包含配置项、WebUI界面与服务实现。
集成Tavily联网搜索能力，支持短剧剧情分析前自动检索剧情背景信息。
新增多视频源剪辑支持，完善脚本校验规则并重构剪辑逻辑适配多视频路径。
重构LLM剧情分析Prompt，优化输出格式适配多场景与联网检索结果。
调整streamlit版本至1.56.0，修复兼容性问题。
新增相关单元测试与多语言翻译，更新配置示例文件。
2026-08-01 10:55:54 +00:00 · 2026-06-07 01:24:32 +08:00 · 2026-06-07 01:24:32 +08:00 · 342fc15f3b
commit 342fc15f3b
parent d147fe66e4
24 changed files with 1320 additions and 108 deletions
--- a/app/config/config.py
+++ b/app/config/config.py
@ -13,8 +13,11 @@ INDEXTTS_ENGINE = "indextts"
 INDEXTTS_DISPLAY_NAME = "IndexTTS-1.5"
 INDEXTTS2_ENGINE = "indextts2"
 INDEXTTS2_DISPLAY_NAME = "IndexTTS-2"
+OMNIVOICE_ENGINE = "omnivoice"
+OMNIVOICE_DISPLAY_NAME = "OmniVoice"
 INDEXTTS_VOICE_PREFIX = f"{INDEXTTS_ENGINE}:"
 INDEXTTS2_VOICE_PREFIX = f"{INDEXTTS2_ENGINE}:"
+OMNIVOICE_VOICE_PREFIX = f"{OMNIVOICE_ENGINE}:"


 def normalize_tts_engine_name(tts_engine: str) -> str:
@ -131,6 +134,7 @@ def save_config():
        _cfg["fun_asr"] = fun_asr
        _cfg["indextts"] = indextts
        _cfg["indextts2"] = indextts2
+        _cfg["omnivoice"] = omnivoice
        _cfg["doubaotts"] = doubaotts
        f.write(toml.dumps(_cfg))

@ -148,6 +152,7 @@ tts_qwen = _cfg.get("tts_qwen", {})
 fun_asr = _cfg.get("fun_asr", {})
 indextts = _cfg.get("indextts", {})
 indextts2 = _cfg.get("indextts2", {})
+omnivoice = _cfg.get("omnivoice", {})
 doubaotts = _cfg.get("doubaotts", {})

 hostname = socket.gethostname()
--- a/app/config/defaults.py
+++ b/app/config/defaults.py
@ -35,6 +35,9 @@ DEFAULT_LLM_APP_CONFIG = {
    "text_openai_model_name": DEFAULT_TEXT_OPENAI_MODEL_NAME,
    "text_openai_api_key": "",
    "text_openai_base_url": DEFAULT_OPENAI_COMPATIBLE_BASE_URL,
+    "tavily_api_key": "",
+    "tavily_search_depth": "basic",
+    "tavily_max_results": 5,
 }
 DEFAULT_LLM_APP_CONFIG.update(DEFAULT_LLM_GENERATION_APP_CONFIG)

--- a/app/services/clip_video.py
+++ b/app/services/clip_video.py
@ -32,6 +32,82 @@ def parse_timestamp(timestamp: str) -> tuple:
    return start_time, end_time


+def _normalize_video_origin_paths(
+    video_origin_path: str,
+    video_origin_paths: Optional[List[str]] = None,
+) -> List[str]:
+    paths = []
+    if video_origin_paths:
+        paths.extend(video_origin_paths)
+    if video_origin_path:
+        paths.insert(0, video_origin_path)
+
+    normalized_paths = []
+    seen = set()
+    for item in paths:
+        if not isinstance(item, str):
+            continue
+        item = item.strip()
+        if not item or item in seen:
+            continue
+        normalized_paths.append(item)
+        seen.add(item)
+    return normalized_paths
+
+
+def _coerce_video_id(value) -> Optional[int]:
+    try:
+        video_id = int(value)
+    except (TypeError, ValueError):
+        return None
+    return video_id if video_id > 0 else None
+
+
+def _match_video_id_by_name(video_name: str, video_origin_paths: List[str]) -> Optional[int]:
+    video_name = str(video_name or "").strip()
+    if not video_name:
+        return None
+
+    expected_name = os.path.basename(video_name)
+    for index, video_path in enumerate(video_origin_paths, start=1):
+        if os.path.basename(video_path) == expected_name:
+            return index
+    return None
+
+
+def _resolve_script_video_path(script_item: Dict, video_origin_paths: List[str]) -> str:
+    explicit_path = (
+        script_item.get("source_video_path")
+        or script_item.get("video_origin_path")
+        or script_item.get("origin_video_path")
+    )
+    if explicit_path and os.path.exists(explicit_path):
+        return explicit_path
+
+    video_id = _coerce_video_id(script_item.get("video_id") or script_item.get("video_index"))
+    matched_video_id = _match_video_id_by_name(
+        script_item.get("video_name") or script_item.get("source_video"),
+        video_origin_paths,
+    )
+    if matched_video_id:
+        video_id = matched_video_id
+
+    if video_id is not None:
+        if video_id <= len(video_origin_paths):
+            return video_origin_paths[video_id - 1]
+        logger.warning(
+            f"片段 {script_item.get('_id')} 的 video_id={video_id} 超出视频数量 "
+            f"{len(video_origin_paths)}，默认使用第一个视频"
+        )
+
+    return video_origin_paths[0]
+
+
+def _safe_output_id(value) -> str:
+    safe_value = str(value if value is not None else "unknown")
+    return "".join(char if char.isalnum() or char in ("-", "_") else "_" for char in safe_value)
+
+
 def calculate_end_time(start_time: str, duration: float, extra_seconds: float = 1.0) -> str:
    """
    根据开始时间和持续时间计算结束时间
@ -579,7 +655,7 @@ def _process_narration_only_segment(
    # 生成输出文件名
    safe_start_time = start_time.replace(':', '-').replace(',', '-')
    safe_end_time = calculated_end_time.replace(':', '-').replace(',', '-')
-    output_filename = f"ost0_vid_{safe_start_time}@{safe_end_time}.mp4"
+    output_filename = f"ost0_{_safe_output_id(_id)}_vid_{safe_start_time}@{safe_end_time}.mp4"
    output_path = os.path.join(output_dir, output_filename)

    # 构建FFmpeg命令 - 移除音频
@ -622,7 +698,7 @@ def _process_original_audio_segment(
    # 生成输出文件名
    safe_start_time = start_time.replace(':', '-').replace(',', '-')
    safe_end_time = end_time.replace(':', '-').replace(',', '-')
-    output_filename = f"ost1_vid_{safe_start_time}@{safe_end_time}.mp4"
+    output_filename = f"ost1_{_safe_output_id(_id)}_vid_{safe_start_time}@{safe_end_time}.mp4"
    output_path = os.path.join(output_dir, output_filename)

    # 构建FFmpeg命令 - 保持原声
@ -674,7 +750,7 @@ def _process_mixed_segment(
    # 生成输出文件名
    safe_start_time = start_time.replace(':', '-').replace(',', '-')
    safe_end_time = calculated_end_time.replace(':', '-').replace(',', '-')
-    output_filename = f"ost2_vid_{safe_start_time}@{safe_end_time}.mp4"
+    output_filename = f"ost2_{_safe_output_id(_id)}_vid_{safe_start_time}@{safe_end_time}.mp4"
    output_path = os.path.join(output_dir, output_filename)

    # 构建FFmpeg命令 - 保持原声
@ -782,28 +858,34 @@ def clip_video_unified(
        script_list: List[Dict],
        tts_results: List[Dict],
        output_dir: Optional[str] = None,
-        task_id: Optional[str] = None
+        task_id: Optional[str] = None,
+        video_origin_paths: Optional[List[str]] = None
 ) -> Dict[str, str]:
    """
    基于OST类型的统一视频裁剪策略 - 消除双重裁剪问题

    Args:
-        video_origin_path: 原始视频的路径
+        video_origin_path: 原始视频的路径；旧脚本或无 video_id 片段默认使用该视频
        script_list: 完整的脚本列表，包含所有片段信息
        tts_results: TTS结果列表，仅包含OST=0和OST=2的片段
        output_dir: 输出目录路径，默认为None时会自动生成
        task_id: 任务ID，用于生成唯一的输出目录，默认为None时会自动生成
+        video_origin_paths: 多个原始视频路径，脚本片段可用 video_id/video_name 指定来源

    Returns:
        Dict[str, str]: 片段ID到裁剪后视频路径的映射
    """
-    # 检查视频文件是否存在
-    if not os.path.exists(video_origin_path):
-        raise FileNotFoundError(f"视频文件不存在: {video_origin_path}")
+    video_source_paths = _normalize_video_origin_paths(video_origin_path, video_origin_paths)
+    if not video_source_paths:
+        raise FileNotFoundError("视频文件不存在: 未提供原始视频路径")
+
+    missing_video_paths = [item for item in video_source_paths if not os.path.exists(item)]
+    if missing_video_paths:
+        raise FileNotFoundError(f"视频文件不存在: {', '.join(missing_video_paths)}")

    # 如果未提供task_id，则根据输入生成一个唯一ID
    if task_id is None:
-        content_for_hash = f"{video_origin_path}_{json.dumps(script_list)}"
+        content_for_hash = f"{json.dumps(video_source_paths, ensure_ascii=False)}_{json.dumps(script_list, ensure_ascii=False)}"
        task_id = hashlib.md5(content_for_hash.encode()).hexdigest()

    # 设置输出目录
@ -840,29 +922,33 @@ def clip_video_unified(
    failed_clips = []
    success_count = 0

-    logger.info(f"📹 开始统一视频裁剪，总共{total_clips}个片段")
+    logger.info(f"📹 开始统一视频裁剪，总共{total_clips}个片段，源视频{len(video_source_paths)}个")

    for i, script_item in enumerate(script_list, 1):
        _id = script_item.get("_id")
        ost = script_item.get("OST", 0)
        timestamp = script_item["timestamp"]
+        source_video_path = _resolve_script_video_path(script_item, video_source_paths)

-        logger.info(f"📹 [{i}/{total_clips}] 处理片段 ID:{_id}, OST:{ost}, 时间戳:{timestamp}")
+        logger.info(
+            f"📹 [{i}/{total_clips}] 处理片段 ID:{_id}, OST:{ost}, "
+            f"视频:{os.path.basename(source_video_path)}, 时间戳:{timestamp}"
+        )

        try:
            if ost == 0:  # 纯解说片段
                output_path = _process_narration_only_segment(
-                    video_origin_path, script_item, tts_map, output_dir,
+                    source_video_path, script_item, tts_map, output_dir,
                    encoder_config, hwaccel_args
                )
            elif ost == 1:  # 纯原声片段
                output_path = _process_original_audio_segment(
-                    video_origin_path, script_item, output_dir,
+                    source_video_path, script_item, output_dir,
                    encoder_config, hwaccel_args
                )
            elif ost == 2:  # 解说+原声混合片段
                output_path = _process_mixed_segment(
-                    video_origin_path, script_item, tts_map, output_dir,
+                    source_video_path, script_item, tts_map, output_dir,
                    encoder_config, hwaccel_args
                )
            else:
--- a/app/services/jianying_task.py
+++ b/app/services/jianying_task.py
@ -107,7 +107,7 @@ def _clamp_duration_to_media(


 def _normalize_indextts_reference_audio(params: VideoClipParams) -> None:
-    """Ensure IndexTTS engines use the configured reference audio instead of a stale UI voice."""
+    """Ensure local clone TTS engines use configured reference audio instead of a stale UI voice."""
    params.tts_engine = config.normalize_tts_engine_name(params.tts_engine)
    if params.tts_engine == config.INDEXTTS_ENGINE:
        tts_config = config.indextts
@ -117,6 +117,12 @@ def _normalize_indextts_reference_audio(params: VideoClipParams) -> None:
        tts_config = config.indextts2
        voice_prefix = config.INDEXTTS2_VOICE_PREFIX
        display_name = "IndexTTS-2"
+    elif params.tts_engine == config.OMNIVOICE_ENGINE:
+        tts_config = config.omnivoice
+        if tts_config.get("mode", "auto") != "voice_clone":
+            return
+        voice_prefix = config.OMNIVOICE_VOICE_PREFIX
+        display_name = "OmniVoice"
    else:
        return

@ -199,6 +205,7 @@ def start_export_jianying_draft(task_id: str, params: VideoClipParams):
    logger.info("\n\n## 3. 统一视频裁剪（基于OST类型）")
    video_clip_result = clip_video.clip_video_unified(
        video_origin_path=params.video_origin_path,
+        video_origin_paths=getattr(params, "video_origin_paths", []),
        script_list=list_script,
        tts_results=tts_results
    )
--- a/app/services/llm/unified_service.py
+++ b/app/services/llm/unified_service.py
@ -12,6 +12,7 @@ from loguru import logger
 from .manager import LLMServiceManager
 from .validators import OutputValidator
 from .exceptions import LLMServiceError
+from app.services.prompts import PromptManager

 # 提供商注册由 webui.py:main() 显式调用（见 LLM 提供商注册机制重构）
 # 这样更可靠，错误也更容易调试
@ -181,12 +182,20 @@ class UnifiedLLMService:
            LLMServiceError: 服务调用失败时抛出
        """
        try:
-            # 构建分析提示词
-            system_prompt = "你是一位专业的剧本分析师和剧情概括助手。请仔细分析字幕内容，提取关键剧情信息。"
+            prompt = PromptManager.get_prompt(
+                category="short_drama_narration",
+                name="plot_analysis",
+                parameters={"subtitle_content": subtitle_content},
+            )
+            prompt_object = PromptManager.get_prompt_object(
+                category="short_drama_narration",
+                name="plot_analysis",
+            )
+            system_prompt = prompt_object.get_system_prompt()
            
            # 生成分析结果
            result = await UnifiedLLMService.generate_text(
-                prompt=subtitle_content,
+                prompt=prompt,
                system_prompt=system_prompt,
                provider=provider,
                temperature=temperature,
--- a/app/services/llm/validators.py
+++ b/app/services/llm/validators.py
@ -113,6 +113,8 @@ class OutputValidator:
                            "required": ["_id", "timestamp", "picture", "narration"],
                            "properties": {
                                "_id": {"type": "number"},
+                                "video_id": {"type": "number"},
+                                "video_name": {"type": "string"},
                                "timestamp": {"type": "string"},
                                "picture": {"type": "string"},
                                "narration": {"type": "string"},
@ -161,6 +163,16 @@ class OutputValidator:
        item_id = item.get("_id")
        if not isinstance(item_id, (int, float)) or item_id <= 0:
            raise ValidationError(f"第{index+1}项ID必须为正整数: {item_id}", "invalid_id")
+
+        video_id = item.get("video_id")
+        if video_id not in (None, "") and (
+            not isinstance(video_id, (int, float)) or video_id <= 0
+        ):
+            raise ValidationError(f"第{index+1}项video_id必须为正整数: {video_id}", "invalid_video_id")
+
+        video_name = item.get("video_name")
+        if video_name not in (None, "") and not isinstance(video_name, str):
+            raise ValidationError(f"第{index+1}项video_name必须为字符串: {video_name}", "invalid_video_name")
    
    @staticmethod
    def validate_subtitle_analysis(output: str) -> str:
--- a/app/services/prompts/short_drama_narration/plot_analysis.py
+++ b/app/services/prompts/short_drama_narration/plot_analysis.py
@ -19,72 +19,79 @@ class PlotAnalysisPrompt(TextPrompt):
        metadata = PromptMetadata(
            name="plot_analysis",
            category="short_drama_narration",
-            version="v1.0",
-            description="分析短剧字幕内容，提供详细的剧情分析和分段解析",
+            version="v1.1",
+            description="结合字幕和可选联网检索上下文，输出适合短剧解说脚本生成的结构化剧情理解",
            model_type=ModelType.TEXT,
            output_format=OutputFormat.TEXT,
-            tags=["短剧", "剧情分析", "字幕解析", "分段分析"],
+            tags=["短剧", "剧情分析", "字幕解析", "分段分析", "联网检索", "解说脚本素材"],
            parameters=["subtitle_content"]
        )
        super().__init__(metadata)
        
-        self._system_prompt = "你是一位专业的剧本分析师和剧情概括助手。"
+        self._system_prompt = "你是一位专业的短剧解说策划和剧本分析师。请输出克制、结构化、可直接供下游解说脚本生成使用的剧情理解材料。"
        
    def get_template(self) -> str:
        return """# 角色
-你是一位专业的剧本分析师和剧情概括助手。
+你是一位专业的短剧解说策划和剧本分析师。你的输出不是给观众看的成片文案，而是给下游“短剧解说脚本生成器”使用的结构化剧情理解材料。

-# 任务
-我将为你提供一部短剧的完整字幕文本。请你基于这些字幕，完成以下任务：
-1.  **整体剧情分析**：简要概括整个短剧的核心剧情脉络、主要冲突和结局（如果有的话）。
-2.  **分段剧情解析与时间戳定位**：
-    *   将整个短剧划分为若干个关键的剧情段落（例如：开端、发展、转折、高潮、结局，或根据具体情节自然划分）。
-    *   段落数应该与字幕长度成正比。
-    *   对于每一个剧情段落：
-        *   **概括该段落的主要内容**：用简洁的语言描述这段剧情发生了什么。
-        *   **标注对应的时间戳范围**：明确指出该剧情段落对应的开始字幕时间戳和结束字幕时间戳。请直接从字幕中提取时间信息。
+# 输入说明
+下面的输入可能只包含一个视频的原始字幕，也可能包含多个视频文件的字幕；也可能同时包含 Tavily 联网检索结果和原始字幕。
+- 联网检索结果只能用于辅助识别短剧名称、人物关系、时代背景、公开剧情梗概。
+- 原始字幕是唯一可信的当前片段事实来源。
+- 如果联网检索结果与字幕冲突，必须以字幕为准。
+- 如果联网检索结果包含当前字幕尚未出现的后续剧情，只能放在“字幕未覆盖/需谨慎信息”中，不能写进当前剧情事实。
+- 多个视频字幕会以“视频 1: 文件名”“视频 2: 文件名”等标题分隔。时间戳均为对应视频内部时间，不是拼接后的累计时间。

-# 输入格式
-字幕内容通常包含时间戳和对话，例如：
-```
-00:00:05,000 --> 00:00:10,000
-[角色A]: 你好吗？
-00:00:10,500 --> 00:00:15,000
-[角色B]: 我很好，谢谢。发生了一些有趣的事情。
-... (更多字幕内容) ...
-```
-我将把实际字幕粘贴在下方。
+# 核心任务
+请基于输入完成剧情理解，目标是帮助后续生成高质量短剧解说脚本：
+1. 识别短剧名称、当前字幕范围、视频来源、联网检索辅助信息和字幕事实边界。
+2. 统一人物称呼，避免同一人物出现多个名字写法。
+3. 用 100-180 字概括当前字幕覆盖的剧情，不提前剧透字幕未出现的内容。
+4. 按视频来源和字幕时间顺序拆分关键剧情段落，并为每段标注准确 video_id / video_name / 时间戳。
+5. 提炼解说创作可用的钩子、冲突、爽点/泪点/悬念点和建议保留原声片段。

-# 输出格式要求
-请按照以下格式清晰地呈现分析结果：
+# 强制输出规则
+1. 禁止输出寒暄、解释身份或“好的，我将……”等聊天式开场。
+2. 禁止编造字幕中没有的具体事件、对白、关系进展或结局。
+3. 时间戳必须直接来自对应视频字幕；无法确定时写“字幕未明确”，不要猜测。
+4. 多视频场景下必须明确每段来自哪个视频文件，禁止把不同视频的同名时间戳混在一起。
+5. 人名必须统一：优先采用联网检索中的正式名称；如果字幕写法不同，在人物表中保留“字幕称呼”。
+6. 内容要简洁、客观、可复用，避免散文化长段落。
+7. 必须严格按照下面的 Markdown 格式输出，不要添加额外章节。

-**一、整体剧情概括：**
-[此处填写对整个短剧剧情的概括]
+# 输出格式
+## 一、基础识别
+- 短剧名称：[如输入可判断则填写，否则写“未知”]
+- 当前字幕范围：[开始时间戳] --> [结束时间戳]；无法确定则写“字幕未明确”
+- 视频来源：[列出视频编号、文件名和各自字幕时间范围；单视频也要写]
+- 联网检索确认：[仅写可辅助理解的公开信息；没有联网结果则写“未启用/未提供”]
+- 字幕内实际出现：[列出当前字幕真实出现的关键事实，2-4 条]
+- 字幕未覆盖/需谨慎信息：[列出联网结果提到但当前字幕未发生的内容；没有则写“无”]

-**二、分段剧情解析：**
+## 二、人物与关系
+| 统一称呼 | 字幕称呼 | 身份/关系 | 当前剧情作用 | 确定性 |
+|---|---|---|---|---|
+| [人物名] | [字幕原文称呼] | [身份或关系] | [在当前片段中的作用] | 字幕明确/联网辅助/合理推断 |

-**剧情段落 1：[段落主题/概括，例如：主角登场与背景介绍]**
-*   **时间戳：** [开始时间戳] --> [结束时间戳]
-*   **内容概要：** [对这段剧情的详细描述]
+## 三、整体剧情概括
+[100-180 字，只概括当前字幕覆盖的剧情。必须包含核心冲突、人物动机和当前悬念。]

-**剧情段落 2：[段落主题/概括，例如：第一个冲突出现]**
-*   **时间戳：** [开始时间戳] --> [结束时间戳]
-*   **内容概要：** [对这段剧情的详细描述]
+## 四、分段剧情解析
+| 视频 | 时间戳 | 段落主题 | 剧情事件 | 情绪/冲突功能 |
+|---|---|---|---|---|
+| [video_id + video_name] | [开始] --> [结束] | [简短主题] | [当前段落发生了什么] | [铺垫/冲突升级/人物塑造/反转/悬念/情绪爆发等] |

-... (根据实际剧情段落数量继续) ...
+## 五、解说创作重点
+- 开场钩子：[用一句话指出最适合开场抓人的冲突或疑问]
+- 核心冲突：[当前片段最主要的矛盾]
+- 爽点/泪点/情绪点：[列 1-3 条，没有则写“无明显”]
+- 悬念点：[当前片段留下的疑问或后续期待]
+- 建议保留原声片段：
+  1. [video_id + video_name + 时间戳]：[保留理由；如果没有合适原声，写“无明显”]

-**剧情段落 N：[段落主题/概括，例如：结局与反思]**
-*   **时间戳：** [开始时间戳] --> [结束时间戳]
-*   **内容概要：** [对这段剧情的详细描述]
+## 六、联网信息校验
+- 可用于辅助理解的信息：[联网结果中可帮助理解当前字幕的信息；没有则写“无”]
+- 与字幕不一致或字幕未覆盖的信息：[必须列出，不要混入当前剧情事实；没有则写“无”]

-# 注意事项
-*   请确保时间戳的准确性，直接引用字幕中的时间。
-*   剧情段落的划分应合乎逻辑，能够反映剧情的起承转合。
-*   语言表达应简洁、准确、客观。
-
-# 限制
-1. 严禁输出与分析结果无关的内容
-2. 时间戳必须严格按照字幕中的实际时间
-
-# 请处理以下字幕：
+# 输入内容
 ${subtitle_content}"""
--- a/app/services/prompts/short_drama_narration/script_generation.py
+++ b/app/services/prompts/short_drama_narration/script_generation.py
@ -43,11 +43,14 @@ class ScriptGenerationPrompt(ParameterizedPrompt):
 ${plot_analysis}
 </plot>

-### 原始字幕（含精确时间戳）
+### 原始字幕（含视频编号和精确时间戳）
 <subtitles>
 ${subtitle_content}
 </subtitles>

+字幕可能来自多个视频文件。每个字幕分段标题会以“视频 1: 文件名”“视频 2: 文件名”等形式标识来源。
+生成脚本时必须把每个片段绑定到对应视频来源，时间戳表示该视频文件内部的局部时间，不是把多个视频拼接后的全局时间。
+
 ## 短剧解说创作核心要素

 ### 1. 黄金开场（3秒法则）
@ -137,11 +140,18 @@ ${subtitle_content}

 ### 时间戳管理（绝对不能违反）
 - **时间戳绝对不能重叠**，确保剪辑后无重复画面
- **时间段必须连续且不交叉**，严格按时间顺序排列
- **每个时间戳都必须在原始字幕中找到对应范围**
+- **同一个 video_id 内的时间段必须连续且不交叉**，严格按该视频内时间顺序排列
+- **跨视频可以切换 video_id**，但每个时间戳都必须来自对应视频字幕分段
+- **每个时间戳都必须在对应视频的原始字幕中找到对应范围**
 - 可以拆分原时间片段，但必须保持时间连续性
 - 时间戳的格式必须与原始字幕中的格式完全一致

+### 多视频来源规范（多集/多文件必须遵守）
+- **video_id**：必须填写，取字幕分段标题里的视频编号，例如“视频 3”就填 3
+- **video_name**：必须填写对应的视频文件名，例如“3_20260607002212.mp4”
+- **timestamp**：只填写对应 video_id 内部的时间范围，不要换算成多个视频拼接后的累计时间
+- 如果剧情跨多个视频推进，脚本可以按故事顺序在不同 video_id 之间切换，但不得把视频 2 的时间戳写到 video_id=1
+
 ### 时长控制（1/3原则）
 - **解说视频总长度 = 原视频长度的 1/3**
 - 精确控制节奏和密度，既不能过短也不能过长
@ -159,6 +169,8 @@ ${subtitle_content}
 ```json
 {
  "_id": 序号,
+  "video_id": 视频编号,
+  "video_name": "视频文件名",
  "timestamp": "开始时间-结束时间",
  "picture": "画面内容描述",
  "narration": "播放原片+序号",
@ -242,6 +254,8 @@ ${subtitle_content}
  "items": [
    {
        "_id": 1,
+        "video_id": 1,
+        "video_name": "1.mp4",
        "timestamp": "00:00:01,000-00:00:05,500",
        "picture": "女主角林小雨慌张地道歉，男主角沈墨轩冷漠地看着她",
        "narration": "一个普通女孩的命运即将因为一杯咖啡彻底改变！她撞到的这个男人，竟然是...",
@ -249,6 +263,8 @@ ${subtitle_content}
    },
    {
        "_id": 2,
+        "video_id": 1,
+        "video_name": "1.mp4",
        "timestamp": "00:00:05,500-00:00:08,000",
        "picture": "沈墨轩质问林小雨，语气冷厉威严",
        "narration": "播放原片2",
@ -256,6 +272,8 @@ ${subtitle_content}
    },
    {
        "_id": 3,
+        "video_id": 2,
+        "video_name": "2.mp4",
        "timestamp": "00:00:08,000-00:00:12,000",
        "picture": "林小雨惊慌失措，沈墨轩眼中闪过一丝兴趣",
        "narration": "霸道总裁的经典开场！一杯咖啡引发的爱情故事就这样开始了...",
@ -281,6 +299,7 @@ ${subtitle_content}
 - **原声片段标识**：OST=1表示原声，OST=0表示解说
 - **原声格式规范**：narration字段必须使用"播放原片+序号"格式
 - **关键情绪点**：必须保留原片原声，增强观众代入感
+- **视频来源**：每个片段必须包含 video_id 和 video_name，用于定位多个上传视频中的源文件
 - **时间戳精度**：精确到毫秒级别，确保与字幕完美匹配
 - **逻辑连贯性**：严格遵循剧情发展顺序

--- a/app/services/task.py
+++ b/app/services/task.py
@ -225,6 +225,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
    # 使用新的统一裁剪策略
    video_clip_result = clip_video.clip_video_unified(
        video_origin_path=params.video_origin_path,
+        video_origin_paths=getattr(params, "video_origin_paths", []),
        script_list=list_script,
        tts_results=tts_results
    )
@ -477,6 +478,7 @@ def start_subclip_unified(task_id: str, params: VideoClipParams):
    # 使用新的统一裁剪策略
    video_clip_result = clip_video.clip_video_unified(
        video_origin_path=params.video_origin_path,
+        video_origin_paths=getattr(params, "video_origin_paths", []),
        script_list=list_script,
        tts_results=tts_results
    )
--- a/app/services/tavily_search.py
+++ b/app/services/tavily_search.py
@ -0,0 +1,116 @@
+"""Tavily-powered web search helpers for plot analysis."""
+
+from __future__ import annotations
+
+import os
+from typing import Any
+
+import requests
+from loguru import logger
+
+
+TAVILY_API_BASE_URL = "https://api.tavily.com"
+DEFAULT_SEARCH_DEPTH = "basic"
+DEFAULT_MAX_RESULTS = 5
+DEFAULT_TIMEOUT = 20
+
+
+class TavilySearchError(RuntimeError):
+    """Raised when Tavily search cannot be completed."""
+
+
+def _trim_text(value: Any, max_chars: int) -> str:
+    text = str(value or "").strip()
+    if len(text) <= max_chars:
+        return text
+    return f"{text[:max_chars].rstrip()}..."
+
+
+def search_short_drama(
+    short_name: str,
+    api_key: str | None = None,
+    *,
+    search_depth: str = DEFAULT_SEARCH_DEPTH,
+    max_results: int = DEFAULT_MAX_RESULTS,
+    timeout: int = DEFAULT_TIMEOUT,
+) -> dict[str, Any]:
+    """Search web context for a short drama name with Tavily."""
+    short_name = str(short_name or "").strip()
+    if not short_name:
+        raise TavilySearchError("短剧名称不能为空")
+
+    api_key = (api_key or os.getenv("TAVILY_API_KEY") or "").strip()
+    if not api_key:
+        raise TavilySearchError("Tavily API Key 未配置")
+
+    query = f"{short_name} 短剧 剧情 介绍 人物 结局"
+    payload = {
+        "query": query,
+        "search_depth": search_depth or DEFAULT_SEARCH_DEPTH,
+        "topic": "general",
+        "max_results": max(1, min(int(max_results or DEFAULT_MAX_RESULTS), 10)),
+        "include_answer": True,
+        "include_raw_content": False,
+        "include_images": False,
+    }
+
+    try:
+        response = requests.post(
+            f"{TAVILY_API_BASE_URL}/search",
+            headers={
+                "Authorization": f"Bearer {api_key}",
+                "Content-Type": "application/json",
+            },
+            json=payload,
+            timeout=timeout,
+        )
+    except requests.RequestException as exc:
+        raise TavilySearchError(f"Tavily 请求失败: {exc}") from exc
+
+    if response.status_code >= 400:
+        message = _trim_text(response.text, 500)
+        raise TavilySearchError(f"Tavily 请求失败: HTTP {response.status_code} {message}")
+
+    try:
+        data = response.json()
+    except ValueError as exc:
+        raise TavilySearchError("Tavily 返回内容不是有效 JSON") from exc
+
+    logger.info(
+        "Tavily 短剧检索完成: query={}, results={}",
+        query,
+        len(data.get("results") or []),
+    )
+    return data
+
+
+def format_search_context(search_data: dict[str, Any], *, max_chars: int = 6000) -> str:
+    """Format Tavily response into compact LLM context."""
+    if not search_data:
+        return ""
+
+    lines = [
+        "# Tavily 联网检索结果",
+        f"检索 query: {search_data.get('query', '')}",
+    ]
+
+    answer = _trim_text(search_data.get("answer"), 1200)
+    if answer:
+        lines.extend(["", "## 综合回答", answer])
+
+    results = search_data.get("results") or []
+    if results:
+        lines.extend(["", "## 搜索来源"])
+    for index, result in enumerate(results, start=1):
+        title = _trim_text(result.get("title"), 120)
+        url = _trim_text(result.get("url"), 240)
+        content = _trim_text(result.get("content") or result.get("raw_content"), 700)
+        lines.extend(
+            [
+                f"{index}. 标题: {title}",
+                f"   来源: {url}",
+                f"   摘要: {content}",
+            ]
+        )
+
+    return _trim_text("\n".join(lines).strip(), max_chars)
--- a/app/services/test_jianying_task_unittest.py
+++ b/app/services/test_jianying_task_unittest.py
@ -51,6 +51,23 @@ class JianyingTaskTests(unittest.TestCase):

            self.assertEqual(f"indextts2:{ref_path}", params.voice_name)

+    def test_normalize_omnivoice_clone_uses_valid_param_reference(self):
+        with tempfile.NamedTemporaryFile(suffix=".wav") as ref:
+            params = VideoClipParams(tts_engine="omnivoice", voice_name=f"omnivoice:{ref.name}")
+
+            with patch.dict(jianying_task.config.omnivoice, {"mode": "voice_clone"}, clear=False):
+                jianying_task._normalize_indextts_reference_audio(params)
+
+            self.assertEqual(f"omnivoice:{ref.name}", params.voice_name)
+
+    def test_normalize_omnivoice_auto_does_not_require_reference(self):
+        params = VideoClipParams(tts_engine="omnivoice", voice_name="omnivoice:auto")
+
+        with patch.dict(jianying_task.config.omnivoice, {"mode": "auto", "reference_audio": ""}, clear=False):
+            jianying_task._normalize_indextts_reference_audio(params)
+
+        self.assertEqual("omnivoice:auto", params.voice_name)
+
    def test_normalize_indextts_requires_existing_reference_audio(self):
        params = VideoClipParams(tts_engine="indextts", voice_name="zh-CN-YunjianNeural")

--- a/app/services/test_multi_video_script_sources_unittest.py
+++ b/app/services/test_multi_video_script_sources_unittest.py
@ -0,0 +1,84 @@
+import json
+import os
+import tempfile
+import unittest
+from unittest import mock
+
+from app.services import clip_video
+from app.utils import check_script
+
+
+class TestMultiVideoScriptSources(unittest.TestCase):
+    def test_check_format_accepts_optional_video_source_fields(self):
+        script = [
+            {
+                "_id": 1,
+                "video_id": 2,
+                "video_name": "2.mp4",
+                "timestamp": "00:00:00,000-00:00:03,000",
+                "picture": "画面",
+                "narration": "解说",
+                "OST": 0,
+            }
+        ]
+
+        result = check_script.check_format(json.dumps(script, ensure_ascii=False))
+
+        self.assertTrue(result["success"])
+
+    def test_clip_video_unified_resolves_source_by_video_id_and_name(self):
+        with tempfile.TemporaryDirectory() as temp_dir:
+            video_1 = os.path.join(temp_dir, "1.mp4")
+            video_2 = os.path.join(temp_dir, "2.mp4")
+            for video_path in [video_1, video_2]:
+                with open(video_path, "wb") as file:
+                    file.write(b"video")
+
+            output_dir = os.path.join(temp_dir, "clips")
+            used_sources = []
+
+            def fake_process(source_video_path, script_item, output_dir_arg, *_args):
+                used_sources.append(source_video_path)
+                output_path = os.path.join(output_dir_arg, f"{script_item['_id']}.mp4")
+                with open(output_path, "wb") as file:
+                    file.write(b"clip")
+                return output_path
+
+            script_list = [
+                {
+                    "_id": 1,
+                    "video_id": 2,
+                    "timestamp": "00:00:00,000-00:00:03,000",
+                    "picture": "视频2画面",
+                    "narration": "播放原片1",
+                    "OST": 1,
+                },
+                {
+                    "_id": 2,
+                    "video_name": "1.mp4",
+                    "timestamp": "00:00:03,000-00:00:06,000",
+                    "picture": "视频1画面",
+                    "narration": "播放原片2",
+                    "OST": 1,
+                },
+            ]
+
+            with (
+                mock.patch.object(clip_video, "check_hardware_acceleration", return_value=None),
+                mock.patch.object(clip_video, "_process_original_audio_segment", side_effect=fake_process),
+            ):
+                result = clip_video.clip_video_unified(
+                    video_origin_path=video_1,
+                    video_origin_paths=[video_1, video_2],
+                    script_list=script_list,
+                    tts_results=[],
+                    output_dir=output_dir,
+                    task_id="multi-video-test",
+                )
+
+            self.assertEqual([video_2, video_1], used_sources)
+            self.assertEqual({1, 2}, set(result.keys()))
+
+
+if __name__ == "__main__":
+    unittest.main()
--- a/app/services/voice.py
+++ b/app/services/voice.py
@ -1,3 +1,5 @@
+from __future__ import annotations
+
 import os
 import re
 import json
@ -1298,6 +1300,10 @@ def tts(
    if tts_engine == config.INDEXTTS2_ENGINE:
        logger.info("分发到 IndexTTS-2")
        return indextts2_tts(text, voice_name, voice_file)
+
+    if tts_engine == config.OMNIVOICE_ENGINE:
+        logger.info("分发到 OmniVoice")
+        return omnivoice_tts(text, voice_name, voice_file, speed=voice_rate)
    
    if tts_engine == "doubaotts":
        logger.info("分发到豆包语音 TTS")
@ -1783,7 +1789,11 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
    voice_name = config.normalize_indextts_voice_prefix(parse_voice_name(voice_name))
    output_dir = utils.task_dir(task_id)
    tts_results = []
-    audio_extension = ".wav" if tts_engine in (config.INDEXTTS_ENGINE, config.INDEXTTS2_ENGINE) else ".mp3"
+    audio_extension = ".wav" if tts_engine in (
+        config.INDEXTTS_ENGINE,
+        config.INDEXTTS2_ENGINE,
+        config.OMNIVOICE_ENGINE,
+    ) else ".mp3"

    for item in list_script:
        if item['OST'] != 1:
@ -1809,11 +1819,11 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
                             f"或者使用其他 tts 引擎")
                continue
            else:
-                # SoulVoice、Qwen3、IndexTTS、豆包语音 引擎不生成精确字幕文件
+                # SoulVoice、Qwen3、IndexTTS、OmniVoice、豆包语音 引擎不生成精确字幕文件
                if (
                    is_soulvoice_voice(voice_name)
                    or is_qwen_engine(tts_engine)
-                    or tts_engine in (config.INDEXTTS_ENGINE, config.INDEXTTS2_ENGINE)
+                    or tts_engine in (config.INDEXTTS_ENGINE, config.INDEXTTS2_ENGINE, config.OMNIVOICE_ENGINE)
                    or tts_engine == "doubaotts"
                ):
                    # 获取实际音频文件的时长
@ -2256,6 +2266,17 @@ def parse_indextts2_voice(voice_name: str) -> str:
    return voice_name


+def parse_omnivoice_voice(voice_name: str) -> str:
+    """
+    解析 OmniVoice 语音名称
+    支持格式：omnivoice:reference_audio_path
+    返回参考音频文件路径或模式名
+    """
+    if isinstance(voice_name, str) and voice_name.startswith(config.OMNIVOICE_VOICE_PREFIX):
+        return voice_name[len(config.OMNIVOICE_VOICE_PREFIX):]
+    return voice_name
+
+
 def indextts_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]:
    """
    使用 IndexTTS-1.5 API 进行零样本语音克隆
@ -2493,3 +2514,141 @@ def indextts2_tts(text: str, voice_name: str, voice_file: str) -> Union[SubMaker

    logger.error("IndexTTS-2 TTS 生成失败，已达到最大重试次数")
    return None
+
+
+def _normalize_omnivoice_api_url(api_url: str) -> str:
+    """Return the OmniVoice ``/tts`` endpoint for a base URL, a ``/tts`` URL or a ``/tts/json`` URL."""
+    # Drop trailing slashes first so "http://host/tts/" does not become ".../tts/tts".
+    api_url = (api_url or "").strip().rstrip("/") or "http://127.0.0.1:7866/tts"
+    if api_url.endswith("/tts/json"):
+        api_url = api_url[:-len("/json")]
+    return api_url if api_url.endswith("/tts") else f"{api_url}/tts"
+
+
+def _download_omnivoice_audio(response: requests.Response, api_url: str, voice_file: str, proxies: dict) -> bool:
+    """Save OmniVoice audio to ``voice_file``: the raw body, or for JSON replies the
+    file behind ``audio_url`` (resolved against ``api_url``). True if the file is non-empty.
+    """
+    audio_source = response
+    if "application/json" in response.headers.get("content-type", "").lower():
+        payload = response.json()
+        audio_url = payload.get("audio_url") if isinstance(payload, dict) else ""
+        if not audio_url:
+            logger.error(f"OmniVoice API 响应中没有音频下载地址: {payload}")
+            return False
+
+        audio_source = requests.get(urljoin(api_url, audio_url), proxies=proxies, timeout=180)
+        if audio_source.status_code != 200:
+            logger.error(f"OmniVoice 音频下载失败: {audio_source.status_code} - {audio_source.text}")
+            return False
+
+    # Both paths end the same way: write the bytes, then report whether anything was written.
+    with open(voice_file, "wb") as f:
+        f.write(audio_source.content)
+    return os.path.getsize(voice_file) > 0
+
+
+def _optional_omnivoice_generation_data(voice_speed: float) -> dict:
+    """Build the optional generation form fields for an OmniVoice request.
+
+    ``speed`` is always present (falling back to the configured value, then 1.0);
+    numeric tuning values are included only when set, and boolean switches are
+    sent as the lowercase strings "true"/"false" when present in the config.
+    """
+    settings = getattr(config, "omnivoice", {}) or {}
+    data = {"speed": voice_speed or settings.get("speed", 1.0)}
+
+    for field in ("num_step", "guidance_scale", "duration"):
+        configured = settings.get(field)
+        if configured not in (None, ""):
+            data[field] = configured
+
+    for flag in ("denoise", "postprocess_output", "preprocess_prompt"):
+        if flag in settings:
+            data[flag] = str(bool(settings.get(flag))).lower()
+
+    return data
+
+
+def omnivoice_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]:
+    """
+    Synthesize ``text`` through the OmniVoice-Pack FastAPI service (auto, voice_design or voice_clone mode).
+    Saves the audio to ``voice_file`` and returns a SubMaker holding one subtitle event; None on failure.
+    """
+    omnivoice_config = getattr(config, "omnivoice", {}) or {}
+    api_url = _normalize_omnivoice_api_url(omnivoice_config.get("api_url", "http://127.0.0.1:7866/tts"))
+    mode = omnivoice_config.get("mode", "auto")
+    language = (omnivoice_config.get("language", "zh") or "").strip()
+    instruct = (omnivoice_config.get("instruct", "") or "").strip()
+    ref_text = (omnivoice_config.get("ref_text", "") or "").strip()
+    parsed_voice = parse_omnivoice_voice(voice_name)
+    if mode != "voice_clone" and parsed_voice and os.path.isfile(parsed_voice):
+        mode = "voice_clone"  # a voice_name pointing at an existing file forces clone mode
+    # In clone mode prefer the file named by voice_name, else fall back to the configured reference_audio.
+    reference_audio_path = ""
+    if mode == "voice_clone":
+        candidate = parsed_voice
+        if candidate and os.path.isfile(candidate):
+            reference_audio_path = candidate
+        else:
+            reference_audio_path = parse_omnivoice_voice(omnivoice_config.get("reference_audio", "") or "")
+
+        if not reference_audio_path or not os.path.exists(reference_audio_path):
+            logger.error(f"OmniVoice 参考音频文件不存在: {reference_audio_path}")
+            return None
+    elif mode != "voice_design":
+        instruct = ""
+    # Form fields for every request; "instruct" and "ref_text" are added only for their own mode.
+    data = {
+        "text": text.strip(),
+        "language": language,
+        **_optional_omnivoice_generation_data(speed),
+    }
+    if mode == "voice_design" and instruct:
+        data["instruct"] = instruct
+    if mode == "voice_clone" and ref_text:
+        data["ref_text"] = ref_text
+    # Up to 3 attempts; the reference file is reopened per attempt and closed in "finally".
+    proxies = _get_configured_proxies()
+    for attempt in range(3):
+        files = {}
+        try:
+            if reference_audio_path:
+                files["ref_audio"] = open(reference_audio_path, "rb")
+
+            logger.info(f"第 {attempt + 1} 次调用 OmniVoice API: {api_url}, mode={mode}")
+            response = requests.post(
+                api_url,
+                files=files or None,
+                data=data,
+                proxies=proxies,
+                timeout=240,
+            )
+
+            if response.status_code == 200 and _download_omnivoice_audio(response, api_url, voice_file, proxies):
+                logger.info(f"OmniVoice 成功生成音频: {voice_file}, 大小: {os.path.getsize(voice_file)} 字节")
+                sub_maker = new_sub_maker()
+                duration = get_audio_duration_from_file(voice_file)
+                duration_ms = int(duration * 1000) if duration > 0 else max(1000, int(len(text) * 200))  # fallback: 200 ms per character, at least 1 s
+                add_subtitle_event(sub_maker, 0, duration_ms * 10000, text)  # NOTE(review): x10000 presumably converts ms to 100 ns ticks - confirm
+                return sub_maker
+            # Reached on a non-200 status or when the audio could not be saved.
+            logger.error(f"OmniVoice API 调用失败: {response.status_code} - {response.text}")
+        except requests.exceptions.Timeout:
+            logger.error(f"OmniVoice API 调用超时 (尝试 {attempt + 1}/3)")
+        except requests.exceptions.RequestException as e:
+            logger.error(f"OmniVoice API 网络错误: {str(e)} (尝试 {attempt + 1}/3)")
+        except Exception as e:
+            logger.error(f"OmniVoice TTS 处理错误: {str(e)} (尝试 {attempt + 1}/3)")
+        finally:
+            for file_obj in files.values():
+                try:
+                    file_obj.close()
+                except Exception:
+                    pass  # best-effort close; errors here are deliberately ignored
+        # Pause 2 seconds between attempts, but not after the last one.
+        if attempt < 2:
+            time.sleep(2)
+
+    logger.error("OmniVoice TTS 生成失败，已达到最大重试次数")
+    return None
--- a/app/utils/check_script.py
+++ b/app/utils/check_script.py
@ -57,6 +57,23 @@ def check_format(script_content: str) -> Dict[str, Any]:
                    'details': f'当前值: {clip["_id"]} (类型: {type(clip["_id"]).__name__})'
                }

+            # 验证可选视频来源字段。旧脚本可以不包含，新脚本用于多视频定位。
+            if 'video_id' in clip and clip['video_id'] not in ("", None):
+                if not isinstance(clip['video_id'], int) or clip['video_id'] <= 0:
+                    return {
+                        'success': False,
+                        'message': f'第{i+1}个片段的video_id必须是正整数',
+                        'details': f'当前值: {clip["video_id"]} (类型: {type(clip["video_id"]).__name__})'
+                    }
+
+            if 'video_name' in clip and clip['video_name'] not in ("", None):
+                if not isinstance(clip['video_name'], str):
+                    return {
+                        'success': False,
+                        'message': f'第{i+1}个片段的video_name必须是字符串',
+                        'details': f'当前值: {clip["video_name"]} (类型: {type(clip["video_name"]).__name__})'
+                    }
+
            # 验证 timestamp 字段格式
            timestamp_pattern = r'^\d{2}:\d{2}:\d{2},\d{3}-\d{2}:\d{2}:\d{2},\d{3}$'
            if not isinstance(clip['timestamp'], str) or not re.match(timestamp_pattern, clip['timestamp']):
--- a/config.example.toml
+++ b/config.example.toml
@ -49,6 +49,12 @@
    text_openai_max_tokens = 65536
    text_openai_thinking_level = "auto"  # auto/off/low/medium/high

+    # ===== Tavily 联网搜索配置 =====
+    # 用于短剧剧情理解前，按短剧名称检索公开剧情/人物/分集信息
+    tavily_api_key = ""  # 获取地址：https://app.tavily.com
+    tavily_search_depth = "basic"  # basic / advanced / fast / ultra-fast
+    tavily_max_results = 5
+
    # ===== API Keys 参考 =====
    # 主流 LLM Providers API Key 获取地址：
    #
@ -171,6 +177,30 @@
    repetition_penalty = 10.0
    max_mel_tokens = 1500

+[omnivoice]
+    # OmniVoice-Pack 语音合成配置
+    # 支持 OmniVoice-Pack FastAPI 接口：POST /tts
+    api_url = "http://127.0.0.1:7866/tts"
+    language = "zh"
+
+    # 生成模式：auto / voice_design / voice_clone
+    mode = "auto"
+    instruct = ""
+
+    # voice_clone 模式下使用，音色列表复用 IndexTTS-1.5 的资源目录
+    reference_audio_source = "resource"
+    reference_audio = ""
+    ref_text = ""
+
+    # 高级生成参数
+    num_step = 32
+    guidance_scale = 2.0
+    speed = 1.0
+    duration = ""
+    denoise = true
+    postprocess_output = true
+    preprocess_prompt = true
+
 [doubaotts]
    # 豆包语音 TTS 配置
    # 申请流程：
@ -189,7 +219,7 @@
    silence_duration = 0.125

 [ui]
-    # TTS引擎选择 (indextts, indextts2, edge_tts, qwen3_tts, tencent_tts, doubaotts, azure_speech)
+    # TTS引擎选择 (indextts, indextts2, omnivoice, edge_tts, qwen3_tts, tencent_tts, doubaotts, azure_speech)
    tts_engine = "indextts"

    # Edge TTS 配置
--- a/requirements.txt
+++ b/requirements.txt
@ -2,7 +2,7 @@
 requests>=2.32.0
 moviepy==2.1.1
 edge-tts==7.2.7
-streamlit>=1.57.0
+streamlit==1.56.0
 watchdog==6.0.0
 loguru>=0.7.3
 tomli>=2.2.1
--- a/webui.py
+++ b/webui.py
@ -243,6 +243,12 @@ def get_voice_name_for_tts_engine(tts_engine: str) -> str:
        if reference_audio:
            return f"{config.INDEXTTS_VOICE_PREFIX}{reference_audio}"
        return config.ui.get('voice_name', '')
+    if tts_engine == config.OMNIVOICE_ENGINE:
+        mode = config.omnivoice.get('mode', 'auto')
+        reference_audio = config.omnivoice.get('reference_audio', '')
+        if mode == 'voice_clone' and reference_audio:
+            return f"{config.OMNIVOICE_VOICE_PREFIX}{reference_audio}"
+        return f"{config.OMNIVOICE_VOICE_PREFIX}{mode}"
    if tts_engine == 'doubaotts':
        return config.ui.get('doubaotts_voice_type', 'BV700_streaming')
    if tts_engine == 'soulvoice':
@ -263,6 +269,7 @@ def get_jianying_export_params(draft_name=None) -> VideoClipParams:
    return VideoClipParams(
        video_clip_json_path=st.session_state['video_clip_json_path'],
        video_origin_path=st.session_state['video_origin_path'],
+        video_origin_paths=st.session_state.get('video_origin_paths', []),
        tts_engine=tts_engine,
        voice_name=voice_name,
        voice_rate=voice_rate,
--- a/webui/components/audio_settings.py
+++ b/webui/components/audio_settings.py
@ -40,6 +40,11 @@ BGM_RESOURCE_DIR = "/Users/viccy/Downloads/tts-mp3-clone/bgms-safe"
 BGM_TRACKS_JSON = os.path.join(BGM_RESOURCE_DIR, "tracks.json")
 BGM_UPLOAD_SUBDIR = "uploaded_bgms"
 BGM_AUDIO_EXTENSIONS = (".mp3", ".wav", ".flac", ".m4a", ".aac", ".ogg")
+LOCAL_TTS_ENGINES = {  # engines labelled "Local Deployment" in the engine dropdown
+    config.INDEXTTS_ENGINE,
+    config.INDEXTTS2_ENGINE,
+    config.OMNIVOICE_ENGINE,
+}


 def get_soulvoice_voices():
@ -55,9 +60,10 @@ def get_soulvoice_voices():

 def get_tts_engine_options(tr=lambda key: key):
    """获取TTS引擎选项"""
-    return {
+    engine_options = {
        config.INDEXTTS_ENGINE: config.INDEXTTS_DISPLAY_NAME,
        config.INDEXTTS2_ENGINE: config.INDEXTTS2_DISPLAY_NAME,
+        config.OMNIVOICE_ENGINE: config.OMNIVOICE_DISPLAY_NAME,
        "edge_tts": "Edge TTS",
        "qwen3_tts": tr("Tongyi Qwen3 TTS"),
        "tencent_tts": tr("Tencent Cloud TTS"),
@ -65,6 +71,25 @@ def get_tts_engine_options(tr=lambda key: key):
        "azure_speech": "Azure Speech Services"
    }

+    return {
+        engine: format_tts_engine_option(engine, display_name, tr)
+        for engine, display_name in engine_options.items()
+    }
+
+
+def get_tts_engine_deployment_label(tts_engine, tr=lambda key: key):
+    """Return the translated deployment label for a TTS engine.
+
+    Engines in LOCAL_TTS_ENGINES map to "Local Deployment", all others to "Cloud Service".
+    """
+    return tr("Local Deployment" if tts_engine in LOCAL_TTS_ENGINES else "Cloud Service")
+
+
+def format_tts_engine_option(tts_engine, display_name, tr=lambda key: key):
+    """Format a TTS engine dropdown entry as ``<display name> [<deployment label>]``,
+    where the label comes from get_tts_engine_deployment_label."""
+    return f"{display_name} [{get_tts_engine_deployment_label(tts_engine, tr)}]"
+

 def get_tts_engine_descriptions(tr=lambda key: key):
    """获取TTS引擎详细描述"""
@ -105,6 +130,12 @@ def get_tts_engine_descriptions(tr=lambda key: key):
            "use_case": tr("IndexTTS2 use case"),
            "registration": None
        },
+        config.OMNIVOICE_ENGINE: {
+            "title": config.OMNIVOICE_DISPLAY_NAME,
+            "features": tr("OmniVoice features"),
+            "use_case": tr("OmniVoice use case"),
+            "registration": None
+        },
        "doubaotts": {
            "title": tr("Doubao TTS"),
            "features": tr("Doubao TTS features"),
@ -546,6 +577,8 @@ def render_tts_settings(tr):
        render_indextts_tts_settings(tr)
    elif selected_engine == config.INDEXTTS2_ENGINE:
        render_indextts2_tts_settings(tr)
+    elif selected_engine == config.OMNIVOICE_ENGINE:
+        render_omnivoice_tts_settings(tr)
    elif selected_engine == "doubaotts":
        render_doubaotts_settings(tr)

@ -1274,6 +1307,148 @@ def render_indextts2_tts_settings(tr):
    st.session_state['voice_pitch'] = 1.0


+def render_omnivoice_tts_settings(tr):
+    """Render the OmniVoice TTS settings widgets and write the chosen values into config.omnivoice, config.ui and session state."""
+    omnivoice_config = config.omnivoice
+
+    api_url = st.text_input(
+        tr("API URL"),
+        value=omnivoice_config.get("api_url", "http://127.0.0.1:7866/tts"),
+        help=tr("OmniVoice API URL Help"),
+    )
+
+    language = st.text_input(
+        tr("OmniVoice Language Code"),
+        value=omnivoice_config.get("language", "zh"),
+        help=tr("OmniVoice Language Code Help"),
+        placeholder="zh",
+    )
+
+    mode_options = [
+        ("auto", tr("OmniVoice Mode Auto")),
+        ("voice_design", tr("OmniVoice Mode Voice Design")),
+        ("voice_clone", tr("OmniVoice Mode Voice Clone")),
+    ]
+    mode_values = [item[0] for item in mode_options]
+    saved_mode = omnivoice_config.get("mode", "auto")
+    if saved_mode not in mode_values:
+        saved_mode = "auto"
+    # The selectbox works on indices so the stored value stays the mode key while the label is translated.
+    mode = mode_options[st.selectbox(
+        tr("OmniVoice Generation Mode"),
+        options=range(len(mode_options)),
+        index=mode_values.index(saved_mode),
+        format_func=lambda x: mode_options[x][1],
+        help=tr("OmniVoice Generation Mode Help"),
+    )][0]
+    # Start from the saved values so fields not shown for the current mode are written back unchanged.
+    instruct = omnivoice_config.get("instruct", "")
+    reference_audio_source = omnivoice_config.get("reference_audio_source", "resource")
+    reference_audio = omnivoice_config.get("reference_audio", "")
+    ref_text = omnivoice_config.get("ref_text", "")
+
+    if mode == "voice_design":
+        instruct = st.text_area(
+            tr("OmniVoice Instruct"),
+            value=instruct,
+            help=tr("OmniVoice Instruct Help"),
+            placeholder=tr("OmniVoice Instruct Placeholder"),
+            height=80,
+        )
+    elif mode == "voice_clone":
+        reference_audio_source, reference_audio = render_indextts_reference_audio_selector(
+            tr,
+            omnivoice_config,
+            "omnivoice",
+        )
+        ref_text = st.text_area(
+            tr("OmniVoice Reference Text"),
+            value=ref_text,
+            help=tr("OmniVoice Reference Text Help"),
+            placeholder=tr("OmniVoice Reference Text Placeholder"),
+            height=90,
+        )
+
+    with st.expander(tr("Advanced Parameters"), expanded=False):
+        col1, col2 = st.columns(2)
+        with col1:
+            num_step = st.slider(
+                "Num Step",
+                min_value=4,
+                max_value=64,
+                value=int(omnivoice_config.get("num_step", 32)),
+                step=1,
+                help=tr("OmniVoice Num Step Help"),
+            )
+            guidance_scale = st.slider(
+                "Guidance Scale",
+                min_value=0.1,
+                max_value=10.0,
+                value=float(omnivoice_config.get("guidance_scale", 2.0)),
+                step=0.1,
+                help=tr("OmniVoice Guidance Scale Help"),
+            )
+            voice_rate = st.slider(
+                tr("Voice Rate"),
+                min_value=0.5,
+                max_value=2.0,
+                value=float(omnivoice_config.get("speed", 1.0)),
+                step=0.1,
+                help=tr("Voice Rate Help 0.5-2.0"),
+            )
+        with col2:
+            saved_duration = omnivoice_config.get("duration", "")
+            duration_value = float(saved_duration) if saved_duration not in (None, "") else 0.0  # an unset duration is shown as 0
+            duration = st.number_input(
+                tr("OmniVoice Duration"),
+                min_value=0.0,
+                max_value=120.0,
+                value=duration_value,
+                step=0.5,
+                help=tr("OmniVoice Duration Help"),
+            )
+            denoise = st.checkbox(
+                tr("OmniVoice Denoise"),
+                value=bool(omnivoice_config.get("denoise", True)),
+                help=tr("OmniVoice Denoise Help"),
+            )
+            postprocess_output = st.checkbox(
+                tr("OmniVoice Postprocess Output"),
+                value=bool(omnivoice_config.get("postprocess_output", True)),
+                help=tr("OmniVoice Postprocess Output Help"),
+            )
+            preprocess_prompt = st.checkbox(
+                tr("OmniVoice Preprocess Prompt"),
+                value=bool(omnivoice_config.get("preprocess_prompt", True)),
+                help=tr("OmniVoice Preprocess Prompt Help"),
+            )
+
+    with st.expander(tr("OmniVoice Usage Instructions Title"), expanded=False):
+        st.markdown(tr("OmniVoice Usage Instructions"))
+    # Copy the widget values back into the in-memory OmniVoice config.
+    config.omnivoice["api_url"] = api_url
+    config.omnivoice["language"] = language
+    config.omnivoice["mode"] = mode
+    config.omnivoice["instruct"] = instruct
+    config.omnivoice["reference_audio_source"] = reference_audio_source
+    config.omnivoice["reference_audio"] = reference_audio
+    config.omnivoice["ref_text"] = ref_text
+    config.omnivoice["num_step"] = num_step
+    config.omnivoice["guidance_scale"] = guidance_scale
+    config.omnivoice["speed"] = voice_rate
+    config.omnivoice["duration"] = duration if duration > 0 else ""  # 0 is stored as "" (no duration set)
+    config.omnivoice["denoise"] = denoise
+    config.omnivoice["postprocess_output"] = postprocess_output
+    config.omnivoice["preprocess_prompt"] = preprocess_prompt
+    # voice_name carries the engine prefix plus the reference audio path (clone mode) or the mode name.
+    if mode == "voice_clone" and reference_audio:
+        config.ui["voice_name"] = f"{config.OMNIVOICE_VOICE_PREFIX}{reference_audio}"
+    else:
+        config.ui["voice_name"] = f"{config.OMNIVOICE_VOICE_PREFIX}{mode}"
+    st.session_state["voice_rate"] = voice_rate
+    st.session_state["voice_pitch"] = 1.0
+
+
 def render_doubaotts_settings(tr):
    """渲染豆包语音 TTS 设置"""
    # AK 输入
@ -1567,6 +1742,15 @@ def render_voice_preview_new(tr, selected_engine):
                voice_name = f"{config.INDEXTTS2_VOICE_PREFIX}{reference_audio}"
            voice_rate = 1.0  # IndexTTS-2 使用自身生成参数
            voice_pitch = 1.0
+        elif selected_engine == config.OMNIVOICE_ENGINE:
+            mode = config.omnivoice.get("mode", "auto")
+            reference_audio = config.omnivoice.get("reference_audio", "")
+            if mode == "voice_clone" and reference_audio:
+                voice_name = f"{config.OMNIVOICE_VOICE_PREFIX}{reference_audio}"
+            else:
+                voice_name = f"{config.OMNIVOICE_VOICE_PREFIX}{mode}"
+            voice_rate = config.omnivoice.get("speed", 1.0)
+            voice_pitch = 1.0
        elif selected_engine == "doubaotts":
            voice_type = config.ui.get("doubaotts_voice_type", "BV700_streaming")
            voice_name = voice_type
@ -1579,7 +1763,11 @@ def render_voice_preview_new(tr, selected_engine):

        with st.spinner(tr("Synthesizing Voice")):
            temp_dir = utils.storage_dir("temp", create=True)
-            audio_format = "audio/wav" if selected_engine in (config.INDEXTTS_ENGINE, config.INDEXTTS2_ENGINE) else "audio/mp3"
+            audio_format = "audio/wav" if selected_engine in (
+                config.INDEXTTS_ENGINE,
+                config.INDEXTTS2_ENGINE,
+                config.OMNIVOICE_ENGINE,
+            ) else "audio/mp3"
            audio_extension = ".wav" if audio_format == "audio/wav" else ".mp3"
            audio_file = os.path.join(temp_dir, f"tmp-voice-{str(uuid4())}{audio_extension}")

--- a/webui/components/basic_settings.py
+++ b/webui/components/basic_settings.py
@ -260,6 +260,7 @@ def render_basic_settings(tr):
        with left_config_panel:
            render_language_settings(tr)
            render_proxy_settings(tr)
+            render_tavily_search_settings(tr)

        with middle_config_panel:
            render_vision_llm_settings(tr)  # 视觉分析模型设置
@ -345,6 +346,32 @@ def render_proxy_settings(tr):
    config.ui["jianying_draft_path"] = jianying_draft_path


+def render_tavily_search_settings(tr):
+    """Render the Tavily API key input and persist it when the value changes.
+
+    The key feeds the short-drama web search; it is saved via config.save_config().
+    """
+    st.subheader(tr("Tavily Search Settings"))
+    st.markdown(f"{tr('API Key URL')}: [https://app.tavily.com](https://app.tavily.com)")
+    entered_key = st.text_input(
+        tr("Tavily API Key"),
+        value=config.app.get("tavily_api_key", ""),
+        type="password",
+        help=tr("Tavily API Key Help"),
+        key="tavily_api_key_input",
+    )
+    normalized_key = str(entered_key or "").strip()
+    if not update_app_config_if_changed("tavily_api_key", normalized_key):
+        return
+    try:
+        config.save_config()
+        st.session_state["tavily_api_key"] = normalized_key
+        st.success(tr("Tavily config saved"))
+    except Exception as exc:
+        st.error(f"{tr('Failed to save config')}: {str(exc)}")
+        logger.error(f"保存 Tavily 配置失败: {str(exc)}")
+
+
 def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
    """测试视觉模型连接

--- a/webui/components/script_settings.py
+++ b/webui/components/script_settings.py
@ -17,7 +17,7 @@ from webui.tools.generate_script_short import generate_script_short
 from webui.tools.generate_short_summary import analyze_short_drama_plot, generate_script_short_sunmmary


-SCRIPT_TABLE_BASE_COLUMNS = ["_id", "timestamp", "picture", "narration", "OST"]
+SCRIPT_TABLE_BASE_COLUMNS = ["_id", "video_id", "video_name", "timestamp", "picture", "narration", "OST"]
 VIDEO_UPLOAD_TYPES = ["mp4", "mov", "avi", "flv", "mkv", "mpeg4"]
 VIDEO_GLOB_PATTERNS = [f"*.{suffix}" for suffix in VIDEO_UPLOAD_TYPES]

@ -99,15 +99,24 @@ def _read_subtitle_file(path):
            return f.read()


-def _build_combined_subtitle_content(subtitle_paths):
+def _build_combined_subtitle_content(subtitle_paths, video_paths=None):
    sections = []
    subtitle_contents = {}
-    for subtitle_path in subtitle_paths:
+    video_paths = _normalize_video_paths(video_paths)
+    for index, subtitle_path in enumerate(subtitle_paths, start=1):
        if not subtitle_path or not os.path.exists(subtitle_path):
            continue
        content = _read_subtitle_file(subtitle_path)
        subtitle_contents[subtitle_path] = content
-        sections.append(f"# {os.path.basename(subtitle_path)}\n{content}".strip())
+        video_path = video_paths[index - 1] if index <= len(video_paths) else ""
+        if video_path:
+            header = (
+                f"# 视频 {index}: {os.path.basename(video_path)}\n"
+                f"字幕文件: {os.path.basename(subtitle_path)}"
+            )
+        else:
+            header = f"# 视频 {index}\n字幕文件: {os.path.basename(subtitle_path)}"
+        sections.append(f"{header}\n{content}".strip())
    return "\n\n".join(sections), subtitle_contents


@ -120,7 +129,10 @@ def _selected_subtitle_paths():

 def _set_subtitle_state(subtitle_paths):
    subtitle_paths = _normalize_video_paths(subtitle_paths)
-    subtitle_content, subtitle_contents = _build_combined_subtitle_content(subtitle_paths)
+    subtitle_content, subtitle_contents = _build_combined_subtitle_content(
+        subtitle_paths,
+        _selected_video_paths(),
+    )
    st.session_state['subtitle_path'] = subtitle_paths[0] if subtitle_paths else None
    st.session_state['subtitle_paths'] = subtitle_paths
    st.session_state['subtitle_content'] = subtitle_content if subtitle_content else None
@ -128,6 +140,20 @@ def _set_subtitle_state(subtitle_paths):
    st.session_state['subtitle_file_processed'] = bool(subtitle_paths)


+def _short_drama_plot_analysis_signature(subtitle_paths, video_theme, web_search_enabled, video_paths=None):
+    """Serialize the inputs of a plot analysis into a JSON string used to detect stale results.
+
+    The theme only contributes when web search is enabled.
+    """
+    signature = {
+        "subtitle_paths": _normalize_video_paths(subtitle_paths),
+        "video_paths": _normalize_video_paths(video_paths),
+        "video_theme": str(video_theme or "").strip() if web_search_enabled else "",
+        "web_search_enabled": bool(web_search_enabled),
+    }
+    return json.dumps(signature, ensure_ascii=False, sort_keys=True)
+
+
 def render_script_panel(tr):
    """渲染脚本配置面板"""
    with st.container(border=True):
@ -525,16 +551,71 @@ def short_drama_summary(tr):
    render_fun_asr_transcription(tr)
    render_subtitle_preview(tr)

-    current_subtitle_path = st.session_state.get('subtitle_path', '')
-    plot_analysis_source = st.session_state.get('short_drama_plot_analysis_subtitle_path')
-    if plot_analysis_source and plot_analysis_source != current_subtitle_path:
-        st.session_state['short_drama_plot_analysis'] = ""
-        st.session_state['short_drama_plot_analysis_subtitle_path'] = ""
+    current_subtitle_paths = _selected_subtitle_paths()
+    current_subtitle_path = current_subtitle_paths[0] if current_subtitle_paths else ''

-    name_cols = st.columns([4, 1.2], vertical_alignment="bottom")
+    st.markdown(
+        """
+        <style>
+        .st-key-short_drama_web_search_enabled [data-testid="stMarkdownContainer"] {
+            display: none;
+        }
+        .st-key-short_drama_web_search_enabled [data-testid="stWidgetLabel"] {
+            min-width: 0;
+            transform: translateX(-1.2rem);
+        }
+        .st-key-short_drama_web_search_enabled label {
+            align-items: center;
+            gap: 0.45rem;
+        }
+        .st-key-short_drama_web_search_enabled label > div:first-child {
+            width: 3rem !important;
+            min-width: 3rem !important;
+            height: 1.55rem !important;
+            border-radius: 999px !important;
+            border: 1px solid #d1d5db !important;
+            background: #e5e7eb !important;
+            box-shadow: inset 0 1px 2px rgba(15, 23, 42, 0.08) !important;
+            transition: background 160ms ease, border-color 160ms ease, box-shadow 160ms ease !important;
+        }
+        .st-key-short_drama_web_search_enabled label:hover > div:first-child {
+            background: #dbe3ef !important;
+            border-color: #b8c2d3 !important;
+        }
+        .st-key-short_drama_web_search_enabled label:has(input[aria-checked="true"]) > div:first-child {
+            border-color: transparent !important;
+            background: linear-gradient(135deg, #2563eb, #14b8a6) !important;
+            box-shadow: 0 6px 14px rgba(37, 99, 235, 0.22) !important;
+        }
+        .st-key-short_drama_web_search_enabled label > div:first-child > div {
+            width: 1.05rem !important;
+            height: 1.05rem !important;
+            border-radius: 999px !important;
+            background: #ffffff !important;
+            box-shadow: 0 2px 6px rgba(15, 23, 42, 0.24) !important;
+        }
+        .st-key-short_drama_web_search_enabled button[aria-label^="Help for"] {
+            color: #6b7280 !important;
+        }
+        .st-key-short_drama_web_search_enabled button[aria-label^="Help for"]:hover {
+            color: #2563eb !important;
+        }
+        </style>
+        """,
+        unsafe_allow_html=True,
+    )
+
+    name_cols = st.columns([3.4, 1.1, 2], vertical_alignment="bottom")
    with name_cols[0]:
        video_theme = st.text_input(tr("短剧名称"))
    with name_cols[1]:
+        web_search_enabled = st.toggle(
+            tr("联网搜索"),
+            key="short_drama_web_search_enabled",
+            help=tr("Enable Web Search Help"),
+            disabled=not current_subtitle_path,
+        )
+    with name_cols[2]:
        analyze_plot_clicked = st.button(
            tr("剧情理解"),
            key="short_drama_plot_analysis_button",
@ -543,17 +624,37 @@ def short_drama_summary(tr):
        )
    st.session_state['video_theme'] = video_theme

+    current_signature = _short_drama_plot_analysis_signature(
+        current_subtitle_paths,
+        video_theme,
+        web_search_enabled,
+        _selected_video_paths(),
+    )
+    saved_signature = st.session_state.get('short_drama_plot_analysis_signature')
+    legacy_source = st.session_state.get('short_drama_plot_analysis_subtitle_path')
+    if (
+        (saved_signature and saved_signature != current_signature)
+        or (legacy_source and legacy_source != current_subtitle_path)
+    ):
+        st.session_state['short_drama_plot_analysis'] = ""
+        st.session_state['short_drama_plot_analysis_subtitle_path'] = ""
+        st.session_state['short_drama_plot_analysis_signature'] = ""
+
    if analyze_plot_clicked:
        with st.spinner(tr("Analyzing plot...")):
            plot_analysis = analyze_short_drama_plot(
-                current_subtitle_path,
+                current_subtitle_paths,
                st.session_state.get('temperature', 0.7),
                tr,
                subtitle_content=st.session_state.get('subtitle_content', ''),
+                short_name=video_theme,
+                enable_web_search=web_search_enabled,
+                video_paths=_selected_video_paths(),
            )
        if plot_analysis:
            st.session_state['short_drama_plot_analysis'] = plot_analysis
            st.session_state['short_drama_plot_analysis_subtitle_path'] = current_subtitle_path
+            st.session_state['short_drama_plot_analysis_signature'] = current_signature
            st.success(tr("Plot analysis completed"))

    if st.session_state.get('short_drama_plot_analysis'):
@ -575,7 +676,10 @@ def render_subtitle_preview(tr):
        subtitle_contents = {}

    if subtitle_paths and (not subtitle_content or not subtitle_contents):
-        subtitle_content, subtitle_contents = _build_combined_subtitle_content(subtitle_paths)
+        subtitle_content, subtitle_contents = _build_combined_subtitle_content(
+            subtitle_paths,
+            _selected_video_paths(),
+        )
        st.session_state['subtitle_content'] = subtitle_content
        st.session_state['subtitle_contents'] = subtitle_contents

@ -724,7 +828,7 @@ def _normalize_script_table_value(column, value):
    if _is_blank_table_value(value):
        return ""

-    if column in {"_id", "OST"}:
+    if column in {"_id", "video_id", "OST"}:
        try:
            return int(value)
        except (TypeError, ValueError):
@ -783,6 +887,14 @@ def render_video_script_editor(tr):
            column_order=column_order,
            column_config={
                "_id": st.column_config.NumberColumn(tr("Script Column ID"), step=1, format="%d", width=52),
+                "video_id": st.column_config.NumberColumn(
+                    tr("Script Column Video ID"),
+                    min_value=1,
+                    step=1,
+                    format="%d",
+                    width=80,
+                ),
+                "video_name": st.column_config.TextColumn(tr("Script Column Video Name"), width=180),
                "timestamp": st.column_config.TextColumn(tr("Script Column Timestamp"), width=200),
                "picture": st.column_config.TextColumn(tr("Script Column Picture"), width=320),
                "narration": st.column_config.TextColumn(tr("Script Column Narration"), width=480),
@ -1057,7 +1169,10 @@ def render_fun_asr_transcription(tr):
            st.error(tr("Fun-ASR failed without subtitle file"))
            return

-        subtitle_content, subtitle_contents = _build_combined_subtitle_content(generated_paths)
+        subtitle_content, subtitle_contents = _build_combined_subtitle_content(
+            generated_paths,
+            media_paths,
+        )
        if not subtitle_content.strip():
            clear_fun_asr_subtitle_state()
            st.error(tr("Fun-ASR failed without subtitle file"))
@ -1112,20 +1227,35 @@ def render_script_buttons(tr, params):
            generate_script_short(tr, params, custom_clips)
        elif script_path == "summary":
            # 执行 短剧解说 脚本生成
-            subtitle_path = st.session_state.get('subtitle_path')
+            subtitle_paths = _selected_subtitle_paths()
+            subtitle_path = subtitle_paths[0] if subtitle_paths else None
            video_theme = st.session_state.get('video_theme')
            temperature = st.session_state.get('temperature')
+            web_search_enabled = bool(st.session_state.get('short_drama_web_search_enabled', False))
+            current_signature = _short_drama_plot_analysis_signature(
+                subtitle_paths,
+                video_theme,
+                web_search_enabled,
+                _selected_video_paths(),
+            )
            plot_analysis = ""
-            if st.session_state.get('short_drama_plot_analysis_subtitle_path') == subtitle_path:
+            if st.session_state.get('short_drama_plot_analysis_signature') == current_signature:
+                plot_analysis = st.session_state.get('short_drama_plot_analysis', '')
+            elif (
+                not web_search_enabled
+                and st.session_state.get('short_drama_plot_analysis_subtitle_path') == subtitle_path
+            ):
                plot_analysis = st.session_state.get('short_drama_plot_analysis', '')
            generate_script_short_sunmmary(
                params,
-                subtitle_path,
+                subtitle_paths,
                video_theme,
                temperature,
                tr,
                plot_analysis=plot_analysis,
                subtitle_content=st.session_state.get('subtitle_content', ''),
+                enable_web_search=web_search_enabled,
+                video_paths=_selected_video_paths(),
            )
        else:
            load_script(tr, script_path)
@ -1172,6 +1302,8 @@ def save_script_with_validation(tr, video_clip_json_details):
                example_script = [
                    {
                        "_id": 1,
+                        "video_id": 1,
+                        "video_name": "1.mp4",
                        "timestamp": "00:00:00,600-00:00:07,559",
                        "picture": "工地上，蔡晓艳奋力救人，场面混乱",
                        "narration": "灾后重建，工地上险象环生！泼辣女工蔡晓艳挺身而出，救人第一！",
@ -1179,6 +1311,8 @@ def save_script_with_validation(tr, video_clip_json_details):
                    },
                    {
                        "_id": 2,
+                        "video_id": 2,
+                        "video_name": "2.mp4",
                        "timestamp": "00:00:08,240-00:00:12,359",
                        "picture": "领导视察，蔡晓艳不屑一顾",
                        "narration": "播放原片4",
--- a/webui/components/subtitle_settings.py
+++ b/webui/components/subtitle_settings.py
@ -604,7 +604,7 @@ def render_font_settings(tr):

 def is_disabled_subtitle_settings(tts_engine:str)->bool:
    """是否禁用字幕设置"""
-    return tts_engine=="soulvoice" or tts_engine=="qwen3_tts"
+    # Subtitle settings are disabled for the SoulVoice, Qwen3 TTS and OmniVoice engines.
+    return tts_engine=="soulvoice" or tts_engine=="qwen3_tts" or tts_engine==config.OMNIVOICE_ENGINE

 def render_position_settings(tr):
    """渲染位置设置"""
--- a/webui/i18n/en.json
+++ b/webui/i18n/en.json
@ -15,6 +15,8 @@
    "Video script table help": "Edit the full script JSON as a table. You can add or delete rows; saving will validate and write the script file again.",
    "Raw JSON Preview": "Raw JSON Preview",
    "Script Column ID": "ID",
+    "Script Column Video ID": "Video",
+    "Script Column Video Name": "Video Name",
    "Script Column Timestamp": "Timestamp",
    "Script Column Picture": "Picture",
    "Script Column Narration": "Narration",
@ -286,7 +288,11 @@
    "IndexTTS download link": "Download link: https://pan.quark.cn/s/0767c9bcefd5",
    "IndexTTS2 features": "A locally or privately deployed IndexTTS-2 voice-cloning engine with emotion control and fuller generation parameters.",
    "IndexTTS2 use case": "Best for fixed voices, emotional narration, and local speech synthesis workflows that need finer sampling controls. Start the IndexTTS-2 API service before use.",
+    "OmniVoice features": "A locally or privately deployed OmniVoice-Pack multilingual TTS engine with automatic voice generation, voice design, and reference-audio cloning.",
+    "OmniVoice use case": "Best for local controllable multilingual narration, voice design, or reference-audio cloning. Start the OmniVoice-Pack API service before use.",
    "Doubao TTS features": "Volcengine Doubao speech synthesis with multiple voices and emotions, plus fast access in mainland China.",
+    "Local Deployment": "Local Deployment",
+    "Cloud Service": "Cloud Service",
    "Select TTS Engine": "Select TTS Engine",
    "Select TTS Engine Help": "Choose the text-to-speech engine you want to use.",
    "TTS Engine Details": "📋 {engine} Details",
@ -413,6 +419,16 @@
    "Subtitle calibration succeeded for multiple files": "Subtitle calibration succeeded for {count} files: {files}",
    "Subtitle calibration failed": "Subtitle calibration failed",
    "Transcribed subtitles storage hint": "Previously transcribed subtitles are saved in {path}; drag a file from that folder to upload",
+    "Tavily Search Settings": "Tavily Web Search",
+    "Tavily API Key": "Tavily API Key",
+    "Tavily API Key Help": "Used for web search before short drama plot analysis. When Web Search is enabled, the app searches plot, character, and episode context by drama name, then combines it with subtitles.",
+    "Tavily config saved": "Tavily configuration saved",
+    "联网搜索": "Web Search",
+    "Enable Web Search Help": "When enabled, plot analysis searches the web with Tavily by short drama name before combining those results with subtitles.",
+    "Please configure Tavily API Key in Basic Settings": "Please configure the Tavily API Key in Basic Settings first",
+    "Please enter short drama name before web search": "Please enter the short drama name before enabling web search",
+    "Searching short drama with Tavily...": "Searching short drama context with Tavily...",
+    "Tavily search failed": "Tavily search failed",
    "剧情理解": "Plot Analysis",
    "剧情理解结果": "Plot Analysis Result",
    "Analyzing plot...": "Analyzing plot...",
@ -443,6 +459,30 @@
    "API URL": "API URL",
    "IndexTTS API URL Help": "IndexTTS-1.5 API service URL",
    "IndexTTS2 API URL Help": "IndexTTS-2 API service URL. You can enter the service root or the full /tts endpoint.",
+    "OmniVoice API URL Help": "OmniVoice-Pack API service URL. You can enter the service root or the full /tts endpoint.",
+    "OmniVoice Language Code": "Synthesis Language",
+    "OmniVoice Language Code Help": "The language parameter sent to OmniVoice-Pack, such as zh or en.",
+    "OmniVoice Generation Mode": "Generation Mode",
+    "OmniVoice Generation Mode Help": "Automatic voice needs no extra fields; voice design uses an instruction; reference-audio cloning needs reference audio and matching text.",
+    "OmniVoice Mode Auto": "Automatic Voice",
+    "OmniVoice Mode Voice Design": "Voice Design",
+    "OmniVoice Mode Voice Clone": "Reference Audio Clone",
+    "OmniVoice Instruct": "Voice Instruction",
+    "OmniVoice Instruct Help": "Describe the desired voice, such as gender, pitch, accent, or style.",
+    "OmniVoice Instruct Placeholder": "e.g. female, low pitch, british accent",
+    "OmniVoice Reference Text": "Reference Audio Text",
+    "OmniVoice Reference Text Help": "The exact transcript of the reference audio. Required when the deployed service has ASR disabled.",
+    "OmniVoice Reference Text Placeholder": "Enter the text spoken in the reference audio",
+    "OmniVoice Num Step Help": "Diffusion generation steps. Higher values usually improve quality but slow generation.",
+    "OmniVoice Guidance Scale Help": "Controls how strongly text conditions guide generation.",
+    "OmniVoice Duration": "Target Duration (seconds)",
+    "OmniVoice Duration Help": "0 lets the model decide the duration automatically.",
+    "OmniVoice Denoise": "Enable Denoise",
+    "OmniVoice Denoise Help": "Ask OmniVoice-Pack to denoise the generated output.",
+    "OmniVoice Postprocess Output": "Postprocess Output",
+    "OmniVoice Postprocess Output Help": "Enable OmniVoice-Pack output post-processing.",
+    "OmniVoice Preprocess Prompt": "Preprocess Text",
+    "OmniVoice Preprocess Prompt Help": "Enable OmniVoice-Pack text preprocessing.",
    "Reference Audio Source": "Reference Audio Source",
    "Reference Audio Source Help": "Choose a reference audio from the resource directory or upload a new one.",
    "Select from Resource Directory": "Select from Resource Directory",
@ -502,6 +542,8 @@
    "Max Mel Tokens Help": "Controls the maximum mel tokens generated in one request. Higher values can produce longer audio.",
    "IndexTTS2 Usage Instructions Title": "💡 IndexTTS-2 Usage Instructions",
    "IndexTTS2 Usage Instructions": "**IndexTTS-2 voice cloning**\n\n1. **Choose a voice**: reuse IndexTTS-1.5 resource audio or upload a reference audio file\n2. **Set API URL**: for example http://192.168.3.6:7863/tts, or enter the service root\n3. **Tune emotion**: speaker is the default; switch to audio, vector, or text when needed\n4. **Tune generation**: temperature, top_p, top_k, num_beams, repetition_penalty, and max_mel_tokens are sent directly to the IndexTTS-2 API\n\n**Notes**:\n- Reference audio quality directly affects cloning quality\n- The first request may load the model and take longer\n- CPU deployments are much slower than GPU deployments",
+    "OmniVoice Usage Instructions Title": "OmniVoice Usage Instructions",
+    "OmniVoice Usage Instructions": "**OmniVoice-Pack speech synthesis**\n\n1. **Automatic voice**: set the API URL and language, then synthesize directly.\n2. **Voice design**: fill instruct with the desired gender, pitch, accent, or style.\n3. **Reference-audio clone**: upload or choose reference audio and fill its matching transcript.\n\n**Notes**:\n- The default service URL is http://127.0.0.1:7866/tts\n- Reference-audio cloning requires reference text when the service has no ASR model loaded\n- OmniVoice returns WAV audio, and NarratoAI estimates subtitle segment timing from the audio duration",
    "Volcengine Access Key Help": "Volcengine Access Key",
    "Volcengine Secret Key Help": "Volcengine Secret Key",
    "Doubao AppID Help": "Doubao TTS application AppID",
--- a/webui/i18n/zh.json
+++ b/webui/i18n/zh.json
@ -159,6 +159,8 @@
    "Video script table help": "在表格中编辑完整脚本 JSON。可新增、删除行；保存时会重新校验并写入脚本文件。",
    "Raw JSON Preview": "原始 JSON 预览",
    "Script Column ID": "序号",
+    "Script Column Video ID": "视频",
+    "Script Column Video Name": "视频文件",
    "Script Column Timestamp": "时间戳",
    "Script Column Picture": "画面描述",
    "Script Column Narration": "解说台词",
@ -267,7 +269,11 @@
    "IndexTTS download link": "下载地址：https://pan.quark.cn/s/0767c9bcefd5",
    "IndexTTS2 features": "本地/私有部署的 IndexTTS-2 语音克隆引擎，支持情感控制和更完整的生成参数。",
    "IndexTTS2 use case": "适合需要固定音色、情绪化旁白或更细致采样控制的本地语音合成场景。使用前请先启动 IndexTTS-2 API 服务。",
+    "OmniVoice features": "本地/私有部署的 OmniVoice-Pack 多语种语音合成引擎，支持自动音色、指令音色和参考音频克隆。",
+    "OmniVoice use case": "适合需要本地可控、多语言旁白、音色设计或参考音频克隆的场景。使用前请先启动 OmniVoice-Pack API 服务。",
    "Doubao TTS features": "火山引擎豆包语音合成，支持多种音色和情感，国内访问速度快",
+    "Local Deployment": "本地部署",
+    "Cloud Service": "云端服务",
    "Select TTS Engine": "选择 TTS 引擎",
    "Select TTS Engine Help": "选择您要使用的文本转语音引擎",
    "TTS Engine Details": "📋 {engine} 详细说明",
@ -395,6 +401,16 @@
    "Subtitle calibration succeeded for multiple files": "字幕校准成功，共 {count} 个文件: {files}",
    "Subtitle calibration failed": "字幕校准失败",
    "Transcribed subtitles storage hint": "之前转录生成的字幕保存在 {path}，可从该目录拖入上传",
+    "Tavily Search Settings": "Tavily 联网搜索",
+    "Tavily API Key": "Tavily API Key",
+    "Tavily API Key Help": "用于短剧剧情理解前的联网检索。开启“联网搜索”后，会先按短剧名称检索剧情、人物和分集信息，再结合字幕分析。",
+    "Tavily config saved": "Tavily 配置已保存",
+    "联网搜索": "联网搜索",
+    "Enable Web Search Help": "开启后，剧情理解会先使用 Tavily 按短剧名称联网检索，再结合检索结果和字幕分析剧情。",
+    "Please configure Tavily API Key in Basic Settings": "请先在基础设置中配置 Tavily API Key",
+    "Please enter short drama name before web search": "开启联网搜索前，请先填写短剧名称",
+    "Searching short drama with Tavily...": "正在使用 Tavily 检索短剧信息...",
+    "Tavily search failed": "Tavily 检索失败",
    "剧情理解": "剧情理解",
    "剧情理解结果": "剧情理解结果",
    "Analyzing plot...": "正在理解剧情...",
@ -425,6 +441,30 @@
    "API URL": "API 地址",
    "IndexTTS API URL Help": "IndexTTS-1.5 API 服务地址",
    "IndexTTS2 API URL Help": "IndexTTS-2 API 服务地址，可填写服务根地址或完整 /tts 地址",
+    "OmniVoice API URL Help": "OmniVoice-Pack API 服务地址，可填写服务根地址或完整 /tts 地址",
+    "OmniVoice Language Code": "合成语言",
+    "OmniVoice Language Code Help": "传给 OmniVoice-Pack 的 language 参数，例如 zh、en。",
+    "OmniVoice Generation Mode": "生成模式",
+    "OmniVoice Generation Mode Help": "自动音色无需额外参数；指令音色使用描述词；参考音频克隆需要参考音频和对应文本。",
+    "OmniVoice Mode Auto": "自动音色",
+    "OmniVoice Mode Voice Design": "指令音色",
+    "OmniVoice Mode Voice Clone": "参考音频克隆",
+    "OmniVoice Instruct": "音色指令",
+    "OmniVoice Instruct Help": "描述希望生成的音色，例如性别、音高、口音或风格。",
+    "OmniVoice Instruct Placeholder": "例如：female, low pitch, british accent",
+    "OmniVoice Reference Text": "参考音频文本",
+    "OmniVoice Reference Text Help": "参考音频对应的逐字文本；当前部署未启用 ASR 时必须填写。",
+    "OmniVoice Reference Text Placeholder": "请输入参考音频中实际朗读的内容",
+    "OmniVoice Num Step Help": "扩散生成步数，值越大通常质量更高但速度更慢。",
+    "OmniVoice Guidance Scale Help": "控制文本条件的引导强度。",
+    "OmniVoice Duration": "目标时长（秒）",
+    "OmniVoice Duration Help": "0 表示由模型自动决定时长。",
+    "OmniVoice Denoise": "启用降噪",
+    "OmniVoice Denoise Help": "让 OmniVoice-Pack 对生成结果执行降噪处理。",
+    "OmniVoice Postprocess Output": "后处理输出",
+    "OmniVoice Postprocess Output Help": "启用 OmniVoice-Pack 的输出后处理。",
+    "OmniVoice Preprocess Prompt": "预处理文本",
+    "OmniVoice Preprocess Prompt Help": "启用 OmniVoice-Pack 的文本预处理。",
    "Reference Audio Source": "参考音频来源",
    "Reference Audio Source Help": "选择从资源目录选择参考音频，或上传新的参考音频",
    "Select from Resource Directory": "从资源目录选择",
@ -484,6 +524,8 @@
    "Max Mel Tokens Help": "控制单次生成的最大 mel token 数，值越大可生成更长音频",
    "IndexTTS2 Usage Instructions Title": "💡 IndexTTS-2 使用说明",
    "IndexTTS2 Usage Instructions": "**IndexTTS-2 语音克隆**\n\n1. **选择音色**：复用 IndexTTS-1.5 的资源音频或上传参考音频\n2. **设置 API 地址**：例如 http://192.168.3.6:7863/tts，也可以填写服务根地址\n3. **调整情感参数**：默认使用 speaker，可按需切换到 audio、vector 或 text\n4. **调整生成参数**：temperature、top_p、top_k、num_beams、repetition_penalty 和 max_mel_tokens 会直接传给 IndexTTS-2 接口\n\n**注意事项**：\n- 参考音频质量会直接影响克隆效果\n- 首次请求可能需要加载模型，耗时更长\n- CPU 部署生成速度会明显慢于 GPU",
+    "OmniVoice Usage Instructions Title": "OmniVoice 使用说明",
+    "OmniVoice Usage Instructions": "**OmniVoice-Pack 语音合成**\n\n1. **自动音色**：只需要设置 API 地址和语言，可直接合成。\n2. **指令音色**：填写 instruct 描述想要的性别、音高、口音或风格。\n3. **参考音频克隆**：上传或选择参考音频，并填写该音频对应文本。\n\n**注意事项**：\n- 当前默认服务地址为 http://127.0.0.1:7866/tts\n- 参考音频克隆在服务未加载 ASR 模型时必须填写参考文本\n- OmniVoice 返回 WAV 音频，系统会按音频时长估算字幕段落",
    "Volcengine Access Key Help": "火山引擎 Access Key",
    "Volcengine Secret Key Help": "火山引擎 Secret Key",
    "Doubao AppID Help": "豆包语音应用 AppID",
--- a/webui/tools/generate_short_summary.py
+++ b/webui/tools/generate_short_summary.py
@ -17,12 +17,101 @@ from loguru import logger
 from app.config import config
 from app.services.SDE.short_drama_explanation import analyze_subtitle, generate_narration_script
 from app.services.subtitle_text import read_subtitle_text
+from app.services.tavily_search import TavilySearchError, format_search_context, search_short_drama
 # 导入新的LLM服务模块 - 确保提供商被注册
 import app.services.llm  # 这会触发提供商注册
 from app.services.llm.migration_adapter import SubtitleAnalyzerAdapter
 import re


+def _normalize_paths(paths):
+    """Return a de-duplicated list of non-empty path strings in first-seen order.
+
+    Args:
+        paths: A single path (``str`` or ``os.PathLike``), an iterable of
+            paths, or a falsy value such as ``None``.
+
+    Returns:
+        A list of whitespace-stripped path strings. Entries that are neither
+        ``str`` nor ``os.PathLike``, that are blank after stripping, or that
+        repeat an earlier entry are dropped.
+    """
+    # A lone path must be wrapped: iterating a str yields characters, and
+    # iterating a PathLike object raises TypeError.
+    if isinstance(paths, (str, os.PathLike)):
+        paths = [paths]
+    if not paths:
+        return []
+
+    normalized_paths = []
+    seen = set()
+    for path in paths:
+        if isinstance(path, os.PathLike):
+            path = os.fspath(path)
+        if not isinstance(path, str):
+            # Covers None, numbers and bytes paths.
+            continue
+        path = path.strip()
+        if not path or path in seen:
+            continue
+        normalized_paths.append(path)
+        seen.add(path)
+    return normalized_paths
+
+
+def _build_combined_subtitle_content(subtitle_paths, video_paths=None):
+    """Merge several subtitle files into one text, one headed section per file.
+
+    Args:
+        subtitle_paths: One subtitle path or a list of them.
+        video_paths: Optional video paths; the entry at the same position as a
+            subtitle supplies the video file name shown in that section header.
+
+    Returns:
+        The sections joined by blank lines, or an empty string when no
+        subtitle file exists or every existing file has no text.
+    """
+    sections = []
+    video_paths = _normalize_paths(video_paths)
+    for index, subtitle_path in enumerate(_normalize_paths(subtitle_paths), start=1):
+        if not os.path.exists(subtitle_path):
+            continue
+
+        subtitle_text = read_subtitle_text(subtitle_path).text
+        if not str(subtitle_text or "").strip():
+            # A header without a body would make the combined text non-empty
+            # and hide the "subtitle is empty" condition that callers detect
+            # by testing the returned string for emptiness.
+            continue
+
+        # The section number stays positional, so it still lines up with the
+        # video list even when an earlier subtitle was skipped.
+        video_path = video_paths[index - 1] if index <= len(video_paths) else ""
+        if video_path:
+            header = (
+                f"# 视频 {index}: {os.path.basename(video_path)}\n"
+                f"字幕文件: {os.path.basename(subtitle_path)}"
+            )
+        else:
+            header = f"# 视频 {index}\n字幕文件: {os.path.basename(subtitle_path)}"
+        sections.append(f"{header}\n{subtitle_text}".strip())
+
+    return "\n\n".join(sections)
+
+
+def _coerce_video_id(value):
+    """Convert *value* to a positive integer video id.
+
+    Args:
+        value: Anything ``int()`` may accept, e.g. ``2``, ``"2"`` or ``2.0``.
+
+    Returns:
+        The integer when it is greater than zero, otherwise ``None``. ``None``
+        is also returned when the value cannot be converted.
+    """
+    try:
+        video_id = int(value)
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError is raised by int(float("inf")); treat it like any
+        # other unusable value instead of letting it propagate.
+        return None
+    return video_id if video_id > 0 else None
+
+
+def _match_video_id_by_name(video_name, video_paths):
+    """Find the 1-based position of the video whose file name equals *video_name*.
+
+    Only base names are compared, so directory parts on either side are
+    ignored. Returns ``None`` when the name is blank or nothing matches.
+    """
+    wanted = str(video_name or "").strip()
+    if not wanted:
+        return None
+
+    wanted_basename = os.path.basename(wanted)
+    return next(
+        (
+            position
+            for position, candidate in enumerate(video_paths, start=1)
+            if os.path.basename(candidate) == wanted_basename
+        ),
+        None,
+    )
+
+
+def _normalize_narration_items_video_sources(items, video_paths):
+    """Give every narration item a valid ``video_id`` / ``video_name`` pair.
+
+    The id is taken from ``video_id`` (or ``video_index``); a match on
+    ``video_name`` (or ``source_video``) takes precedence over it. An id that
+    is missing or larger than the number of videos falls back to video 1 with
+    a warning. Non-dict items are passed through untouched, and *items* is
+    returned as-is when no video path is available. Dict items are copied, so
+    the input items are not mutated.
+    """
+    sources = _normalize_paths(video_paths)
+    if not sources:
+        return items
+
+    source_count = len(sources)
+    result = []
+    for entry in items:
+        if not isinstance(entry, dict):
+            result.append(entry)
+            continue
+
+        patched = entry.copy()
+        id_from_field = _coerce_video_id(patched.get("video_id") or patched.get("video_index"))
+        id_from_name = _match_video_id_by_name(
+            patched.get("video_name") or patched.get("source_video"),
+            sources,
+        )
+        # A file-name match wins over the numeric field.
+        resolved_id = id_from_name or id_from_field
+        if resolved_id is None or resolved_id > source_count:
+            logger.warning(f"片段 {patched.get('_id')} 未提供有效 video_id，默认使用视频 1")
+            resolved_id = 1
+
+        patched["video_id"] = resolved_id
+        patched["video_name"] = os.path.basename(sources[resolved_id - 1])
+        result.append(patched)
+
+    return result
+
+
 def parse_and_fix_json(json_string):
    """
    解析并修复JSON字符串
@ -135,12 +224,83 @@ def parse_and_fix_json(json_string):
        return None


-def analyze_short_drama_plot(subtitle_path, temperature, tr=lambda key: key, subtitle_content=None):
+def _get_tavily_api_key() -> str:
+    """Return the Tavily API key, preferring the session value over the app config.
+
+    Each candidate is converted to ``str`` and stripped before it is judged,
+    so a whitespace-only session value no longer hides a key stored in the
+    config, and a non-string value does not crash on ``.strip()``.
+
+    Returns:
+        The first non-blank key, or an empty string when none is configured.
+    """
+    candidates = (
+        st.session_state.get("tavily_api_key"),
+        config.app.get("tavily_api_key"),
+    )
+    for candidate in candidates:
+        api_key = str(candidate or "").strip()
+        if api_key:
+            return api_key
+    return ""
+
+
+def _build_tavily_context(short_name: str, tr=lambda key: key) -> str | None:
+    """Search Tavily for the drama and return the formatted search context.
+
+    Args:
+        short_name: Short drama name used as the search subject.
+        tr: Translation callable applied to user-facing messages.
+
+    Returns:
+        The text produced by ``format_search_context``, or ``None`` after an
+        error has been shown via ``st.error`` (blank name, missing API key,
+        or a failed search).
+    """
+    drama_name = str(short_name or "").strip()
+    if not drama_name:
+        st.error(tr("Please enter short drama name before web search"))
+        return None
+
+    tavily_key = _get_tavily_api_key()
+    if not tavily_key:
+        st.error(tr("Please configure Tavily API Key in Basic Settings"))
+        return None
+
+    try:
+        search_options = {
+            "search_depth": config.app.get("tavily_search_depth", "basic"),
+            "max_results": config.app.get("tavily_max_results", 5),
+        }
+        search_result = search_short_drama(drama_name, tavily_key, **search_options)
+        return format_search_context(search_result)
+    except TavilySearchError as exc:
+        # Expected search failure: log the message only.
+        logger.error(f"Tavily 短剧检索失败: {str(exc)}")
+        st.error(f"{tr('Tavily search failed')}: {str(exc)}")
+        return None
+    except Exception as exc:
+        # Anything else: keep the full traceback in the log.
+        logger.error(f"Tavily 短剧检索异常: {traceback.format_exc()}")
+        st.error(f"{tr('Tavily search failed')}: {str(exc)}")
+        return None
+
+
+def _build_plot_analysis_input(
+    subtitle_content: str,
+    short_name: str = "",
+    enable_web_search: bool = False,
+    tr=lambda key: key,
+) -> str | None:
+    """Build the text handed to the plot-analysis LLM call.
+
+    Without web search the stripped subtitle text is returned unchanged. With
+    web search the Tavily context is placed before the subtitles together
+    with an instruction block; ``None`` is returned when that context could
+    not be built.
+    """
+    subtitles = str(subtitle_content or "").strip()
+    if enable_web_search:
+        search_context = _build_tavily_context(short_name, tr)
+        if search_context is None:
+            return None
+
+        prompt_lines = (
+            "# 分析补充说明",
+            "请先参考 Tavily 联网检索结果理解短剧名称、人物关系、剧情背景和公开剧情梗概，再结合原始字幕完成剧情理解。",
+            "如果联网检索结果与字幕内容冲突，请以字幕内容为准；时间戳必须只从字幕内容中提取。",
+            "",
+            f"{search_context}",
+            "",
+            "# 原始字幕",
+            f"{subtitles}",
+        )
+        return "\n".join(prompt_lines)
+
+    return subtitles
+
+
+def analyze_short_drama_plot(
+    subtitle_path,
+    temperature,
+    tr=lambda key: key,
+    subtitle_content=None,
+    short_name: str = "",
+    enable_web_search: bool = False,
+    video_paths=None,
+):
    """仅执行短剧字幕剧情理解，返回可编辑的剧情分析文本。"""
-    if not subtitle_path:
+    subtitle_paths = _normalize_paths(subtitle_path)
+    if not subtitle_paths:
        st.error(tr("Please generate or upload subtitles first"))
        return None
-    if not os.path.exists(subtitle_path):
+    missing_subtitle_paths = [path for path in subtitle_paths if not os.path.exists(path)]
+    if missing_subtitle_paths:
        st.error(tr("Subtitle file does not exist"))
        return None

@ -149,19 +309,31 @@ def analyze_short_drama_plot(subtitle_path, temperature, tr=lambda key: key, sub
    text_model = config.app.get(f'text_{text_provider}_model_name')
    text_base_url = config.app.get(f'text_{text_provider}_base_url')

-    subtitle_content = str(subtitle_content or "").strip() or read_subtitle_text(subtitle_path).text
+    subtitle_content = str(subtitle_content or "").strip() or _build_combined_subtitle_content(
+        subtitle_paths,
+        video_paths,
+    )
    if not subtitle_content:
        st.error(tr("Subtitle file is empty or unreadable"))
        return None

+    plot_analysis_input = _build_plot_analysis_input(
+        subtitle_content,
+        short_name=short_name,
+        enable_web_search=enable_web_search,
+        tr=tr,
+    )
+    if plot_analysis_input is None:
+        return None
+
    try:
        logger.info("使用新的LLM服务架构进行字幕分析")
        analyzer = SubtitleAnalyzerAdapter(text_api_key, text_model, text_base_url, text_provider)
-        analysis_result = analyzer.analyze_subtitle(subtitle_content)
+        analysis_result = analyzer.analyze_subtitle(plot_analysis_input)
    except Exception as e:
        logger.warning(f"使用新LLM服务失败，回退到旧实现: {str(e)}")
        analysis_result = analyze_subtitle(
-            subtitle_content=subtitle_content,
+            subtitle_content=plot_analysis_input,
            api_key=text_api_key,
            model=text_model,
            base_url=text_base_url,
@ -186,6 +358,8 @@ def generate_script_short_sunmmary(
    tr=lambda key: key,
    plot_analysis=None,
    subtitle_content=None,
+    enable_web_search: bool = False,
+    video_paths=None,
 ):
    """
    生成 短剧解说 视频脚本
@ -204,7 +378,12 @@ def generate_script_short_sunmmary(

    try:
        with st.spinner(tr("Generating script...")):
-            if not params.video_origin_path:
+            selected_video_paths = _normalize_paths(
+                video_paths
+                or getattr(params, "video_origin_paths", [])
+                or getattr(params, "video_origin_path", "")
+            )
+            if not selected_video_paths:
                st.error(tr("Please select video file first"))
                return
            """
@ -212,7 +391,9 @@ def generate_script_short_sunmmary(
            """
            update_progress(30, tr("Parsing subtitles..."))
            # 判断字幕文件是否存在
-            if not os.path.exists(subtitle_path):
+            subtitle_paths = _normalize_paths(subtitle_path)
+            missing_subtitle_paths = [path for path in subtitle_paths if not os.path.exists(path)]
+            if not subtitle_paths or missing_subtitle_paths:
                st.error(tr("Subtitle file does not exist"))
                return

@ -225,7 +406,10 @@ def generate_script_short_sunmmary(
            text_base_url = config.app.get(f'text_{text_provider}_base_url')

            # 读取字幕文件内容（无论使用哪种实现都需要）
-            subtitle_content = str(subtitle_content or "").strip() or read_subtitle_text(subtitle_path).text
+            subtitle_content = str(subtitle_content or "").strip() or _build_combined_subtitle_content(
+                subtitle_paths,
+                selected_video_paths,
+            )
            if not subtitle_content:
                st.error(tr("Subtitle file is empty or unreadable"))
                return
@ -238,16 +422,27 @@ def generate_script_short_sunmmary(
                    "analysis": str(plot_analysis).strip(),
                }
            else:
+                plot_analysis_input = subtitle_content
+                if enable_web_search:
+                    update_progress(40, tr("Searching short drama with Tavily..."))
+                    plot_analysis_input = _build_plot_analysis_input(
+                        subtitle_content,
+                        short_name=video_theme,
+                        enable_web_search=True,
+                        tr=tr,
+                    )
+                    if plot_analysis_input is None:
+                        return
                try:
                    # 优先使用新的LLM服务架构
                    logger.info("使用新的LLM服务架构进行字幕分析")
-                    analysis_result = analyzer.analyze_subtitle(subtitle_content)
+                    analysis_result = analyzer.analyze_subtitle(plot_analysis_input)

                except Exception as e:
                    logger.warning(f"使用新LLM服务失败，回退到旧实现: {str(e)}")
                    # 回退到旧的实现
                    analysis_result = analyze_subtitle(
-                        subtitle_content=subtitle_content,
+                        subtitle_content=plot_analysis_input,
                        api_key=text_api_key,
                        model=text_model,
                        base_url=text_base_url,
@ -320,7 +515,11 @@ def generate_script_short_sunmmary(
                logger.error(f"JSON结构错误，缺少items字段: {narration_dict}")
                st.stop()

-            script = json.dumps(narration_dict['items'], ensure_ascii=False, indent=2)
+            narration_items = _normalize_narration_items_video_sources(
+                narration_dict['items'],
+                selected_video_paths,
+            )
+            script = json.dumps(narration_items, ensure_ascii=False, indent=2)

            if script is None:
                st.error(tr("Script generation failed check logs"))