feat(webui): 重构视觉分析功能并添加新模型支持

- 移除了对 QwenVL 模型的特定逻辑，改为更通用的实现
- 添加了对 OpenAI 视觉模型的支持
- 更新了视觉模型设置界面，增加了新的模型选项
- 重构了测试连接和创建分析器的代码，提高了可维护性
- 调整了配置文件结构，简化了视觉模型的配置
2025-12-12 11:22:51 +00:00 · 2025-05-09 12:03:27 +08:00 · 2025-05-09 12:03:27 +08:00 · 3fe8eb50c0
commit 3fe8eb50c0
parent afeeb7c516
6 changed files with 147 additions and 241 deletions
--- a/app/services/generate_narration_script.py
+++ b/app/services/generate_narration_script.py
@ -237,28 +237,28 @@ if __name__ == '__main__':
    video_frame_description_path = "/Users/apple/Desktop/home/NarratoAI/storage/temp/analysis/frame_analysis_20250508_1139.json"
    # 测试新的JSON文件
-    test_file_path = "/Users/apple/Desktop/home/NarratoAI/storage/temp/analysis/frame_analysis_20250508_1458.json"
+    test_file_path = "/Users/apple/Desktop/home/NarratoAI/storage/temp/analysis/frame_analysis_20250508_2258.json"
    markdown_output = parse_frame_analysis_to_markdown(test_file_path)
    # print(markdown_output)
    # 输出到文件以便检查格式
-    output_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/narration_script.md"
+    output_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/家里家外1-5.md"
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(markdown_output)
    # print(f"\n已将Markdown输出保存到: {output_file}")
-    # 生成解说文案
+    # # 生成解说文案
-    narration = generate_narration(
+    # narration = generate_narration(
-        markdown_output, 
+    #     markdown_output,
-        text_api_key,
+    #     text_api_key,
-        base_url=text_base_url,
+    #     base_url=text_base_url,
-        model=text_model
+    #     model=text_model
-    )
+    # )
-    
+    #
-    # 保存解说文案
+    # # 保存解说文案
-    print(narration)
+    # print(narration)
-    print(type(narration))
+    # print(type(narration))
-    narration_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/final_narration_script.json"
+    # narration_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/final_narration_script.json"
-    with open(narration_file, 'w', encoding='utf-8') as f:
+    # with open(narration_file, 'w', encoding='utf-8') as f:
-        f.write(narration)
+    #     f.write(narration)
-    print(f"\n已将解说文案保存到: {narration_file}")
+    # print(f"\n已将解说文案保存到: {narration_file}")
--- a/config.example.toml
+++ b/config.example.toml
@ -1,175 +1,85 @@
 [app]
    project_version="0.6.0"
    # 支持视频理解的大模型提供商
-    #   gemini
+    #   gemini  (谷歌, 需要 VPN)
-    #   qwenvl
+    #   siliconflow (硅基流动)
-    vision_llm_provider="qwenvl"
+    #   qwenvl  (通义千问)
    vision_llm_provider="Siliconflow"
-    ########## Vision Gemini API Key
+    ########## Gemini 视觉模型
    vision_gemini_api_key = ""
-    vision_gemini_model_name = "gemini-2.0-flash"
+    vision_gemini_model_name = "gemini-2.0-flash-lite"
-    ########## Vision Qwen API Key (默认使用“硅基流动”的QwenVL模型)
+    ########## QwenVL 视觉模型
    vision_qwenvl_api_key = ""
-    vision_qwenvl_model_name = "Qwen/Qwen2.5-VL-32B-Instruct"
+    vision_qwenvl_model_name = "qwen2.5-vl-32b-instruct"
-    vision_qwenvl_base_url = "https://api.siliconflow.cn/v1"
+    vision_qwenvl_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
-    ########### Vision NarratoAPI Key
+    ########## siliconflow 视觉模型
    vision_siliconflow_api_key = ""
    vision_siliconflow_model_name = "Qwen/Qwen2.5-VL-32B-Instruct"
    vision_siliconflow_base_url = "https://api.siliconflow.cn/v1"
    ########## OpenAI 视觉模型
    vision_openai_api_key = ""
    vision_openai_model_name = "gpt-4.1-nano-2025-04-14"
    vision_openai_base_url = "https://api.openai.com/v1"
    ########### NarratoAPI 微调模型 (未发布)
    narrato_api_key = "ggyY91BAO-_ULvAqKum3XexcyN1G3dP86DEzvjZDcrg"
    narrato_api_url = "https://narratoinsight.scsmtech.cn/api/v1"
-    narrato_vision_model = "gemini-1.5-flash"
+    narrato_model = "narra-1.0-2025-05-09"
    narrato_vision_key = ""
    narrato_llm_model = "gpt-4o"
    narrato_llm_key = ""
    # 用于生成文案的大模型支持的提供商 (Supported providers):
-    #   openai (默认)
+    #   openai (默认, 需要 VPN)
-    #   deepseek (默认使用“硅基流动”的模型)
+    #   siliconflow (硅基流动)
-    #   moonshot (月之暗面)
+    #   deepseek (深度求索)
    #   gemini (谷歌, 需要 VPN)
    #   qwen (通义千问)
-    #   gemini
+    #   moonshot (月之暗面)
-    text_llm_provider="deepseek"
+    text_llm_provider="openai"
    ########## OpenAI API Key
    # Get your API key at https://platform.openai.com/api-keys
    text_openai_api_key = ""
    text_openai_base_url = "https://api.openai.com/v1"
-    text_openai_model_name = "gpt-4o-mini"
+    text_openai_model_name = "gpt-4.1-mini-2025-04-14"
    # 使用 硅基流动 第三方 API Key，使用手机号注册：https://cloud.siliconflow.cn/i/pyOKqFCV
    # 访问 https://cloud.siliconflow.cn/account/ak 获取你的 API 密钥
    text_siliconflow_api_key = ""
    text_siliconflow_base_url = "https://api.siliconflow.cn/v1"
    text_siliconflow_model_name = "deepseek-ai/DeepSeek-R1"
    ########## DeepSeek API Key
-    # 使用 硅基流动 第三方 API Key，使用手机号注册：https://cloud.siliconflow.cn/i/pyOKqFCV
+    # 访问 https://platform.deepseek.com/api_keys 获取你的 API 密钥
    text_deepseek_api_key = ""
-    text_deepseek_base_url = "https://api.siliconflow.cn/v1"
+    text_deepseek_base_url = "https://api.deepseek.com"
-    text_deepseek_model_name = "deepseek-ai/DeepSeek-V3"
+    text_deepseek_model_name = "deepseek-chat"
    ########## Moonshot API Key
    # Visit https://platform.moonshot.cn/console/api-keys to get your API key.
    text_moonshot_api_key=""
    text_moonshot_base_url = "https://api.moonshot.cn/v1"
    text_moonshot_model_name = "moonshot-v1-8k"
    ########## G4F
    # Visit https://github.com/xtekky/gpt4free to get more details
    # Supported model list: https://github.com/xtekky/gpt4free/blob/main/g4f/models.py
    text_g4f_model_name = "gpt-3.5-turbo"
    ########## Azure API Key
    # Visit https://learn.microsoft.com/zh-cn/azure/ai-services/openai/ to get more details
    # API documentation: https://learn.microsoft.com/zh-cn/azure/ai-services/openai/reference
    text_azure_api_key = ""
    text_azure_base_url=""
    text_azure_model_name="gpt-35-turbo" # replace with your model deployment name
    text_azure_api_version = "2024-02-15-preview"
    ########## Gemini API Key
    text_gemini_api_key=""
-    text_gemini_model_name = "gemini-1.5-flash"
+    text_gemini_model_name = "gemini-2.0-flash"
    ########## Qwen API Key
-    # Visit https://dashscope.console.aliyun.com/apiKey to get your API key
+    # 访问 https://bailian.console.aliyun.com/?tab=model#/api-key 获取你的 API 密钥
    # Visit below links to get more details
    # https://tongyi.aliyun.com/qianwen/
    # https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction
    text_qwen_api_key = ""
    text_qwen_model_name = "qwen-plus-1127"
    text_qwen_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
-
+    ########## Moonshot API Key
-    # 字幕提供商、可选，支持 whisper 和 faster-whisper-large-v2"whisper"
+    # 访问 https://platform.moonshot.cn/console/api-keys 获取你的 API 密钥
-    # 默认为 faster-whisper-large-v2 模型地址：https://huggingface.co/guillaumekln/faster-whisper-large-v2
+    text_moonshot_api_key=""
-    subtitle_provider = "faster-whisper-large-v2"
+    text_moonshot_base_url = "https://api.moonshot.cn/v1"
-    subtitle_enabled = true
+    text_moonshot_model_name = "moonshot-v1-8k"
    # ImageMagick
    # 安装后，将自动检测到 ImageMagick，Windows 除外！
    # 例如，在 Windows 上 "C:\Program Files (x86)\ImageMagick-7.1.1-Q16-HDRI\magick.exe"
    # 下载位置 https://imagemagick.org/archive/binaries/ImageMagick-7.1.1-29-Q16-x64-static.exe
    # imagemagick_path = "C:\\Program Files (x86)\\ImageMagick-7.1.1-Q16\\magick.exe"
    # FFMPEG
    #
    # 通常情况下，ffmpeg 会被自动下载，并且会被自动检测到。
    # 但是如果你的环境有问题，无法自动下载，可能会遇到如下错误：
    #   RuntimeError: No ffmpeg exe could be found.
    #   Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.
    # 此时你可以手动下载 ffmpeg 并设置 ffmpeg_path，下载地址：https://www.gyan.dev/ffmpeg/builds/
    # ffmpeg_path = "C:\\Users\\harry\\Downloads\\ffmpeg.exe"
    #########################################################################################
    # 当视频生成成功后，API服务提供的视频下载接入点，默认为当前服务的地址和监听端口
    # 比如 http://127.0.0.1:8080/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
    # 如果你需要使用域名对外提供服务（一般会用nginx做代理），则可以设置为你的域名
    # 比如 https://xxxx.com/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
    # endpoint="https://xxxx.com"
    # When the video is successfully generated, the API service provides a download endpoint for the video, defaulting to the service's current address and listening port.
    # For example, http://127.0.0.1:8080/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
    # If you need to provide the service externally using a domain name (usually done with nginx as a proxy), you can set it to your domain name.
    # For example, https://xxxx.com/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
    # endpoint="https://xxxx.com"
    endpoint=""
    # Video material storage location
    # material_directory = ""                    # Indicates that video materials will be downloaded to the default folder, the default folder is ./storage/cache_videos under the current project
    # material_directory = "/user/harry/videos"  # Indicates that video materials will be downloaded to a specified folder
    # material_directory = "task"                # Indicates that video materials will be downloaded to the current task's folder, this method does not allow sharing of already downloaded video materials
    # 视频素材存放位置
    # material_directory = ""                    #表示将视频素材下载到默认的文件夹，默认文件夹为当前项目下的 ./storage/cache_videos
    # material_directory = "/user/harry/videos"  #表示将视频素材下载到指定的文件夹中
    # material_directory = "task"                #表示将视频素材下载到当前任务的文件夹中，这种方式无法共享已经下载的视频素材
    material_directory = ""
    # 用于任务的状态管理
    enable_redis = false
    redis_host = "localhost"
    redis_port = 6379
    redis_db = 0
    redis_password = ""
    # 文生视频时的最大并发任务数
    max_concurrent_tasks = 5
    # webui界面是否显示配置项
-    hide_config = false
+    hide_config = true
 [whisper]
    # Only effective when subtitle_provider is "whisper"
    # Run on GPU with FP16
    # model = WhisperModel(model_size, device="cuda", compute_type="float16")
    # Run on GPU with INT8
    # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
    # Run on CPU with INT8
    # model = WhisperModel(model_size, device="cpu", compute_type="int8")
    # recommended model_size: "large-v3"
    model_size="faster-whisper-large-v2"
    # 如果要使用 GPU，请设置 device="cuda"
    device="CPU"
    compute_type="int8"
 [proxy]
    ### Use a proxy to access the Pexels API
    ### Format: "http://<username>:<password>@<proxy>:<port>"
    ### Example: "http://user:pass@proxy:1234"
    ### Doc: https://requests.readthedocs.io/en/latest/user/advanced/#proxies
    http = "http://127.0.0.1:7890"
    https = "http://127.0.0.1:7890"
-
+    enabled = false
 [azure]
    # Azure Speech API Key
    # Get your API key at https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices
    speech_key=""
    speech_region=""
 [frames]
    # 提取关键帧的间隔时间
--- a/webui.py
+++ b/webui.py
@ -5,7 +5,7 @@ from loguru import logger
 from app.config import config
 from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, \
    review_settings, merge_settings, system_settings
-from webui.utils import cache, file_utils
+# from webui.utils import cache, file_utils
 from app.utils import utils
 from app.models.schema import VideoClipParams, VideoAspect
@ -184,7 +184,7 @@ def render_generate_button():
        except Exception as e:
            logger.error(f"播放视频失败: {e}")
-        file_utils.open_task_folder(config.root_dir, task_id)
+        # file_utils.open_task_folder(config.root_dir, task_id)
        logger.info(tr("视频生成完成"))
--- a/webui/components/basic_settings.py
+++ b/webui/components/basic_settings.py
@ -64,25 +64,25 @@ def render_proxy_settings(tr):
    proxy_enabled = st.checkbox(tr("Enable Proxy"), value=proxy_enabled)
    # 保存代理开关状态
-    config.proxy["enabled"] = proxy_enabled
+    # config.proxy["enabled"] = proxy_enabled
    # 只有在代理启用时才显示代理设置输入框
    if proxy_enabled:
        HTTP_PROXY = st.text_input(tr("HTTP_PROXY"), value=proxy_url_http)
        HTTPS_PROXY = st.text_input(tr("HTTPs_PROXY"), value=proxy_url_https)
-        if HTTP_PROXY:
+        if HTTP_PROXY and HTTPS_PROXY:
            config.proxy["http"] = HTTP_PROXY
            os.environ["HTTP_PROXY"] = HTTP_PROXY
        if HTTPS_PROXY:
            config.proxy["https"] = HTTPS_PROXY
            os.environ["HTTP_PROXY"] = HTTP_PROXY
            os.environ["HTTPS_PROXY"] = HTTPS_PROXY
            # logger.debug(f"代理已启用: {HTTP_PROXY}")
    else:
        # 当代理被禁用时，清除环境变量和配置
        os.environ.pop("HTTP_PROXY", None)
        os.environ.pop("HTTPS_PROXY", None)
-        config.proxy["http"] = ""
+        # config.proxy["http"] = ""
-        config.proxy["https"] = ""
+        # config.proxy["https"] = ""
 def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
@ -108,29 +108,6 @@ def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
            return True, tr("gemini model is available")
        except Exception as e:
            return False, f"{tr('gemini model is not available')}: {str(e)}"
    elif provider.lower() == 'qwenvl':
        from openai import OpenAI
        try:
            client = OpenAI(
                api_key=api_key,
                base_url=base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"
            )
            # 发送一个简单的测试请求
            response = client.chat.completions.create(
                model=model_name or "qwen-vl-max-latest",
                messages=[{"role": "user", "content": "直接回复我文本'当前网络可用'"}]
            )
            if response and response.choices:
                return True, tr("QwenVL model is available")
            else:
                return False, tr("QwenVL model returned invalid response")
        except Exception as e:
            return False, f"{tr('QwenVL model is not available')}: {str(e)}"
    elif provider.lower() == 'narratoapi':
        import requests
        try:
@ -148,9 +125,46 @@ def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
                return False, f"{tr('NarratoAPI is not available')}: HTTP {response.status_code}"
        except Exception as e:
            return False, f"{tr('NarratoAPI is not available')}: {str(e)}"
-            
+
    else:
-        return False, f"{tr('Unsupported provider')}: {provider}"
+        from openai import OpenAI
        try:
            client = OpenAI(
                api_key=api_key,
                base_url=base_url,
            )
            response = client.chat.completions.create(
                model=model_name,
                messages=[
                    {
                        "role": "system",
                        "content": [{"type": "text", "text": "You are a helpful assistant."}],
                    },
                    {
                        "role": "user",
                        "content": [
                            {
                                "type": "image_url",
                                "image_url": {
                                    "url": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241022/emyrja/dog_and_girl.jpeg"
                                },
                            },
                            {"type": "text", "text": "回复我网络可用即可"},
                        ],
                    },
                ],
            )
            if response and response.choices:
                return True, tr("QwenVL model is available")
            else:
                return False, tr("QwenVL model returned invalid response")
        except Exception as e:
            # logger.debug(api_key)
            # logger.debug(base_url)
            # logger.debug(model_name)
            return False, f"{tr('QwenVL model is not available')}: {str(e)}"
 def render_vision_llm_settings(tr):
@ -158,7 +172,7 @@ def render_vision_llm_settings(tr):
    st.subheader(tr("Vision Model Settings"))
    # 视频分析模型提供商选择
-    vision_providers = ['Gemini', 'QwenVL', 'NarratoAPI(待发布)']
+    vision_providers = ['Siliconflow', 'Gemini', 'QwenVL', 'OpenAI']
    saved_vision_provider = config.app.get("vision_llm_provider", "Gemini").lower()
    saved_provider_index = 0
@ -194,8 +208,8 @@ def render_vision_llm_settings(tr):
        )
        st_vision_model_name = st.text_input(
            tr("Vision Model Name"), 
-            value=vision_model_name or "gemini-1.5-flash",
+            value=vision_model_name or "gemini-2.0-flash-lite",
-            help=tr("Default: gemini-1.5-flash")
+            help=tr("Default: gemini-2.0-flash-lite")
        )
    elif vision_provider == 'qwenvl':
        st_vision_base_url = st.text_input(
@ -261,52 +275,45 @@ def test_text_model_connection(api_key, base_url, model_name, provider, tr):
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }
-        
+
        # 如果没有指定base_url，使用默认值
        if not base_url:
            if provider.lower() == 'openai':
                base_url = "https://api.openai.com/v1"
            elif provider.lower() == 'moonshot':
                base_url = "https://api.moonshot.cn/v1"
            elif provider.lower() == 'deepseek':
                base_url = "https://api.deepseek.com"
        # 构建测试URL
        test_url = f"{base_url.rstrip('/')}/chat/completions"
        # 特殊处理Gemini
        if provider.lower() == 'gemini':
            import google.generativeai as genai
            try:
                genai.configure(api_key=api_key)
-                model = genai.GenerativeModel(model_name or 'gemini-pro')
+                model = genai.GenerativeModel(model_name)
                model.generate_content("直接回复我文本'当前网络可用'")
                return True, tr("Gemini model is available")
            except Exception as e:
                return False, f"{tr('Gemini model is not available')}: {str(e)}"
        # 构建测试消息
        test_data = {
            "model": model_name,
            "messages": [
                {"role": "user", "content": "直接回复我文本'当前网络可用'"}
            ],
            "stream": False
        }
        # 发送测试请求
        response = requests.post(
            test_url,
            headers=headers,
            json=test_data,
        )
        if response.status_code == 200:
            return True, tr("Text model is available")
        else:
-            return False, f"{tr('Text model is not available')}: HTTP {response.status_code}"
+            test_url = f"{base_url.rstrip('/')}/chat/completions"
            # 构建测试消息
            test_data = {
                "model": model_name,
                "messages": [
                    {"role": "user", "content": "直接回复我文本'当前网络可用'"}
                ],
                "stream": False
            }
            # 发送测试请求
            response = requests.post(
                test_url,
                headers=headers,
                json=test_data,
            )
            # logger.debug(model_name)
            # logger.debug(api_key)
            # logger.debug(test_url)
            if response.status_code == 200:
                return True, tr("Text model is available")
            else:
                return False, f"{tr('Text model is not available')}: HTTP {response.status_code}"
    except Exception as e:
        logger.error(traceback.format_exc())
        return False, f"{tr('Connection failed')}: {str(e)}"
@ -315,8 +322,8 @@ def render_text_llm_settings(tr):
    st.subheader(tr("Text Generation Model Settings"))
    # 文案生成模型提供商选择
-    text_providers = ['DeepSeek', 'OpenAI', 'Siliconflow', 'Qwen', 'Moonshot', 'Gemini']
+    text_providers = ['OpenAI', 'Siliconflow', 'DeepSeek', 'Gemini', 'Qwen', 'Moonshot']
-    saved_text_provider = config.app.get("text_llm_provider", "DeepSeek").lower()
+    saved_text_provider = config.app.get("text_llm_provider", "OpenAI").lower()
    saved_provider_index = 0
    for i, provider in enumerate(text_providers):
@ -344,8 +351,6 @@ def render_text_llm_settings(tr):
    # 添加测试按钮
    if st.button(tr("Test Connection"), key="test_text_connection"):
        logger.debug(st_text_base_url)
        logger.debug(st_text_model_name)
        with st.spinner(tr("Testing connection...")):
            success, message = test_text_model_connection(
                api_key=st_text_api_key,
--- a/webui/tools/base.py
+++ b/webui/tools/base.py
@ -24,15 +24,13 @@ def create_vision_analyzer(provider, api_key, model, base_url):
    """
    if provider == 'gemini':
        return gemini_analyzer.VisionAnalyzer(model_name=model, api_key=api_key)
-    elif provider == 'qwenvl':
+    else:
        # 只传入必要的参数
        return qwenvl_analyzer.QwenAnalyzer(
            model_name=model, 
            api_key=api_key,
            base_url=base_url
        )
    else:
        raise ValueError(f"不支持的视觉分析提供商: {provider}")
 def get_batch_timestamps(batch_files, prev_batch_files=None):
--- a/webui/tools/generate_script_docu.py
+++ b/webui/tools/generate_script_docu.py
@ -4,16 +4,12 @@ import json
 import time
 import asyncio
 import traceback
 import requests
 from app.utils import video_processor
 import streamlit as st
 from loguru import logger
 from requests.adapters import HTTPAdapter
 from datetime import datetime
 from app.config import config
-from app.utils.script_generator import ScriptProcessor
+from app.utils import utils, video_processor
 from app.utils import utils, video_processor, qwenvl_analyzer
 from webui.tools.base import create_vision_analyzer, get_batch_files, get_batch_timestamps, chekc_video_config
@ -111,12 +107,10 @@ def generate_script_docu(params):
                    vision_api_key = st.session_state.get('vision_gemini_api_key')
                    vision_model = st.session_state.get('vision_gemini_model_name')
                    vision_base_url = st.session_state.get('vision_gemini_base_url')
                elif vision_llm_provider == 'qwenvl':
                    vision_api_key = st.session_state.get('vision_qwenvl_api_key')
                    vision_model = st.session_state.get('vision_qwenvl_model_name', 'qwen-vl-max-latest')
                    vision_base_url = st.session_state.get('vision_qwenvl_base_url')
                else:
-                    raise ValueError(f"不支持的视觉分析提供商: {vision_llm_provider}")
+                    vision_api_key = st.session_state.get(f'vision_{vision_llm_provider}_api_key')
                    vision_model = st.session_state.get(f'vision_{vision_llm_provider}_model_name')
                    vision_base_url = st.session_state.get(f'vision_{vision_llm_provider}_base_url')
                # 创建视觉分析器实例
                analyzer = create_vision_analyzer(
@ -354,7 +348,6 @@ def generate_script_docu(params):
                # 整理帧分析数据
                markdown_output = parse_frame_analysis_to_markdown(analysis_json_path)
                # 生成文案
                # 生成解说文案
                narration = generate_narration(
                    markdown_output,