feat(webui): 重构视觉分析功能并添加新模型支持

- 移除了对 QwenVL 模型的特定逻辑，改为更通用的实现
- 添加了对 OpenAI 视觉模型的支持
- 更新了视觉模型设置界面，增加了新的模型选项
- 重构了测试连接和创建分析器的代码，提高了可维护性
- 调整了配置文件结构，简化了视觉模型的配置
2026-07-23 22:48:20 +00:00 · 2025-05-09 12:03:27 +08:00 · 2025-05-09 12:03:27 +08:00 · 3fe8eb50c0
commit 3fe8eb50c0
parent afeeb7c516
6 changed files with 147 additions and 241 deletions
--- a/app/services/generate_narration_script.py
+++ b/app/services/generate_narration_script.py
@ -237,28 +237,28 @@ if __name__ == '__main__':
    video_frame_description_path = "/Users/apple/Desktop/home/NarratoAI/storage/temp/analysis/frame_analysis_20250508_1139.json"

    # 测试新的JSON文件
-    test_file_path = "/Users/apple/Desktop/home/NarratoAI/storage/temp/analysis/frame_analysis_20250508_1458.json"
+    test_file_path = "/Users/apple/Desktop/home/NarratoAI/storage/temp/analysis/frame_analysis_20250508_2258.json"
    markdown_output = parse_frame_analysis_to_markdown(test_file_path)
    # print(markdown_output)
    
    # 输出到文件以便检查格式
-    output_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/narration_script.md"
+    output_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/家里家外1-5.md"
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(markdown_output)
    # print(f"\n已将Markdown输出保存到: {output_file}")
    
-    # 生成解说文案
-    narration = generate_narration(
-        markdown_output, 
-        text_api_key,
-        base_url=text_base_url,
-        model=text_model
-    )
-    
-    # 保存解说文案
-    print(narration)
-    print(type(narration))
-    narration_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/final_narration_script.json"
-    with open(narration_file, 'w', encoding='utf-8') as f:
-        f.write(narration)
-    print(f"\n已将解说文案保存到: {narration_file}")
+    # # 生成解说文案
+    # narration = generate_narration(
+    #     markdown_output,
+    #     text_api_key,
+    #     base_url=text_base_url,
+    #     model=text_model
+    # )
+    #
+    # # 保存解说文案
+    # print(narration)
+    # print(type(narration))
+    # narration_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/final_narration_script.json"
+    # with open(narration_file, 'w', encoding='utf-8') as f:
+    #     f.write(narration)
+    # print(f"\n已将解说文案保存到: {narration_file}")
--- a/config.example.toml
+++ b/config.example.toml
@ -1,175 +1,85 @@
 [app]
    project_version="0.6.0"
    # 支持视频理解的大模型提供商
-    #   gemini
-    #   qwenvl
-    vision_llm_provider="qwenvl"
+    #   gemini  (谷歌, 需要 VPN)
+    #   siliconflow (硅基流动)
+    #   qwenvl  (通义千问)
+    vision_llm_provider="Siliconflow"

-    ########## Vision Gemini API Key
+    ########## Gemini 视觉模型
    vision_gemini_api_key = ""
-    vision_gemini_model_name = "gemini-2.0-flash"
+    vision_gemini_model_name = "gemini-2.0-flash-lite"

-    ########## Vision Qwen API Key (默认使用“硅基流动”的QwenVL模型)
+    ########## QwenVL 视觉模型
    vision_qwenvl_api_key = ""
-    vision_qwenvl_model_name = "Qwen/Qwen2.5-VL-32B-Instruct"
-    vision_qwenvl_base_url = "https://api.siliconflow.cn/v1"
+    vision_qwenvl_model_name = "qwen2.5-vl-32b-instruct"
+    vision_qwenvl_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"

-    ########### Vision NarratoAPI Key
+    ########## siliconflow 视觉模型
+    vision_siliconflow_api_key = ""
+    vision_siliconflow_model_name = "Qwen/Qwen2.5-VL-32B-Instruct"
+    vision_siliconflow_base_url = "https://api.siliconflow.cn/v1"
+
+    ########## OpenAI 视觉模型
+    vision_openai_api_key = ""
+    vision_openai_model_name = "gpt-4.1-nano-2025-04-14"
+    vision_openai_base_url = "https://api.openai.com/v1"
+
+    ########### NarratoAPI 微调模型 (未发布)
    narrato_api_key = "ggyY91BAO-_ULvAqKum3XexcyN1G3dP86DEzvjZDcrg"
    narrato_api_url = "https://narratoinsight.scsmtech.cn/api/v1"
-    narrato_vision_model = "gemini-1.5-flash"
-    narrato_vision_key = ""
-    narrato_llm_model = "gpt-4o"
-    narrato_llm_key = ""
+    narrato_model = "narra-1.0-2025-05-09"

    # 用于生成文案的大模型支持的提供商 (Supported providers):
-    #   openai (默认)
-    #   deepseek (默认使用“硅基流动”的模型)
-    #   moonshot (月之暗面)
+    #   openai (默认, 需要 VPN)
+    #   siliconflow (硅基流动)
+    #   deepseek (深度求索)
+    #   gemini (谷歌, 需要 VPN)
    #   qwen (通义千问)
-    #   gemini
-    text_llm_provider="deepseek"
+    #   moonshot (月之暗面)
+    text_llm_provider="openai"

    ########## OpenAI API Key
    # Get your API key at https://platform.openai.com/api-keys
    text_openai_api_key = ""
    text_openai_base_url = "https://api.openai.com/v1"
-    text_openai_model_name = "gpt-4o-mini"
+    text_openai_model_name = "gpt-4.1-mini-2025-04-14"
+
+    # 使用 硅基流动 第三方 API Key，使用手机号注册：https://cloud.siliconflow.cn/i/pyOKqFCV
+    # 访问 https://cloud.siliconflow.cn/account/ak 获取你的 API 密钥
+    text_siliconflow_api_key = ""
+    text_siliconflow_base_url = "https://api.siliconflow.cn/v1"
+    text_siliconflow_model_name = "deepseek-ai/DeepSeek-R1"

    ########## DeepSeek API Key
-    # 使用 硅基流动 第三方 API Key，使用手机号注册：https://cloud.siliconflow.cn/i/pyOKqFCV
+    # 访问 https://platform.deepseek.com/api_keys 获取你的 API 密钥
    text_deepseek_api_key = ""
-    text_deepseek_base_url = "https://api.siliconflow.cn/v1"
-    text_deepseek_model_name = "deepseek-ai/DeepSeek-V3"
-
-    ########## Moonshot API Key
-    # Visit https://platform.moonshot.cn/console/api-keys to get your API key.
-    text_moonshot_api_key=""
-    text_moonshot_base_url = "https://api.moonshot.cn/v1"
-    text_moonshot_model_name = "moonshot-v1-8k"
-
-    ########## G4F
-    # Visit https://github.com/xtekky/gpt4free to get more details
-    # Supported model list: https://github.com/xtekky/gpt4free/blob/main/g4f/models.py
-    text_g4f_model_name = "gpt-3.5-turbo"
-
-    ########## Azure API Key
-    # Visit https://learn.microsoft.com/zh-cn/azure/ai-services/openai/ to get more details
-    # API documentation: https://learn.microsoft.com/zh-cn/azure/ai-services/openai/reference
-    text_azure_api_key = ""
-    text_azure_base_url=""
-    text_azure_model_name="gpt-35-turbo" # replace with your model deployment name
-    text_azure_api_version = "2024-02-15-preview"
+    text_deepseek_base_url = "https://api.deepseek.com"
+    text_deepseek_model_name = "deepseek-chat"

    ########## Gemini API Key
    text_gemini_api_key=""
-    text_gemini_model_name = "gemini-1.5-flash"
+    text_gemini_model_name = "gemini-2.0-flash"

    ########## Qwen API Key
-    # Visit https://dashscope.console.aliyun.com/apiKey to get your API key
-    # Visit below links to get more details
-    # https://tongyi.aliyun.com/qianwen/
-    # https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction
+    # 访问 https://bailian.console.aliyun.com/?tab=model#/api-key 获取你的 API 密钥
    text_qwen_api_key = ""
    text_qwen_model_name = "qwen-plus-1127"
    text_qwen_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"

-
-    # 字幕提供商、可选，支持 whisper 和 faster-whisper-large-v2"whisper"
-    # 默认为 faster-whisper-large-v2 模型地址：https://huggingface.co/guillaumekln/faster-whisper-large-v2
-    subtitle_provider = "faster-whisper-large-v2"
-    subtitle_enabled = true
-
-    # ImageMagick
-    # 安装后，将自动检测到 ImageMagick，Windows 除外！
-    # 例如，在 Windows 上 "C:\Program Files (x86)\ImageMagick-7.1.1-Q16-HDRI\magick.exe"
-    # 下载位置 https://imagemagick.org/archive/binaries/ImageMagick-7.1.1-29-Q16-x64-static.exe
-    # imagemagick_path = "C:\\Program Files (x86)\\ImageMagick-7.1.1-Q16\\magick.exe"
-
-    # FFMPEG
-    #
-    # 通常情况下，ffmpeg 会被自动下载，并且会被自动检测到。
-    # 但是如果你的环境有问题，无法自动下载，可能会遇到如下错误：
-    #   RuntimeError: No ffmpeg exe could be found.
-    #   Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.
-    # 此时你可以手动下载 ffmpeg 并设置 ffmpeg_path，下载地址：https://www.gyan.dev/ffmpeg/builds/
-
-    # ffmpeg_path = "C:\\Users\\harry\\Downloads\\ffmpeg.exe"
-    #########################################################################################
-
-    # 当视频生成成功后，API服务提供的视频下载接入点，默认为当前服务的地址和监听端口
-    # 比如 http://127.0.0.1:8080/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
-    # 如果你需要使用域名对外提供服务（一般会用nginx做代理），则可以设置为你的域名
-    # 比如 https://xxxx.com/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
-    # endpoint="https://xxxx.com"
-
-    # When the video is successfully generated, the API service provides a download endpoint for the video, defaulting to the service's current address and listening port.
-    # For example, http://127.0.0.1:8080/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
-    # If you need to provide the service externally using a domain name (usually done with nginx as a proxy), you can set it to your domain name.
-    # For example, https://xxxx.com/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
-    # endpoint="https://xxxx.com"
-    endpoint=""
-
-
-    # Video material storage location
-    # material_directory = ""                    # Indicates that video materials will be downloaded to the default folder, the default folder is ./storage/cache_videos under the current project
-    # material_directory = "/user/harry/videos"  # Indicates that video materials will be downloaded to a specified folder
-    # material_directory = "task"                # Indicates that video materials will be downloaded to the current task's folder, this method does not allow sharing of already downloaded video materials
-
-    # 视频素材存放位置
-    # material_directory = ""                    #表示将视频素材下载到默认的文件夹，默认文件夹为当前项目下的 ./storage/cache_videos
-    # material_directory = "/user/harry/videos"  #表示将视频素材下载到指定的文件夹中
-    # material_directory = "task"                #表示将视频素材下载到当前任务的文件夹中，这种方式无法共享已经下载的视频素材
-
-    material_directory = ""
-
-    # 用于任务的状态管理
-    enable_redis = false
-    redis_host = "localhost"
-    redis_port = 6379
-    redis_db = 0
-    redis_password = ""
-
-    # 文生视频时的最大并发任务数
-    max_concurrent_tasks = 5
+    ########## Moonshot API Key
+    # 访问 https://platform.moonshot.cn/console/api-keys 获取你的 API 密钥
+    text_moonshot_api_key=""
+    text_moonshot_base_url = "https://api.moonshot.cn/v1"
+    text_moonshot_model_name = "moonshot-v1-8k"

    # webui界面是否显示配置项
-    hide_config = false
-
-
-[whisper]
-    # Only effective when subtitle_provider is "whisper"
-
-    # Run on GPU with FP16
-    # model = WhisperModel(model_size, device="cuda", compute_type="float16")
-
-    # Run on GPU with INT8
-    # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
-
-    # Run on CPU with INT8
-    # model = WhisperModel(model_size, device="cpu", compute_type="int8")
-
-    # recommended model_size: "large-v3"
-    model_size="faster-whisper-large-v2"
-    # 如果要使用 GPU，请设置 device=“cuda”
-    device="CPU"
-    compute_type="int8"
-
+    hide_config = true

 [proxy]
-    ### Use a proxy to access the Pexels API
-    ### Format: "http://<username>:<password>@<proxy>:<port>"
-    ### Example: "http://user:pass@proxy:1234"
-    ### Doc: https://requests.readthedocs.io/en/latest/user/advanced/#proxies
-
    http = "http://127.0.0.1:7890"
    https = "http://127.0.0.1:7890"
-
-[azure]
-    # Azure Speech API Key
-    # Get your API key at https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices
-    speech_key=""
-    speech_region=""
+    enabled = false

 [frames]
    # 提取关键帧的间隔时间
--- a/webui.py
+++ b/webui.py
@ -5,7 +5,7 @@ from loguru import logger
 from app.config import config
 from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, \
    review_settings, merge_settings, system_settings
-from webui.utils import cache, file_utils
+# from webui.utils import cache, file_utils
 from app.utils import utils
 from app.models.schema import VideoClipParams, VideoAspect

@ -184,7 +184,7 @@ def render_generate_button():
        except Exception as e:
            logger.error(f"播放视频失败: {e}")

-        file_utils.open_task_folder(config.root_dir, task_id)
+        # file_utils.open_task_folder(config.root_dir, task_id)
        logger.info(tr("视频生成完成"))


--- a/webui/components/basic_settings.py
+++ b/webui/components/basic_settings.py
@ -64,25 +64,25 @@ def render_proxy_settings(tr):
    proxy_enabled = st.checkbox(tr("Enable Proxy"), value=proxy_enabled)
    
    # 保存代理开关状态
-    config.proxy["enabled"] = proxy_enabled
+    # config.proxy["enabled"] = proxy_enabled

    # 只有在代理启用时才显示代理设置输入框
    if proxy_enabled:
        HTTP_PROXY = st.text_input(tr("HTTP_PROXY"), value=proxy_url_http)
        HTTPS_PROXY = st.text_input(tr("HTTPs_PROXY"), value=proxy_url_https)

-        if HTTP_PROXY:
+        if HTTP_PROXY and HTTPS_PROXY:
            config.proxy["http"] = HTTP_PROXY
-            os.environ["HTTP_PROXY"] = HTTP_PROXY
-        if HTTPS_PROXY:
            config.proxy["https"] = HTTPS_PROXY
+            os.environ["HTTP_PROXY"] = HTTP_PROXY
            os.environ["HTTPS_PROXY"] = HTTPS_PROXY
+            # logger.debug(f"代理已启用: {HTTP_PROXY}")
    else:
        # 当代理被禁用时，清除环境变量和配置
        os.environ.pop("HTTP_PROXY", None)
        os.environ.pop("HTTPS_PROXY", None)
-        config.proxy["http"] = ""
-        config.proxy["https"] = ""
+        # config.proxy["http"] = ""
+        # config.proxy["https"] = ""


 def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
@ -108,29 +108,6 @@ def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
            return True, tr("gemini model is available")
        except Exception as e:
            return False, f"{tr('gemini model is not available')}: {str(e)}"
-
-    elif provider.lower() == 'qwenvl':
-        from openai import OpenAI
-        try:
-            client = OpenAI(
-                api_key=api_key,
-                base_url=base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"
-            )
-            
-            # 发送一个简单的测试请求
-            response = client.chat.completions.create(
-                model=model_name or "qwen-vl-max-latest",
-                messages=[{"role": "user", "content": "直接回复我文本'当前网络可用'"}]
-            )
-            
-            if response and response.choices:
-                return True, tr("QwenVL model is available")
-            else:
-                return False, tr("QwenVL model returned invalid response")
-                
-        except Exception as e:
-            return False, f"{tr('QwenVL model is not available')}: {str(e)}"
-            
    elif provider.lower() == 'narratoapi':
        import requests
        try:
@ -148,9 +125,46 @@ def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
                return False, f"{tr('NarratoAPI is not available')}: HTTP {response.status_code}"
        except Exception as e:
            return False, f"{tr('NarratoAPI is not available')}: {str(e)}"
-            
+
    else:
-        return False, f"{tr('Unsupported provider')}: {provider}"
+        from openai import OpenAI
+        try:
+            client = OpenAI(
+                api_key=api_key,
+                base_url=base_url,
+            )
+
+            response = client.chat.completions.create(
+                model=model_name,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": [{"type": "text", "text": "You are a helpful assistant."}],
+                    },
+                    {
+                        "role": "user",
+                        "content": [
+                            {
+                                "type": "image_url",
+                                "image_url": {
+                                    "url": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241022/emyrja/dog_and_girl.jpeg"
+                                },
+                            },
+                            {"type": "text", "text": "回复我网络可用即可"},
+                        ],
+                    },
+                ],
+            )
+            if response and response.choices:
+                return True, tr("QwenVL model is available")
+            else:
+                return False, tr("QwenVL model returned invalid response")
+
+        except Exception as e:
+            # logger.debug(api_key)
+            # logger.debug(base_url)
+            # logger.debug(model_name)
+            return False, f"{tr('QwenVL model is not available')}: {str(e)}"


 def render_vision_llm_settings(tr):
@ -158,7 +172,7 @@ def render_vision_llm_settings(tr):
    st.subheader(tr("Vision Model Settings"))

    # 视频分析模型提供商选择
-    vision_providers = ['Gemini', 'QwenVL', 'NarratoAPI(待发布)']
+    vision_providers = ['Siliconflow', 'Gemini', 'QwenVL', 'OpenAI']
    saved_vision_provider = config.app.get("vision_llm_provider", "Gemini").lower()
    saved_provider_index = 0

@ -194,8 +208,8 @@ def render_vision_llm_settings(tr):
        )
        st_vision_model_name = st.text_input(
            tr("Vision Model Name"), 
-            value=vision_model_name or "gemini-1.5-flash",
-            help=tr("Default: gemini-1.5-flash")
+            value=vision_model_name or "gemini-2.0-flash-lite",
+            help=tr("Default: gemini-2.0-flash-lite")
        )
    elif vision_provider == 'qwenvl':
        st_vision_base_url = st.text_input(
@ -261,52 +275,45 @@ def test_text_model_connection(api_key, base_url, model_name, provider, tr):
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }
-        
-        # 如果没有指定base_url，使用默认值
-        if not base_url:
-            if provider.lower() == 'openai':
-                base_url = "https://api.openai.com/v1"
-            elif provider.lower() == 'moonshot':
-                base_url = "https://api.moonshot.cn/v1"
-            elif provider.lower() == 'deepseek':
-                base_url = "https://api.deepseek.com"
-                
-        # 构建测试URL
-        test_url = f"{base_url.rstrip('/')}/chat/completions"
-        
+
        # 特殊处理Gemini
        if provider.lower() == 'gemini':
            import google.generativeai as genai
            try:
                genai.configure(api_key=api_key)
-                model = genai.GenerativeModel(model_name or 'gemini-pro')
+                model = genai.GenerativeModel(model_name)
                model.generate_content("直接回复我文本'当前网络可用'")
                return True, tr("Gemini model is available")
            except Exception as e:
                return False, f"{tr('Gemini model is not available')}: {str(e)}"
-        
-        # 构建测试消息
-        test_data = {
-            "model": model_name,
-            "messages": [
-                {"role": "user", "content": "直接回复我文本'当前网络可用'"}
-            ],
-            "stream": False
-        }
-        
-        # 发送测试请求
-        response = requests.post(
-            test_url,
-            headers=headers,
-            json=test_data,
-        )
-        
-        if response.status_code == 200:
-            return True, tr("Text model is available")
        else:
-            return False, f"{tr('Text model is not available')}: HTTP {response.status_code}"
+            test_url = f"{base_url.rstrip('/')}/chat/completions"
+
+            # 构建测试消息
+            test_data = {
+                "model": model_name,
+                "messages": [
+                    {"role": "user", "content": "直接回复我文本'当前网络可用'"}
+                ],
+                "stream": False
+            }
+
+            # 发送测试请求
+            response = requests.post(
+                test_url,
+                headers=headers,
+                json=test_data,
+            )
+            # logger.debug(model_name)
+            # logger.debug(api_key)
+            # logger.debug(test_url)
+            if response.status_code == 200:
+                return True, tr("Text model is available")
+            else:
+                return False, f"{tr('Text model is not available')}: HTTP {response.status_code}"
            
    except Exception as e:
+        logger.error(traceback.format_exc())
        return False, f"{tr('Connection failed')}: {str(e)}"


@ -315,8 +322,8 @@ def render_text_llm_settings(tr):
    st.subheader(tr("Text Generation Model Settings"))

    # 文案生成模型提供商选择
-    text_providers = ['DeepSeek', 'OpenAI', 'Siliconflow', 'Qwen', 'Moonshot', 'Gemini']
-    saved_text_provider = config.app.get("text_llm_provider", "DeepSeek").lower()
+    text_providers = ['OpenAI', 'Siliconflow', 'DeepSeek', 'Gemini', 'Qwen', 'Moonshot']
+    saved_text_provider = config.app.get("text_llm_provider", "OpenAI").lower()
    saved_provider_index = 0

    for i, provider in enumerate(text_providers):
@ -344,8 +351,6 @@ def render_text_llm_settings(tr):

    # 添加测试按钮
    if st.button(tr("Test Connection"), key="test_text_connection"):
-        logger.debug(st_text_base_url)
-        logger.debug(st_text_model_name)
        with st.spinner(tr("Testing connection...")):
            success, message = test_text_model_connection(
                api_key=st_text_api_key,
--- a/webui/tools/base.py
+++ b/webui/tools/base.py
@ -24,15 +24,13 @@ def create_vision_analyzer(provider, api_key, model, base_url):
    """
    if provider == 'gemini':
        return gemini_analyzer.VisionAnalyzer(model_name=model, api_key=api_key)
-    elif provider == 'qwenvl':
+    else:
        # 只传入必要的参数
        return qwenvl_analyzer.QwenAnalyzer(
            model_name=model, 
            api_key=api_key,
            base_url=base_url
        )
-    else:
-        raise ValueError(f"不支持的视觉分析提供商: {provider}")


 def get_batch_timestamps(batch_files, prev_batch_files=None):
--- a/webui/tools/generate_script_docu.py
+++ b/webui/tools/generate_script_docu.py
@ -4,16 +4,12 @@ import json
 import time
 import asyncio
 import traceback
-import requests
-from app.utils import video_processor
 import streamlit as st
 from loguru import logger
-from requests.adapters import HTTPAdapter
 from datetime import datetime

 from app.config import config
-from app.utils.script_generator import ScriptProcessor
-from app.utils import utils, video_processor, qwenvl_analyzer
+from app.utils import utils, video_processor
 from webui.tools.base import create_vision_analyzer, get_batch_files, get_batch_timestamps, chekc_video_config


@ -111,12 +107,10 @@ def generate_script_docu(params):
                    vision_api_key = st.session_state.get('vision_gemini_api_key')
                    vision_model = st.session_state.get('vision_gemini_model_name')
                    vision_base_url = st.session_state.get('vision_gemini_base_url')
-                elif vision_llm_provider == 'qwenvl':
-                    vision_api_key = st.session_state.get('vision_qwenvl_api_key')
-                    vision_model = st.session_state.get('vision_qwenvl_model_name', 'qwen-vl-max-latest')
-                    vision_base_url = st.session_state.get('vision_qwenvl_base_url')
                else:
-                    raise ValueError(f"不支持的视觉分析提供商: {vision_llm_provider}")
+                    vision_api_key = st.session_state.get(f'vision_{vision_llm_provider}_api_key')
+                    vision_model = st.session_state.get(f'vision_{vision_llm_provider}_model_name')
+                    vision_base_url = st.session_state.get(f'vision_{vision_llm_provider}_base_url')

                # 创建视觉分析器实例
                analyzer = create_vision_analyzer(
@ -354,7 +348,6 @@ def generate_script_docu(params):
                # 整理帧分析数据
                markdown_output = parse_frame_analysis_to_markdown(analysis_json_path)

-                # 生成文案
                # 生成解说文案
                narration = generate_narration(
                    markdown_output,