更新配置文件和Web界面，支持自定义短剧混剪片段

- 将项目版本更新至0.5.3
- 修改视觉大模型提供商为qwenvl，并更新相关API密钥和模型名称
- 新增DeepSeek API支持，更新文本生成模型设置
- 在Web界面中添加短视频生成模式的选项，调整脚本设置以支持自定义片段数量
- 修改代理设置的默认值为False
2026-07-23 06:28:25 +00:00 · 2025-04-13 14:07:35 +08:00 · 2025-04-13 14:07:35 +08:00 · bc236cd195
commit bc236cd195
parent a9b71f48dd
13 changed files with 94 additions and 55 deletions
--- a/app/services/SDP/generate_script_short.pyd
+++ b/app/services/SDP/generate_script_short.pyd
--- a/app/services/SDP/utils/short_schema.pyd
+++ b/app/services/SDP/utils/short_schema.pyd
--- a/app/services/SDP/utils/step1_subtitle_analyzer_openai.pyd
+++ b/app/services/SDP/utils/step1_subtitle_analyzer_openai.pyd
--- a/app/services/SDP/utils/step2_subtitle_analyzer_bert.pyd
+++ b/app/services/SDP/utils/step2_subtitle_analyzer_bert.pyd
--- a/app/services/SDP/utils/step3_fragment_check.pyd
+++ b/app/services/SDP/utils/step3_fragment_check.pyd
--- a/app/services/SDP/utils/step4_text_generate.pyd
+++ b/app/services/SDP/utils/step4_text_generate.pyd
--- a/app/services/SDP/utils/step5_merge_script.pyd
+++ b/app/services/SDP/utils/step5_merge_script.pyd
--- a/app/services/SDP/utils/utils.pyd
+++ b/app/services/SDP/utils/utils.pyd
--- a/config.example.toml
+++ b/config.example.toml
@@ -1,20 +1,19 @@
 [app]
-    project_version="0.5.2"
+    project_version="0.5.3"
    # 支持视频理解的大模型提供商
    #   gemini
-    #   NarratoAPI
-    #   qwen2-vl (待增加)
-    vision_llm_provider="gemini"
+    #   qwenvl
+    vision_llm_provider="qwenvl"
    vision_analysis_prompt = "你是资深视频内容分析专家，擅长分析视频画面信息，分析下面视频画面内容，只输出客观的画面描述不要给任何总结或评价"

    ########## Vision Gemini API Key
    vision_gemini_api_key = ""
-    vision_gemini_model_name = "gemini-1.5-flash"
+    vision_gemini_model_name = "gemini-2.0-flash"

-    ########## Vision Qwen API Key
+    ########## Vision Qwen API Key (默认使用“硅基流动”的QwenVL模型)
    vision_qwenvl_api_key = ""
-    vision_qwenvl_model_name = "qwen-vl-max-latest"
-    vision_qwenvl_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
+    vision_qwenvl_model_name = "Qwen/Qwen2.5-VL-32B-Instruct"
+    vision_qwenvl_base_url = "https://api.siliconflow.cn/v1"

    ########### Vision NarratoAPI Key
    narrato_api_key = "ggyY91BAO-_ULvAqKum3XexcyN1G3dP86DEzvjZDcrg"
@@ -26,13 +25,11 @@

    # 用于生成文案的大模型支持的提供商 (Supported providers):
    #   openai (默认)
+    #   deepseek (默认使用“硅基流动”的模型)
    #   moonshot (月之暗面)
-    #   oneapi
-    #   g4f
-    #   azure
    #   qwen (通义千问)
    #   gemini
-    text_llm_provider="openai"
+    text_llm_provider="deepseek"

    ########## OpenAI API Key
    # Get your API key at https://platform.openai.com/api-keys
@@ -40,6 +37,12 @@
    text_openai_base_url = "https://api.openai.com/v1"
    text_openai_model_name = "gpt-4o-mini"

+    ########## DeepSeek API Key
+    # 使用 硅基流动 第三方 API Key，使用手机号注册：https://cloud.siliconflow.cn/i/pyOKqFCV
+    text_deepseek_api_key = ""
+    text_deepseek_base_url = "https://api.siliconflow.cn/v1"
+    text_deepseek_model_name = "deepseek-ai/DeepSeek-V3"
+
    ########## Moonshot API Key
    # Visit https://platform.moonshot.cn/console/api-keys to get your API key.
    text_moonshot_api_key=""
@@ -72,11 +75,6 @@
    text_qwen_model_name = "qwen-plus-1127"
    text_qwen_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"

-    ########## DeepSeek API Key
-    # 使用 硅基流动 第三方 API Key，使用手机号注册：https://cloud.siliconflow.cn/i/pyOKqFCV
-    text_deepseek_api_key = ""
-    text_deepseek_base_url = "https://api.siliconflow.cn/v1"
-    text_deepseek_model_name = "deepseek-ai/DeepSeek-V3"

    # 字幕提供商、可选，支持 whisper 和 faster-whisper-large-v2"whisper"
    # 默认为 faster-whisper-large-v2 模型地址：https://huggingface.co/guillaumekln/faster-whisper-large-v2
--- a/webui.txt
+++ b/webui.txt
@@ -48,6 +48,8 @@ pause
 rem set HF_ENDPOINT=https://hf-mirror.com
 streamlit run webui.py --browser.serverAddress="127.0.0.1" --server.enableCORS=True  --server.maxUploadSize=2048 --browser.gatherUsageStats=False

+streamlit run webui.py --server.maxUploadSize=2048
+
 请求0：
 curl -X 'POST' \
  'http://127.0.0.1:8080/api/v2/youtube/download' \
--- a/webui/components/basic_settings.py
+++ b/webui/components/basic_settings.py
@@ -53,7 +53,7 @@ def render_language_settings(tr):
 def render_proxy_settings(tr):
    """渲染代理设置"""
    # 获取当前代理状态
-    proxy_enabled = config.proxy.get("enabled", True)
+    proxy_enabled = config.proxy.get("enabled", False)
    proxy_url_http = config.proxy.get("http")
    proxy_url_https = config.proxy.get("https")

--- a/webui/components/script_settings.py
+++ b/webui/components/script_settings.py
@@ -25,8 +25,17 @@ def render_script_panel(tr):
        # 渲染视频文件选择
        render_video_file(tr, params)

-        # 渲染视频主题和提示词
-        render_video_details(tr)
+        # 获取当前选择的脚本类型
+        script_path = st.session_state.get('video_clip_json_path', '')
+        
+        # 根据脚本类型显示不同的布局
+        if script_path == "short":
+            # Short Generate模式下显示的内容
+            render_short_generate_options(tr)
+        else:
+            # 其他模式下保持原有布局
+            # 渲染视频主题和提示词
+            render_video_details(tr)

        # 渲染脚本操作按钮
        render_script_buttons(tr, params)
@@ -166,6 +175,23 @@ def render_video_file(tr, params):
                st.rerun()


+def render_short_generate_options(tr):
+    """
+    渲染Short Generate模式下的特殊选项
+    在Short Generate模式下，替换原有的输入框为自定义片段选项
+    """
+    # 显示自定义片段数量选择器
+    custom_clips = st.number_input(
+        tr("自定义片段"),
+        min_value=1,
+        max_value=20,
+        value=st.session_state.get('custom_clips', 5),
+        help=tr("设置需要生成的短视频片段数量"),
+        key="custom_clips_input"
+    )
+    st.session_state['custom_clips'] = custom_clips
+
+
 def render_video_details(tr):
    """渲染视频主题和提示词"""
    video_theme = st.text_input(tr("Video Theme"))
@@ -182,42 +208,46 @@ def render_video_details(tr):

 def render_script_buttons(tr, params):
    """渲染脚本操作按钮"""
-    # 新增三个输入框，放在同一行
-    input_cols = st.columns(3)
+    # 获取当前选择的脚本类型
+    script_path = st.session_state.get('video_clip_json_path', '')
    
-    with input_cols[0]:
-        skip_seconds = st.number_input(
-            "skip_seconds",
-            min_value=0,
-            value=st.session_state.get('skip_seconds', config.frames.get('skip_seconds', 0)),
-            help=tr("Skip the first few seconds"),
-            key="skip_seconds_input"
-        )
-        st.session_state['skip_seconds'] = skip_seconds
+    # 根据脚本类型显示不同的设置
+    if script_path != "short":
+        # 非短视频模式下显示原有的三个输入框
+        input_cols = st.columns(3)
        
-    with input_cols[1]:
-        threshold = st.number_input(
-            "threshold",
-            min_value=0,
-            value=st.session_state.get('threshold', config.frames.get('threshold', 30)),
-            help=tr("Difference threshold"),
-            key="threshold_input"
-        )
-        st.session_state['threshold'] = threshold
-        
-    with input_cols[2]:
-        vision_batch_size = st.number_input(
-            "vision_batch_size",
-            min_value=1,
-            max_value=20,
-            value=st.session_state.get('vision_batch_size', config.frames.get('vision_batch_size', 5)),
-            help=tr("Vision processing batch size"),
-            key="vision_batch_size_input"
-        )
-        st.session_state['vision_batch_size'] = vision_batch_size
+        with input_cols[0]:
+            skip_seconds = st.number_input(
+                "skip_seconds",
+                min_value=0,
+                value=st.session_state.get('skip_seconds', config.frames.get('skip_seconds', 0)),
+                help=tr("Skip the first few seconds"),
+                key="skip_seconds_input"
+            )
+            st.session_state['skip_seconds'] = skip_seconds
+            
+        with input_cols[1]:
+            threshold = st.number_input(
+                "threshold",
+                min_value=0,
+                value=st.session_state.get('threshold', config.frames.get('threshold', 30)),
+                help=tr("Difference threshold"),
+                key="threshold_input"
+            )
+            st.session_state['threshold'] = threshold
+            
+        with input_cols[2]:
+            vision_batch_size = st.number_input(
+                "vision_batch_size",
+                min_value=1,
+                max_value=20,
+                value=st.session_state.get('vision_batch_size', config.frames.get('vision_batch_size', 5)),
+                help=tr("Vision processing batch size"),
+                key="vision_batch_size_input"
+            )
+            st.session_state['vision_batch_size'] = vision_batch_size

    # 生成/加载按钮
-    script_path = st.session_state.get('video_clip_json_path', '')
    if script_path == "auto":
        button_name = tr("Generate Video Script")
    elif script_path == "short":
@@ -231,7 +261,10 @@ def render_script_buttons(tr, params):
        if script_path == "auto":
            generate_script_docu(tr, params)
        elif script_path == "short":
-            generate_script_short(tr, params)
+            # 获取自定义片段数量参数
+            custom_clips = st.session_state.get('custom_clips', 5)
+            # 直接将custom_clips作为参数传递，而不是通过params对象
+            generate_script_short(tr, params, custom_clips)
        else:
            load_script(tr, script_path)

--- a/webui/tools/generate_script_short.py
+++ b/webui/tools/generate_script_short.py
@@ -11,9 +11,14 @@ from app.config import config
 from webui.tools.base import chekc_video_config


-def generate_script_short(tr, params):
+def generate_script_short(tr, params, custom_clips=5):
    """
-    生成 纪录片 视频脚本
+    生成短视频脚本
+    
+    Args:
+        tr: 翻译函数
+        params: 视频参数对象
+        custom_clips: 自定义片段数量，默认为5
    """
    progress_bar = st.progress(0)
    status_text = st.empty()
@@ -62,6 +67,7 @@ def generate_script_short(tr, params):
                base_url=text_base_url,
                narrato_api_key=narrato_api_key,
                bert_path="app/models/bert/",
+                custom_clips=custom_clips,
            )

            if script is None: