From 07c3d540c56a9307c3d7159f6ac3db95fa9e6747 Mon Sep 17 00:00:00 2001
From: linyq
Date: Mon, 18 Nov 2024 11:55:11 +0800
Subject: feat(webui): add a vision model connection test
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add a test_vision_model_connection helper for checking the vision model connection
- Add a "Test Connection" button to the vision model settings UI
- Implement connection tests for both the Gemini and NarratoAPI providers
- Tidy the layout and comment out some redundant code
---
 webui/components/basic_settings.py | 209 +++++++++++++++++++----------
 1 file changed, 135 insertions(+), 74 deletions(-)

diff --git a/webui/components/basic_settings.py b/webui/components/basic_settings.py
index e5fa8f7..960587a 100644
--- a/webui/components/basic_settings.py
+++ b/webui/components/basic_settings.py
@@ -66,6 +66,51 @@ def render_proxy_settings(tr):
         os.environ["HTTPS_PROXY"] = HTTPS_PROXY


+def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
+    """测试视觉模型连接
+
+    Args:
+        api_key: API密钥
+        base_url: 基础URL
+        model_name: 模型名称
+        provider: 提供商名称
+
+    Returns:
+        bool: 连接是否成功
+        str: 测试结果消息
+    """
+    if provider.lower() == 'gemini':
+        import google.generativeai as genai
+
+        try:
+            genai.configure(api_key=api_key)
+            model = genai.GenerativeModel(model_name)
+            model.generate_content("直接回复我文本'当前网络可用'")
+            return True, tr("gemini model is available")
+        except Exception as e:
+            return False, f"{tr('gemini model is not available')}: {str(e)}"
+
+    elif provider.lower() == 'narratoapi':
+        import requests
+        try:
+            # 构建测试请求
+            headers = {
+                "Authorization": f"Bearer {api_key}"
+            }
+
+            test_url = f"{base_url.rstrip('/')}/health"
+            response = requests.get(test_url, headers=headers, timeout=10)
+
+            if response.status_code == 200:
+                return True, tr("NarratoAPI is available")
+            else:
+                return False, f"{tr('NarratoAPI is not available')}: HTTP {response.status_code}"
+        except Exception as e:
+            return False, f"{tr('NarratoAPI is not available')}: {str(e)}"
+
+    else:
+        return False, f"{tr('Unsupported provider')}: {provider}"
+
 def render_vision_llm_settings(tr):
     """渲染视频分析模型设置"""
     st.subheader(tr("Vision Model Settings"))
@@ -99,6 +144,22 @@ def render_vision_llm_settings(tr):
     st_vision_base_url = st.text_input(tr("Vision Base URL"), value=vision_base_url)
     st_vision_model_name = st.text_input(tr("Vision Model Name"), value=vision_model_name)

+    # 在配置输入框后添加测试按钮
+    if st.button(tr("Test Connection"), key="test_vision_connection"):
+        with st.spinner(tr("Testing connection...")):
+            success, message = test_vision_model_connection(
+                api_key=st_vision_api_key,
+                base_url=st_vision_base_url,
+                model_name=st_vision_model_name,
+                provider=vision_provider,
+                tr=tr
+            )
+
+            if success:
+                st.success(tr(message))
+            else:
+                st.error(tr(message))
+
     # 保存视觉模型配置
     if st_vision_api_key:
         config.app[f"vision_{vision_provider}_api_key"] = st_vision_api_key
@@ -110,80 +171,80 @@ def render_vision_llm_settings(tr):
         config.app[f"vision_{vision_provider}_model_name"] = st_vision_model_name
         st.session_state[f"vision_{vision_provider}_model_name"] = st_vision_model_name

-    # NarratoAPI 特殊配置
-    if vision_provider == 'narratoapi':
-        st.subheader(tr("Narrato Additional Settings"))
-
-        # Narrato API 基础配置
-        narrato_api_key = st.text_input(
-            tr("Narrato API Key"),
-            value=config.app.get("narrato_api_key", ""),
-            type="password",
-            help="用于访问 Narrato API 的密钥"
-        )
-        if narrato_api_key:
-            config.app["narrato_api_key"] = narrato_api_key
-            st.session_state['narrato_api_key'] = narrato_api_key
-
-        narrato_api_url = st.text_input(
-            tr("Narrato API URL"),
-            value=config.app.get("narrato_api_url", "http://127.0.0.1:8000/api/v1/video/analyze")
-        )
-        if narrato_api_url:
-            config.app["narrato_api_url"] = narrato_api_url
-            st.session_state['narrato_api_url'] = narrato_api_url
-
-        # 视频分析模型配置
-        st.markdown("##### " + tr("Vision Model Settings"))
-        narrato_vision_model = st.text_input(
-            tr("Vision Model Name"),
-            value=config.app.get("narrato_vision_model", "gemini-1.5-flash")
-        )
-        narrato_vision_key = st.text_input(
-            tr("Vision Model API Key"),
-            value=config.app.get("narrato_vision_key", ""),
-            type="password",
-            help="用于视频分析的模型 API Key"
-        )
-
-        if narrato_vision_model:
-            config.app["narrato_vision_model"] = narrato_vision_model
-            st.session_state['narrato_vision_model'] = narrato_vision_model
-        if narrato_vision_key:
-            config.app["narrato_vision_key"] = narrato_vision_key
-            st.session_state['narrato_vision_key'] = narrato_vision_key
-
-        # 文案生成模型配置
-        st.markdown("##### " + tr("Text Generation Model Settings"))
-        narrato_llm_model = st.text_input(
-            tr("LLM Model Name"),
-            value=config.app.get("narrato_llm_model", "qwen-plus")
-        )
-        narrato_llm_key = st.text_input(
-            tr("LLM Model API Key"),
-            value=config.app.get("narrato_llm_key", ""),
-            type="password",
-            help="用于文案生成的模型 API Key"
-        )
-
-        if narrato_llm_model:
-            config.app["narrato_llm_model"] = narrato_llm_model
-            st.session_state['narrato_llm_model'] = narrato_llm_model
-        if narrato_llm_key:
-            config.app["narrato_llm_key"] = narrato_llm_key
-            st.session_state['narrato_llm_key'] = narrato_llm_key
-
-        # 批处理配置
-        narrato_batch_size = st.number_input(
-            tr("Batch Size"),
-            min_value=1,
-            max_value=50,
-            value=config.app.get("narrato_batch_size", 10),
-            help="每批处理的图片数量"
-        )
-        if narrato_batch_size:
-            config.app["narrato_batch_size"] = narrato_batch_size
-            st.session_state['narrato_batch_size'] = narrato_batch_size
+    # # NarratoAPI 特殊配置
+    # if vision_provider == 'narratoapi':
+    #     st.subheader(tr("Narrato Additional Settings"))
+    #
+    #     # Narrato API 基础配置
+    #     narrato_api_key = st.text_input(
+    #         tr("Narrato API Key"),
+    #         value=config.app.get("narrato_api_key", ""),
+    #         type="password",
+    #         help="用于访问 Narrato API 的密钥"
+    #     )
+    #     if narrato_api_key:
+    #         config.app["narrato_api_key"] = narrato_api_key
+    #         st.session_state['narrato_api_key'] = narrato_api_key
+    #
+    #     narrato_api_url = st.text_input(
+    #         tr("Narrato API URL"),
+    #         value=config.app.get("narrato_api_url", "http://127.0.0.1:8000/api/v1/video/analyze")
+    #     )
+    #     if narrato_api_url:
+    #         config.app["narrato_api_url"] = narrato_api_url
+    #         st.session_state['narrato_api_url'] = narrato_api_url
+    #
+    #     # 视频分析模型配置
+    #     st.markdown("##### " + tr("Vision Model Settings"))
+    #     narrato_vision_model = st.text_input(
+    #         tr("Vision Model Name"),
+    #         value=config.app.get("narrato_vision_model", "gemini-1.5-flash")
+    #     )
+    #     narrato_vision_key = st.text_input(
+    #         tr("Vision Model API Key"),
+    #         value=config.app.get("narrato_vision_key", ""),
+    #         type="password",
+    #         help="用于视频分析的模型 API Key"
+    #     )
+    #
+    #     if narrato_vision_model:
+    #         config.app["narrato_vision_model"] = narrato_vision_model
+    #         st.session_state['narrato_vision_model'] = narrato_vision_model
+    #     if narrato_vision_key:
+    #         config.app["narrato_vision_key"] = narrato_vision_key
+    #         st.session_state['narrato_vision_key'] = narrato_vision_key
+    #
+    #     # 文案生成模型配置
+    #     st.markdown("##### " + tr("Text Generation Model Settings"))
+    #     narrato_llm_model = st.text_input(
+    #         tr("LLM Model Name"),
+    #         value=config.app.get("narrato_llm_model", "qwen-plus")
+    #     )
+    #     narrato_llm_key = st.text_input(
+    #         tr("LLM Model API Key"),
+    #         value=config.app.get("narrato_llm_key", ""),
+    #         type="password",
+    #         help="用于文案生成的模型 API Key"
+    #     )
+    #
+    #     if narrato_llm_model:
+    #         config.app["narrato_llm_model"] = narrato_llm_model
+    #         st.session_state['narrato_llm_model'] = narrato_llm_model
+    #     if narrato_llm_key:
+    #         config.app["narrato_llm_key"] = narrato_llm_key
+    #         st.session_state['narrato_llm_key'] = narrato_llm_key
+    #
+    #     # 批处理配置
+    #     narrato_batch_size = st.number_input(
+    #         tr("Batch Size"),
+    #         min_value=1,
+    #         max_value=50,
+    #         value=config.app.get("narrato_batch_size", 10),
+    #         help="每批处理的图片数量"
+    #     )
+    #     if narrato_batch_size:
+    #         config.app["narrato_batch_size"] = narrato_batch_size
+    #         st.session_state['narrato_batch_size'] = narrato_batch_size


 def render_text_llm_settings(tr):

From 420f9392e395cbd3abb7307c1edf1a31b50a0ef7 Mon Sep 17 00:00:00 2001
From: linyq
Date: Mon, 18 Nov 2024 12:19:48 +0800
Subject: feat(webui): add script settings and improve keyframe extraction
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add three new input fields in script_settings.py for setting skip_seconds,
  threshold and vision_batch_size
- Update the keyframe extraction to use the newly configured values instead of
  the fixed values from the config file
- Add the corresponding translations to i18n/zh.json so the new options are
  available in Chinese
---
 webui/components/script_settings.py | 45 +++++++++++++++++++++++++----
 webui/i18n/zh.json                  | 13 +++++++--
 2 files changed, 50 insertions(+), 8 deletions(-)

diff --git a/webui/components/script_settings.py b/webui/components/script_settings.py
index 4e4aea6..67a2e16 100644
--- a/webui/components/script_settings.py
+++ b/webui/components/script_settings.py
@@ -205,6 +205,40 @@ def render_video_details(tr):

 def render_script_buttons(tr, params):
     """渲染脚本操作按钮"""
+    # 新增三个输入框，放在同一行
+    input_cols = st.columns(3)
+
+    with input_cols[0]:
+        skip_seconds = st.number_input(
+            "skip_seconds",
+            min_value=0,
+            value=st.session_state.get('skip_seconds', config.frames.get('skip_seconds', 0)),
+            help=tr("Skip the first few seconds"),
+            key="skip_seconds_input"
+        )
+        st.session_state['skip_seconds'] = skip_seconds
+
+    with input_cols[1]:
+        threshold = st.number_input(
+            "threshold",
+            min_value=0,
+            value=st.session_state.get('threshold', config.frames.get('threshold', 30)),
+            help=tr("Difference threshold"),
+            key="threshold_input"
+        )
+        st.session_state['threshold'] = threshold
+
+    with input_cols[2]:
+        vision_batch_size = st.number_input(
+            "vision_batch_size",
+            min_value=1,
+            max_value=20,
+            value=st.session_state.get('vision_batch_size', config.frames.get('vision_batch_size', 5)),
+            help=tr("Vision processing batch size"),
+            key="vision_batch_size_input"
+        )
+        st.session_state['vision_batch_size'] = vision_batch_size
+
     # 生成/加载按钮
     script_path = st.session_state.get('video_clip_json_path', '')
     if script_path == "auto":
@@ -287,7 +321,6 @@ def generate_script(tr, params):
     with st.spinner("正在生成脚本..."):
         if not params.video_origin_path:
             st.error("请先选择视频文件")
-            st.stop()
             return

         # ===================提取键帧===================
@@ -323,8 +356,8 @@ def generate_script(tr, params):
             # 处理视频并提取关键帧
             processor.process_video_pipeline(
                 output_dir=video_keyframes_dir,
-                skip_seconds=config.frames.get("skip_seconds", 0),
-                threshold=config.frames.get("threshold", 30)
+                skip_seconds=st.session_state.get('skip_seconds'),
+                threshold=st.session_state.get('threshold')
             )
         else:
             processor = video_processor.VideoProcessor(params.video_origin_path)
@@ -353,7 +386,7 @@ def generate_script(tr, params):
             except Exception as cleanup_err:
                 logger.error(f"清理失败的关键帧目录时出错: {cleanup_err}")

-            raise Exception(f"关键帧提取��败: {str(e)}")
+            raise Exception(f"关键帧提取失败: {str(e)}")

         # 根据不同的 LLM 提供商处理
         vision_llm_provider = st.session_state.get('vision_llm_providers').lower()
@@ -374,7 +407,7 @@ def generate_script(tr, params):

             analyzer = vision_analyzer.VisionAnalyzer(
                 model_name=vision_model,
-                api_key=vision_api_key
+                api_key=vision_api_key,
             )

             update_progress(40, "正在分析关键帧...")
@@ -388,7 +421,7 @@ def generate_script(tr, params):
                 analyzer.analyze_images(
                     images=keyframe_files,
                     prompt=config.app.get('vision_analysis_prompt'),
-                    batch_size=config.frames.get("vision_batch_size", 5)
+                    batch_size=config.frames.get("vision_batch_size", st.session_state.get('vision_batch_size', 5))
                 )
             )
             loop.close()
diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json
index 48b50cf..c3c06fd 100644
--- a/webui/i18n/zh.json
+++ b/webui/i18n/zh.json
@@ -125,6 +125,15 @@
     "Text API Key": "文案生成 API 密钥",
     "Text Base URL": "文案生成接口地址",
     "Text Model Name": "文案生成模型名称",
-    "Account ID": "账户 ID"
+    "Account ID": "账户 ID",
+    "Skip the first few seconds": "跳过开头多少秒",
+    "Difference threshold": "差异阈值",
+    "Vision processing batch size": "视觉处理批次大小",
+    "Test Connection": "测试连接",
+    "gemini model is available": "Gemini 模型可用",
+    "gemini model is not available": "Gemini 模型不可用",
+    "NarratoAPI is available": "NarratoAPI 可用",
+    "NarratoAPI is not available": "NarratoAPI 不可用",
+    "Unsupported provider": "不支持的提供商"
   }
-}
\ No newline at end of file
+}
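
Note (not part of the patches): a minimal sketch of how the new connection test from patch 1 could be exercised outside the Streamlit UI, for example from a REPL at the project root with the webui package importable. The tr helper is stubbed as an identity function, and the keys and URL below are placeholders rather than real settings from the project configuration:

    from webui.components.basic_settings import test_vision_model_connection

    tr = lambda s: s  # identity stand-in for the webui translation helper

    # Gemini branch: only api_key and model_name are used; base_url is ignored
    ok, msg = test_vision_model_connection(
        api_key="YOUR_GEMINI_KEY",      # placeholder
        base_url="",
        model_name="gemini-1.5-flash",
        provider="gemini",
        tr=tr,
    )
    print(ok, msg)

    # NarratoAPI branch: sends GET {base_url}/health with a Bearer token
    ok, msg = test_vision_model_connection(
        api_key="YOUR_NARRATO_KEY",                # placeholder
        base_url="http://127.0.0.1:8000/api/v1",   # placeholder endpoint
        model_name="",                             # not used by this branch
        provider="narratoapi",
        tr=tr,
    )
    print(ok, msg)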