diff --git a/config.example.toml b/config.example.toml index 38d4766..e43a61e 100644 --- a/config.example.toml +++ b/config.example.toml @@ -2,8 +2,24 @@ project_version="0.3.0" # 支持视频理解的大模型提供商 # gemini + # NarratoAPI # qwen2-vl (待增加) - video_llm_provider="gemini" + vision_llm_provider="gemini" + vision_batch_size = 5 + vision_analysis_prompt = "你是资深视频内容分析专家,擅长分析视频画面信息,分析下面视频画面内容,只输出客观的画面描述不要给任何总结或评价" + + ########## Vision Gemini API Key + vision_gemini_api_key = "" + vision_gemini_model_name = "gemini-1.5-flash" + + ########### Vision NarratoAPI Key + # NarratoAPI 是为了便捷访问不了 Gemini API 的用户, 提供的代理服务 + narrato_api_key = "" + narrato_api_url = "" + narrato_vision_model = "gemini-1.5-flash" + narrato_vision_key = "" + narrato_llm_model = "gpt-4o" + narrato_llm_key = "" # 用于生成文案的大模型支持的提供商 (Supported providers): # openai (默认) @@ -13,63 +29,52 @@ # azure # qwen (通义千问) # gemini - llm_provider="openai" - ########## Ollama Settings - # No need to set it unless you want to use your own proxy - ollama_base_url = "" - # Check your available models at https://ollama.com/library - ollama_model_name = "" + text_llm_provider="openai" ########## OpenAI API Key # Get your API key at https://platform.openai.com/api-keys - openai_api_key = "" + text_openai_api_key = "" # No need to set it unless you want to use your own proxy - openai_base_url = "" + text_openai_base_url = "" # Check your available models at https://platform.openai.com/account/limits - openai_model_name = "gpt-4o" + text_openai_model_name = "gpt-4o-mini" ########## Moonshot API Key # Visit https://platform.moonshot.cn/console/api-keys to get your API key. 
- moonshot_api_key="" - moonshot_base_url = "https://api.moonshot.cn/v1" - moonshot_model_name = "moonshot-v1-8k" - - ########## OneAPI API Key - # Visit https://github.com/songquanpeng/one-api to get your API key - oneapi_api_key="" - oneapi_base_url="" - oneapi_model_name="" + text_moonshot_api_key="" + text_moonshot_base_url = "https://api.moonshot.cn/v1" + text_moonshot_model_name = "moonshot-v1-8k" ########## G4F # Visit https://github.com/xtekky/gpt4free to get more details # Supported model list: https://github.com/xtekky/gpt4free/blob/main/g4f/models.py - g4f_model_name = "gpt-3.5-turbo" + text_g4f_model_name = "gpt-3.5-turbo" ########## Azure API Key # Visit https://learn.microsoft.com/zh-cn/azure/ai-services/openai/ to get more details # API documentation: https://learn.microsoft.com/zh-cn/azure/ai-services/openai/reference - azure_api_key = "" - azure_base_url="" - azure_model_name="gpt-35-turbo" # replace with your model deployment name - azure_api_version = "2024-02-15-preview" + text_azure_api_key = "" + text_azure_base_url="" + text_azure_model_name="gpt-35-turbo" # replace with your model deployment name + text_azure_api_version = "2024-02-15-preview" ########## Gemini API Key - gemini_api_key="" - gemini_model_name = "gemini-1.5-pro" + text_gemini_api_key="" + text_gemini_model_name = "gemini-1.5-flash" ########## Qwen API Key # Visit https://dashscope.console.aliyun.com/apiKey to get your API key # Visit below links to get more details # https://tongyi.aliyun.com/qianwen/ # https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction - qwen_api_key = "" - qwen_model_name = "qwen-max" + text_qwen_api_key = "" + text_qwen_model_name = "qwen-max" ########## DeepSeek API Key # Visit https://platform.deepseek.com/api_keys to get your API key - deepseek_api_key = "" - deepseek_base_url = "https://api.deepseek.com" - deepseek_model_name = "deepseek-chat" + text_deepseek_api_key = "" + text_deepseek_base_url = "https://api.deepseek.com" + 
text_deepseek_model_name = "deepseek-chat" # 字幕提供商、可选,支持 whisper 和 faster-whisper-large-v2"whisper" # 默认为 faster-whisper-large-v2 模型地址:https://huggingface.co/guillaumekln/faster-whisper-large-v2 diff --git a/webui/components/script_settings.py b/webui/components/script_settings.py index 986c8af..53fc0b0 100644 --- a/webui/components/script_settings.py +++ b/webui/components/script_settings.py @@ -283,9 +283,10 @@ def generate_script(tr, params): raise Exception(f"关键帧提取失败: {str(e)}") # 根据不同的 LLM 提供商处理 - video_llm_provider = st.session_state.get('video_llm_providers', 'Gemini').lower() + vision_llm_provider = st.session_state.get('vision_llm_providers').lower() + logger.debug(f"Vision LLM 提供商: {vision_llm_provider}") - if video_llm_provider == 'gemini': + if vision_llm_provider == 'gemini': try: # ===================初始化视觉分析器=================== update_progress(30, "正在初始化视觉分析器...") @@ -443,7 +444,7 @@ def generate_script(tr, params): logger.exception(f"Gemini 处理过程中发生错误\n{traceback.format_exc()}") raise Exception(f"视觉分析失败: {str(e)}") - else: # NarratoAPI + elif vision_llm_provider == 'narratoapi': # NarratoAPI try: # 创建临时目录 temp_dir = utils.temp_dir("narrato") @@ -451,12 +452,11 @@ def generate_script(tr, params): # 打包关键帧 update_progress(30, "正在打包关键帧...") zip_path = os.path.join(temp_dir, f"keyframes_{int(time.time())}.zip") - if not file_utils.create_zip(keyframe_files, zip_path): raise Exception("打包关键帧失败") # 获取API配置 - api_url = st.session_state.get('narrato_api_url', 'http://127.0.0.1:8000/api/v1/video/analyze') + api_url = st.session_state.get('narrato_api_url') api_key = st.session_state.get('narrato_api_key') if not api_key: @@ -480,12 +480,13 @@ def generate_script(tr, params): } # 发送API请求 + logger.info(f"请求 NarratoAPI:{api_url}") update_progress(40, "正在上传文件...") with open(zip_path, 'rb') as f: files = {'file': (os.path.basename(zip_path), f, 'application/x-zip-compressed')} try: response = requests.post( - api_url, + f"{api_url}/video/analyze", headers=headers, 
params=api_params, files=files, @@ -493,10 +494,11 @@ def generate_script(tr, params): ) response.raise_for_status() except requests.RequestException as e: + logger.error(f"Narrato API 请求失败:\n{traceback.format_exc()}") raise Exception(f"API请求失败: {str(e)}") task_data = response.json() - task_id = task_data.get('task_id') + task_id = task_data["data"].get('task_id') if not task_id: raise Exception(f"无效的API响应: {response.text}") @@ -508,7 +510,7 @@ def generate_script(tr, params): while retry_count < max_retries: try: status_response = requests.get( - f"{api_url}/tasks/{task_id}", + f"{api_url}/video/tasks/{task_id}", headers=headers, timeout=10 ) @@ -516,14 +518,12 @@ def generate_script(tr, params): task_status = status_response.json()['data'] if task_status['status'] == 'SUCCESS': - script = task_status['result'] + script = task_status['result']['data'] break elif task_status['status'] in ['FAILURE', 'RETRY']: raise Exception(f"任务失败: {task_status.get('error')}") retry_count += 1 - progress = min(70, 50 + (retry_count * 20 / max_retries)) - update_progress(progress, "正在分析中...") time.sleep(2) except requests.RequestException as e: @@ -536,7 +536,7 @@ def generate_script(tr, params): raise Exception("任务执行超时") except Exception as e: - logger.exception("NarratoAPI 处理过程中发生错误") + logger.exception(f"NarratoAPI 处理过程中发生错误\n{traceback.format_exc()}") raise Exception(f"NarratoAPI 处理失败: {str(e)}") finally: # 清理临时文件 @@ -546,12 +546,14 @@ def generate_script(tr, params): except Exception as e: logger.warning(f"清理临时文件失败: {str(e)}") + else: + logger.exception("Vision Model 未启用,请检查配置") + if script is None: st.error("生成脚本失败,请检查日志") st.stop() - - script = utils.clean_model_output(script) - st.session_state['video_clip_json'] = json.loads(script) + logger.info(f"脚本生成完成\n{script} \n{type(script)}") + st.session_state['video_clip_json'] = script update_progress(90, "脚本生成完成") time.sleep(0.5) @@ -561,7 +563,7 @@ def generate_script(tr, params): except Exception as err: st.error(f"生成过程中发生错误: 
{str(err)}") - logger.exception("生成脚本时发生错误") + logger.exception(f"生成脚本时发生错误\n{traceback.format_exc()}") finally: time.sleep(2) progress_bar.empty() diff --git a/webui/utils/file_utils.py b/webui/utils/file_utils.py index 8fe6988..b6b1238 100644 --- a/webui/utils/file_utils.py +++ b/webui/utils/file_utils.py @@ -188,13 +188,14 @@ def ensure_directory(directory): logger.error(f"创建目录失败: {directory}, 错误: {e}") return False -def create_zip(files: list, zip_path: str, base_dir: str = None) -> bool: +def create_zip(files: list, zip_path: str, base_dir: str = None, folder_name: str = "demo") -> bool: """ 创建zip文件 Args: files: 要打包的文件列表 zip_path: zip文件保存路径 base_dir: 基础目录,用于保持目录结构 + folder_name: zip解压后的文件夹名称,默认为demo Returns: bool: 是否成功 """ @@ -210,19 +211,18 @@ def create_zip(files: list, zip_path: str, base_dir: str = None) -> bool: logger.warning(f"文件不存在,跳过: {file}") continue - # 计算文件在zip中的路径 + # 计算文件在zip中的路径,添加folder_name作为前缀目录 if base_dir: - arcname = os.path.relpath(file, base_dir) + arcname = os.path.join(folder_name, os.path.relpath(file, base_dir)) else: - arcname = os.path.basename(file) + arcname = os.path.join(folder_name, os.path.basename(file)) try: zipf.write(file, arcname) except Exception as e: logger.error(f"添加文件到zip失败: {file}, 错误: {e}") continue - - logger.info(f"创建zip文件成功: {zip_path}") + return True except Exception as e: