mirror of
https://github.com/linyqh/NarratoAI.git
synced 2025-12-12 11:22:51 +00:00
(webfeatui): 重构视觉分析功能并添加新模型支持
- 移除了对 QwenVL 模型的特定逻辑,改为更通用的实现
- 添加了对 OpenAI 视觉模型的支持
- 更新了视觉模型设置界面,增加了新的模型选项
- 重构了测试连接和创建分析器的代码,提高了可维护性
- 调整了配置文件结构,简化了视觉模型的配置
This commit is contained in:
parent
afeeb7c516
commit
3fe8eb50c0
@ -237,28 +237,28 @@ if __name__ == '__main__':
|
|||||||
video_frame_description_path = "/Users/apple/Desktop/home/NarratoAI/storage/temp/analysis/frame_analysis_20250508_1139.json"
|
video_frame_description_path = "/Users/apple/Desktop/home/NarratoAI/storage/temp/analysis/frame_analysis_20250508_1139.json"
|
||||||
|
|
||||||
# 测试新的JSON文件
|
# 测试新的JSON文件
|
||||||
test_file_path = "/Users/apple/Desktop/home/NarratoAI/storage/temp/analysis/frame_analysis_20250508_1458.json"
|
test_file_path = "/Users/apple/Desktop/home/NarratoAI/storage/temp/analysis/frame_analysis_20250508_2258.json"
|
||||||
markdown_output = parse_frame_analysis_to_markdown(test_file_path)
|
markdown_output = parse_frame_analysis_to_markdown(test_file_path)
|
||||||
# print(markdown_output)
|
# print(markdown_output)
|
||||||
|
|
||||||
# 输出到文件以便检查格式
|
# 输出到文件以便检查格式
|
||||||
output_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/narration_script.md"
|
output_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/家里家外1-5.md"
|
||||||
with open(output_file, 'w', encoding='utf-8') as f:
|
with open(output_file, 'w', encoding='utf-8') as f:
|
||||||
f.write(markdown_output)
|
f.write(markdown_output)
|
||||||
# print(f"\n已将Markdown输出保存到: {output_file}")
|
# print(f"\n已将Markdown输出保存到: {output_file}")
|
||||||
|
|
||||||
# 生成解说文案
|
# # 生成解说文案
|
||||||
narration = generate_narration(
|
# narration = generate_narration(
|
||||||
markdown_output,
|
# markdown_output,
|
||||||
text_api_key,
|
# text_api_key,
|
||||||
base_url=text_base_url,
|
# base_url=text_base_url,
|
||||||
model=text_model
|
# model=text_model
|
||||||
)
|
# )
|
||||||
|
#
|
||||||
# 保存解说文案
|
# # 保存解说文案
|
||||||
print(narration)
|
# print(narration)
|
||||||
print(type(narration))
|
# print(type(narration))
|
||||||
narration_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/final_narration_script.json"
|
# narration_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/final_narration_script.json"
|
||||||
with open(narration_file, 'w', encoding='utf-8') as f:
|
# with open(narration_file, 'w', encoding='utf-8') as f:
|
||||||
f.write(narration)
|
# f.write(narration)
|
||||||
print(f"\n已将解说文案保存到: {narration_file}")
|
# print(f"\n已将解说文案保存到: {narration_file}")
|
||||||
|
|||||||
@ -1,175 +1,85 @@
|
|||||||
[app]
|
[app]
|
||||||
project_version="0.6.0"
|
project_version="0.6.0"
|
||||||
# 支持视频理解的大模型提供商
|
# 支持视频理解的大模型提供商
|
||||||
# gemini
|
# gemini (谷歌, 需要 VPN)
|
||||||
# qwenvl
|
# siliconflow (硅基流动)
|
||||||
vision_llm_provider="qwenvl"
|
# qwenvl (通义千问)
|
||||||
|
vision_llm_provider="Siliconflow"
|
||||||
|
|
||||||
########## Vision Gemini API Key
|
########## Gemini 视觉模型
|
||||||
vision_gemini_api_key = ""
|
vision_gemini_api_key = ""
|
||||||
vision_gemini_model_name = "gemini-2.0-flash"
|
vision_gemini_model_name = "gemini-2.0-flash-lite"
|
||||||
|
|
||||||
########## Vision Qwen API Key (默认使用“硅基流动”的QwenVL模型)
|
########## QwenVL 视觉模型
|
||||||
vision_qwenvl_api_key = ""
|
vision_qwenvl_api_key = ""
|
||||||
vision_qwenvl_model_name = "Qwen/Qwen2.5-VL-32B-Instruct"
|
vision_qwenvl_model_name = "qwen2.5-vl-32b-instruct"
|
||||||
vision_qwenvl_base_url = "https://api.siliconflow.cn/v1"
|
vision_qwenvl_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
||||||
|
|
||||||
########### Vision NarratoAPI Key
|
########## siliconflow 视觉模型
|
||||||
|
vision_siliconflow_api_key = ""
|
||||||
|
vision_siliconflow_model_name = "Qwen/Qwen2.5-VL-32B-Instruct"
|
||||||
|
vision_siliconflow_base_url = "https://api.siliconflow.cn/v1"
|
||||||
|
|
||||||
|
########## OpenAI 视觉模型
|
||||||
|
vision_openai_api_key = ""
|
||||||
|
vision_openai_model_name = "gpt-4.1-nano-2025-04-14"
|
||||||
|
vision_openai_base_url = "https://api.openai.com/v1"
|
||||||
|
|
||||||
|
########### NarratoAPI 微调模型 (未发布)
|
||||||
narrato_api_key = "ggyY91BAO-_ULvAqKum3XexcyN1G3dP86DEzvjZDcrg"
|
narrato_api_key = "ggyY91BAO-_ULvAqKum3XexcyN1G3dP86DEzvjZDcrg"
|
||||||
narrato_api_url = "https://narratoinsight.scsmtech.cn/api/v1"
|
narrato_api_url = "https://narratoinsight.scsmtech.cn/api/v1"
|
||||||
narrato_vision_model = "gemini-1.5-flash"
|
narrato_model = "narra-1.0-2025-05-09"
|
||||||
narrato_vision_key = ""
|
|
||||||
narrato_llm_model = "gpt-4o"
|
|
||||||
narrato_llm_key = ""
|
|
||||||
|
|
||||||
# 用于生成文案的大模型支持的提供商 (Supported providers):
|
# 用于生成文案的大模型支持的提供商 (Supported providers):
|
||||||
# openai (默认)
|
# openai (默认, 需要 VPN)
|
||||||
# deepseek (默认使用“硅基流动”的模型)
|
# siliconflow (硅基流动)
|
||||||
# moonshot (月之暗面)
|
# deepseek (深度求索)
|
||||||
|
# gemini (谷歌, 需要 VPN)
|
||||||
# qwen (通义千问)
|
# qwen (通义千问)
|
||||||
# gemini
|
# moonshot (月之暗面)
|
||||||
text_llm_provider="deepseek"
|
text_llm_provider="openai"
|
||||||
|
|
||||||
########## OpenAI API Key
|
########## OpenAI API Key
|
||||||
# Get your API key at https://platform.openai.com/api-keys
|
# Get your API key at https://platform.openai.com/api-keys
|
||||||
text_openai_api_key = ""
|
text_openai_api_key = ""
|
||||||
text_openai_base_url = "https://api.openai.com/v1"
|
text_openai_base_url = "https://api.openai.com/v1"
|
||||||
text_openai_model_name = "gpt-4o-mini"
|
text_openai_model_name = "gpt-4.1-mini-2025-04-14"
|
||||||
|
|
||||||
|
# 使用 硅基流动 第三方 API Key,使用手机号注册:https://cloud.siliconflow.cn/i/pyOKqFCV
|
||||||
|
# 访问 https://cloud.siliconflow.cn/account/ak 获取你的 API 密钥
|
||||||
|
text_siliconflow_api_key = ""
|
||||||
|
text_siliconflow_base_url = "https://api.siliconflow.cn/v1"
|
||||||
|
text_siliconflow_model_name = "deepseek-ai/DeepSeek-R1"
|
||||||
|
|
||||||
########## DeepSeek API Key
|
########## DeepSeek API Key
|
||||||
# 使用 硅基流动 第三方 API Key,使用手机号注册:https://cloud.siliconflow.cn/i/pyOKqFCV
|
# 访问 https://platform.deepseek.com/api_keys 获取你的 API 密钥
|
||||||
text_deepseek_api_key = ""
|
text_deepseek_api_key = ""
|
||||||
text_deepseek_base_url = "https://api.siliconflow.cn/v1"
|
text_deepseek_base_url = "https://api.deepseek.com"
|
||||||
text_deepseek_model_name = "deepseek-ai/DeepSeek-V3"
|
text_deepseek_model_name = "deepseek-chat"
|
||||||
|
|
||||||
########## Moonshot API Key
|
|
||||||
# Visit https://platform.moonshot.cn/console/api-keys to get your API key.
|
|
||||||
text_moonshot_api_key=""
|
|
||||||
text_moonshot_base_url = "https://api.moonshot.cn/v1"
|
|
||||||
text_moonshot_model_name = "moonshot-v1-8k"
|
|
||||||
|
|
||||||
########## G4F
|
|
||||||
# Visit https://github.com/xtekky/gpt4free to get more details
|
|
||||||
# Supported model list: https://github.com/xtekky/gpt4free/blob/main/g4f/models.py
|
|
||||||
text_g4f_model_name = "gpt-3.5-turbo"
|
|
||||||
|
|
||||||
########## Azure API Key
|
|
||||||
# Visit https://learn.microsoft.com/zh-cn/azure/ai-services/openai/ to get more details
|
|
||||||
# API documentation: https://learn.microsoft.com/zh-cn/azure/ai-services/openai/reference
|
|
||||||
text_azure_api_key = ""
|
|
||||||
text_azure_base_url=""
|
|
||||||
text_azure_model_name="gpt-35-turbo" # replace with your model deployment name
|
|
||||||
text_azure_api_version = "2024-02-15-preview"
|
|
||||||
|
|
||||||
########## Gemini API Key
|
########## Gemini API Key
|
||||||
text_gemini_api_key=""
|
text_gemini_api_key=""
|
||||||
text_gemini_model_name = "gemini-1.5-flash"
|
text_gemini_model_name = "gemini-2.0-flash"
|
||||||
|
|
||||||
########## Qwen API Key
|
########## Qwen API Key
|
||||||
# Visit https://dashscope.console.aliyun.com/apiKey to get your API key
|
# 访问 https://bailian.console.aliyun.com/?tab=model#/api-key 获取你的 API 密钥
|
||||||
# Visit below links to get more details
|
|
||||||
# https://tongyi.aliyun.com/qianwen/
|
|
||||||
# https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction
|
|
||||||
text_qwen_api_key = ""
|
text_qwen_api_key = ""
|
||||||
text_qwen_model_name = "qwen-plus-1127"
|
text_qwen_model_name = "qwen-plus-1127"
|
||||||
text_qwen_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
text_qwen_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
||||||
|
|
||||||
|
########## Moonshot API Key
|
||||||
# 字幕提供商、可选,支持 whisper 和 faster-whisper-large-v2"whisper"
|
# 访问 https://platform.moonshot.cn/console/api-keys 获取你的 API 密钥
|
||||||
# 默认为 faster-whisper-large-v2 模型地址:https://huggingface.co/guillaumekln/faster-whisper-large-v2
|
text_moonshot_api_key=""
|
||||||
subtitle_provider = "faster-whisper-large-v2"
|
text_moonshot_base_url = "https://api.moonshot.cn/v1"
|
||||||
subtitle_enabled = true
|
text_moonshot_model_name = "moonshot-v1-8k"
|
||||||
|
|
||||||
# ImageMagick
|
|
||||||
# 安装后,将自动检测到 ImageMagick,Windows 除外!
|
|
||||||
# 例如,在 Windows 上 "C:\Program Files (x86)\ImageMagick-7.1.1-Q16-HDRI\magick.exe"
|
|
||||||
# 下载位置 https://imagemagick.org/archive/binaries/ImageMagick-7.1.1-29-Q16-x64-static.exe
|
|
||||||
# imagemagick_path = "C:\\Program Files (x86)\\ImageMagick-7.1.1-Q16\\magick.exe"
|
|
||||||
|
|
||||||
# FFMPEG
|
|
||||||
#
|
|
||||||
# 通常情况下,ffmpeg 会被自动下载,并且会被自动检测到。
|
|
||||||
# 但是如果你的环境有问题,无法自动下载,可能会遇到如下错误:
|
|
||||||
# RuntimeError: No ffmpeg exe could be found.
|
|
||||||
# Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.
|
|
||||||
# 此时你可以手动下载 ffmpeg 并设置 ffmpeg_path,下载地址:https://www.gyan.dev/ffmpeg/builds/
|
|
||||||
|
|
||||||
# ffmpeg_path = "C:\\Users\\harry\\Downloads\\ffmpeg.exe"
|
|
||||||
#########################################################################################
|
|
||||||
|
|
||||||
# 当视频生成成功后,API服务提供的视频下载接入点,默认为当前服务的地址和监听端口
|
|
||||||
# 比如 http://127.0.0.1:8080/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
|
|
||||||
# 如果你需要使用域名对外提供服务(一般会用nginx做代理),则可以设置为你的域名
|
|
||||||
# 比如 https://xxxx.com/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
|
|
||||||
# endpoint="https://xxxx.com"
|
|
||||||
|
|
||||||
# When the video is successfully generated, the API service provides a download endpoint for the video, defaulting to the service's current address and listening port.
|
|
||||||
# For example, http://127.0.0.1:8080/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
|
|
||||||
# If you need to provide the service externally using a domain name (usually done with nginx as a proxy), you can set it to your domain name.
|
|
||||||
# For example, https://xxxx.com/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
|
|
||||||
# endpoint="https://xxxx.com"
|
|
||||||
endpoint=""
|
|
||||||
|
|
||||||
|
|
||||||
# Video material storage location
|
|
||||||
# material_directory = "" # Indicates that video materials will be downloaded to the default folder, the default folder is ./storage/cache_videos under the current project
|
|
||||||
# material_directory = "/user/harry/videos" # Indicates that video materials will be downloaded to a specified folder
|
|
||||||
# material_directory = "task" # Indicates that video materials will be downloaded to the current task's folder, this method does not allow sharing of already downloaded video materials
|
|
||||||
|
|
||||||
# 视频素材存放位置
|
|
||||||
# material_directory = "" #表示将视频素材下载到默认的文件夹,默认文件夹为当前项目下的 ./storage/cache_videos
|
|
||||||
# material_directory = "/user/harry/videos" #表示将视频素材下载到指定的文件夹中
|
|
||||||
# material_directory = "task" #表示将视频素材下载到当前任务的文件夹中,这种方式无法共享已经下载的视频素材
|
|
||||||
|
|
||||||
material_directory = ""
|
|
||||||
|
|
||||||
# 用于任务的状态管理
|
|
||||||
enable_redis = false
|
|
||||||
redis_host = "localhost"
|
|
||||||
redis_port = 6379
|
|
||||||
redis_db = 0
|
|
||||||
redis_password = ""
|
|
||||||
|
|
||||||
# 文生视频时的最大并发任务数
|
|
||||||
max_concurrent_tasks = 5
|
|
||||||
|
|
||||||
# webui界面是否显示配置项
|
# webui界面是否显示配置项
|
||||||
hide_config = false
|
hide_config = true
|
||||||
|
|
||||||
|
|
||||||
[whisper]
|
|
||||||
# Only effective when subtitle_provider is "whisper"
|
|
||||||
|
|
||||||
# Run on GPU with FP16
|
|
||||||
# model = WhisperModel(model_size, device="cuda", compute_type="float16")
|
|
||||||
|
|
||||||
# Run on GPU with INT8
|
|
||||||
# model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")
|
|
||||||
|
|
||||||
# Run on CPU with INT8
|
|
||||||
# model = WhisperModel(model_size, device="cpu", compute_type="int8")
|
|
||||||
|
|
||||||
# recommended model_size: "large-v3"
|
|
||||||
model_size="faster-whisper-large-v2"
|
|
||||||
# 如果要使用 GPU,请设置 device=“cuda”
|
|
||||||
device="CPU"
|
|
||||||
compute_type="int8"
|
|
||||||
|
|
||||||
|
|
||||||
[proxy]
|
[proxy]
|
||||||
### Use a proxy to access the Pexels API
|
|
||||||
### Format: "http://<username>:<password>@<proxy>:<port>"
|
|
||||||
### Example: "http://user:pass@proxy:1234"
|
|
||||||
### Doc: https://requests.readthedocs.io/en/latest/user/advanced/#proxies
|
|
||||||
|
|
||||||
http = "http://127.0.0.1:7890"
|
http = "http://127.0.0.1:7890"
|
||||||
https = "http://127.0.0.1:7890"
|
https = "http://127.0.0.1:7890"
|
||||||
|
enabled = false
|
||||||
[azure]
|
|
||||||
# Azure Speech API Key
|
|
||||||
# Get your API key at https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices
|
|
||||||
speech_key=""
|
|
||||||
speech_region=""
|
|
||||||
|
|
||||||
[frames]
|
[frames]
|
||||||
# 提取关键帧的间隔时间
|
# 提取关键帧的间隔时间
|
||||||
|
|||||||
4
webui.py
4
webui.py
@ -5,7 +5,7 @@ from loguru import logger
|
|||||||
from app.config import config
|
from app.config import config
|
||||||
from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, \
|
from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, \
|
||||||
review_settings, merge_settings, system_settings
|
review_settings, merge_settings, system_settings
|
||||||
from webui.utils import cache, file_utils
|
# from webui.utils import cache, file_utils
|
||||||
from app.utils import utils
|
from app.utils import utils
|
||||||
from app.models.schema import VideoClipParams, VideoAspect
|
from app.models.schema import VideoClipParams, VideoAspect
|
||||||
|
|
||||||
@ -184,7 +184,7 @@ def render_generate_button():
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"播放视频失败: {e}")
|
logger.error(f"播放视频失败: {e}")
|
||||||
|
|
||||||
file_utils.open_task_folder(config.root_dir, task_id)
|
# file_utils.open_task_folder(config.root_dir, task_id)
|
||||||
logger.info(tr("视频生成完成"))
|
logger.info(tr("视频生成完成"))
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@ -64,25 +64,25 @@ def render_proxy_settings(tr):
|
|||||||
proxy_enabled = st.checkbox(tr("Enable Proxy"), value=proxy_enabled)
|
proxy_enabled = st.checkbox(tr("Enable Proxy"), value=proxy_enabled)
|
||||||
|
|
||||||
# 保存代理开关状态
|
# 保存代理开关状态
|
||||||
config.proxy["enabled"] = proxy_enabled
|
# config.proxy["enabled"] = proxy_enabled
|
||||||
|
|
||||||
# 只有在代理启用时才显示代理设置输入框
|
# 只有在代理启用时才显示代理设置输入框
|
||||||
if proxy_enabled:
|
if proxy_enabled:
|
||||||
HTTP_PROXY = st.text_input(tr("HTTP_PROXY"), value=proxy_url_http)
|
HTTP_PROXY = st.text_input(tr("HTTP_PROXY"), value=proxy_url_http)
|
||||||
HTTPS_PROXY = st.text_input(tr("HTTPs_PROXY"), value=proxy_url_https)
|
HTTPS_PROXY = st.text_input(tr("HTTPs_PROXY"), value=proxy_url_https)
|
||||||
|
|
||||||
if HTTP_PROXY:
|
if HTTP_PROXY and HTTPS_PROXY:
|
||||||
config.proxy["http"] = HTTP_PROXY
|
config.proxy["http"] = HTTP_PROXY
|
||||||
os.environ["HTTP_PROXY"] = HTTP_PROXY
|
|
||||||
if HTTPS_PROXY:
|
|
||||||
config.proxy["https"] = HTTPS_PROXY
|
config.proxy["https"] = HTTPS_PROXY
|
||||||
|
os.environ["HTTP_PROXY"] = HTTP_PROXY
|
||||||
os.environ["HTTPS_PROXY"] = HTTPS_PROXY
|
os.environ["HTTPS_PROXY"] = HTTPS_PROXY
|
||||||
|
# logger.debug(f"代理已启用: {HTTP_PROXY}")
|
||||||
else:
|
else:
|
||||||
# 当代理被禁用时,清除环境变量和配置
|
# 当代理被禁用时,清除环境变量和配置
|
||||||
os.environ.pop("HTTP_PROXY", None)
|
os.environ.pop("HTTP_PROXY", None)
|
||||||
os.environ.pop("HTTPS_PROXY", None)
|
os.environ.pop("HTTPS_PROXY", None)
|
||||||
config.proxy["http"] = ""
|
# config.proxy["http"] = ""
|
||||||
config.proxy["https"] = ""
|
# config.proxy["https"] = ""
|
||||||
|
|
||||||
|
|
||||||
def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
|
def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
|
||||||
@ -108,29 +108,6 @@ def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
|
|||||||
return True, tr("gemini model is available")
|
return True, tr("gemini model is available")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return False, f"{tr('gemini model is not available')}: {str(e)}"
|
return False, f"{tr('gemini model is not available')}: {str(e)}"
|
||||||
|
|
||||||
elif provider.lower() == 'qwenvl':
|
|
||||||
from openai import OpenAI
|
|
||||||
try:
|
|
||||||
client = OpenAI(
|
|
||||||
api_key=api_key,
|
|
||||||
base_url=base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
|
||||||
)
|
|
||||||
|
|
||||||
# 发送一个简单的测试请求
|
|
||||||
response = client.chat.completions.create(
|
|
||||||
model=model_name or "qwen-vl-max-latest",
|
|
||||||
messages=[{"role": "user", "content": "直接回复我文本'当前网络可用'"}]
|
|
||||||
)
|
|
||||||
|
|
||||||
if response and response.choices:
|
|
||||||
return True, tr("QwenVL model is available")
|
|
||||||
else:
|
|
||||||
return False, tr("QwenVL model returned invalid response")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
return False, f"{tr('QwenVL model is not available')}: {str(e)}"
|
|
||||||
|
|
||||||
elif provider.lower() == 'narratoapi':
|
elif provider.lower() == 'narratoapi':
|
||||||
import requests
|
import requests
|
||||||
try:
|
try:
|
||||||
@ -148,9 +125,46 @@ def test_vision_model_connection(api_key, base_url, model_name, provider, tr):
|
|||||||
return False, f"{tr('NarratoAPI is not available')}: HTTP {response.status_code}"
|
return False, f"{tr('NarratoAPI is not available')}: HTTP {response.status_code}"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return False, f"{tr('NarratoAPI is not available')}: {str(e)}"
|
return False, f"{tr('NarratoAPI is not available')}: {str(e)}"
|
||||||
|
|
||||||
else:
|
else:
|
||||||
return False, f"{tr('Unsupported provider')}: {provider}"
|
from openai import OpenAI
|
||||||
|
try:
|
||||||
|
client = OpenAI(
|
||||||
|
api_key=api_key,
|
||||||
|
base_url=base_url,
|
||||||
|
)
|
||||||
|
|
||||||
|
response = client.chat.completions.create(
|
||||||
|
model=model_name,
|
||||||
|
messages=[
|
||||||
|
{
|
||||||
|
"role": "system",
|
||||||
|
"content": [{"type": "text", "text": "You are a helpful assistant."}],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"role": "user",
|
||||||
|
"content": [
|
||||||
|
{
|
||||||
|
"type": "image_url",
|
||||||
|
"image_url": {
|
||||||
|
"url": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241022/emyrja/dog_and_girl.jpeg"
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{"type": "text", "text": "回复我网络可用即可"},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
],
|
||||||
|
)
|
||||||
|
if response and response.choices:
|
||||||
|
return True, tr("QwenVL model is available")
|
||||||
|
else:
|
||||||
|
return False, tr("QwenVL model returned invalid response")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
# logger.debug(api_key)
|
||||||
|
# logger.debug(base_url)
|
||||||
|
# logger.debug(model_name)
|
||||||
|
return False, f"{tr('QwenVL model is not available')}: {str(e)}"
|
||||||
|
|
||||||
|
|
||||||
def render_vision_llm_settings(tr):
|
def render_vision_llm_settings(tr):
|
||||||
@ -158,7 +172,7 @@ def render_vision_llm_settings(tr):
|
|||||||
st.subheader(tr("Vision Model Settings"))
|
st.subheader(tr("Vision Model Settings"))
|
||||||
|
|
||||||
# 视频分析模型提供商选择
|
# 视频分析模型提供商选择
|
||||||
vision_providers = ['Gemini', 'QwenVL', 'NarratoAPI(待发布)']
|
vision_providers = ['Siliconflow', 'Gemini', 'QwenVL', 'OpenAI']
|
||||||
saved_vision_provider = config.app.get("vision_llm_provider", "Gemini").lower()
|
saved_vision_provider = config.app.get("vision_llm_provider", "Gemini").lower()
|
||||||
saved_provider_index = 0
|
saved_provider_index = 0
|
||||||
|
|
||||||
@ -194,8 +208,8 @@ def render_vision_llm_settings(tr):
|
|||||||
)
|
)
|
||||||
st_vision_model_name = st.text_input(
|
st_vision_model_name = st.text_input(
|
||||||
tr("Vision Model Name"),
|
tr("Vision Model Name"),
|
||||||
value=vision_model_name or "gemini-1.5-flash",
|
value=vision_model_name or "gemini-2.0-flash-lite",
|
||||||
help=tr("Default: gemini-1.5-flash")
|
help=tr("Default: gemini-2.0-flash-lite")
|
||||||
)
|
)
|
||||||
elif vision_provider == 'qwenvl':
|
elif vision_provider == 'qwenvl':
|
||||||
st_vision_base_url = st.text_input(
|
st_vision_base_url = st.text_input(
|
||||||
@ -261,52 +275,45 @@ def test_text_model_connection(api_key, base_url, model_name, provider, tr):
|
|||||||
"Authorization": f"Bearer {api_key}",
|
"Authorization": f"Bearer {api_key}",
|
||||||
"Content-Type": "application/json"
|
"Content-Type": "application/json"
|
||||||
}
|
}
|
||||||
|
|
||||||
# 如果没有指定base_url,使用默认值
|
|
||||||
if not base_url:
|
|
||||||
if provider.lower() == 'openai':
|
|
||||||
base_url = "https://api.openai.com/v1"
|
|
||||||
elif provider.lower() == 'moonshot':
|
|
||||||
base_url = "https://api.moonshot.cn/v1"
|
|
||||||
elif provider.lower() == 'deepseek':
|
|
||||||
base_url = "https://api.deepseek.com"
|
|
||||||
|
|
||||||
# 构建测试URL
|
|
||||||
test_url = f"{base_url.rstrip('/')}/chat/completions"
|
|
||||||
|
|
||||||
# 特殊处理Gemini
|
# 特殊处理Gemini
|
||||||
if provider.lower() == 'gemini':
|
if provider.lower() == 'gemini':
|
||||||
import google.generativeai as genai
|
import google.generativeai as genai
|
||||||
try:
|
try:
|
||||||
genai.configure(api_key=api_key)
|
genai.configure(api_key=api_key)
|
||||||
model = genai.GenerativeModel(model_name or 'gemini-pro')
|
model = genai.GenerativeModel(model_name)
|
||||||
model.generate_content("直接回复我文本'当前网络可用'")
|
model.generate_content("直接回复我文本'当前网络可用'")
|
||||||
return True, tr("Gemini model is available")
|
return True, tr("Gemini model is available")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return False, f"{tr('Gemini model is not available')}: {str(e)}"
|
return False, f"{tr('Gemini model is not available')}: {str(e)}"
|
||||||
|
|
||||||
# 构建测试消息
|
|
||||||
test_data = {
|
|
||||||
"model": model_name,
|
|
||||||
"messages": [
|
|
||||||
{"role": "user", "content": "直接回复我文本'当前网络可用'"}
|
|
||||||
],
|
|
||||||
"stream": False
|
|
||||||
}
|
|
||||||
|
|
||||||
# 发送测试请求
|
|
||||||
response = requests.post(
|
|
||||||
test_url,
|
|
||||||
headers=headers,
|
|
||||||
json=test_data,
|
|
||||||
)
|
|
||||||
|
|
||||||
if response.status_code == 200:
|
|
||||||
return True, tr("Text model is available")
|
|
||||||
else:
|
else:
|
||||||
return False, f"{tr('Text model is not available')}: HTTP {response.status_code}"
|
test_url = f"{base_url.rstrip('/')}/chat/completions"
|
||||||
|
|
||||||
|
# 构建测试消息
|
||||||
|
test_data = {
|
||||||
|
"model": model_name,
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": "直接回复我文本'当前网络可用'"}
|
||||||
|
],
|
||||||
|
"stream": False
|
||||||
|
}
|
||||||
|
|
||||||
|
# 发送测试请求
|
||||||
|
response = requests.post(
|
||||||
|
test_url,
|
||||||
|
headers=headers,
|
||||||
|
json=test_data,
|
||||||
|
)
|
||||||
|
# logger.debug(model_name)
|
||||||
|
# logger.debug(api_key)
|
||||||
|
# logger.debug(test_url)
|
||||||
|
if response.status_code == 200:
|
||||||
|
return True, tr("Text model is available")
|
||||||
|
else:
|
||||||
|
return False, f"{tr('Text model is not available')}: HTTP {response.status_code}"
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
logger.error(traceback.format_exc())
|
||||||
return False, f"{tr('Connection failed')}: {str(e)}"
|
return False, f"{tr('Connection failed')}: {str(e)}"
|
||||||
|
|
||||||
|
|
||||||
@ -315,8 +322,8 @@ def render_text_llm_settings(tr):
|
|||||||
st.subheader(tr("Text Generation Model Settings"))
|
st.subheader(tr("Text Generation Model Settings"))
|
||||||
|
|
||||||
# 文案生成模型提供商选择
|
# 文案生成模型提供商选择
|
||||||
text_providers = ['DeepSeek', 'OpenAI', 'Siliconflow', 'Qwen', 'Moonshot', 'Gemini']
|
text_providers = ['OpenAI', 'Siliconflow', 'DeepSeek', 'Gemini', 'Qwen', 'Moonshot']
|
||||||
saved_text_provider = config.app.get("text_llm_provider", "DeepSeek").lower()
|
saved_text_provider = config.app.get("text_llm_provider", "OpenAI").lower()
|
||||||
saved_provider_index = 0
|
saved_provider_index = 0
|
||||||
|
|
||||||
for i, provider in enumerate(text_providers):
|
for i, provider in enumerate(text_providers):
|
||||||
@ -344,8 +351,6 @@ def render_text_llm_settings(tr):
|
|||||||
|
|
||||||
# 添加测试按钮
|
# 添加测试按钮
|
||||||
if st.button(tr("Test Connection"), key="test_text_connection"):
|
if st.button(tr("Test Connection"), key="test_text_connection"):
|
||||||
logger.debug(st_text_base_url)
|
|
||||||
logger.debug(st_text_model_name)
|
|
||||||
with st.spinner(tr("Testing connection...")):
|
with st.spinner(tr("Testing connection...")):
|
||||||
success, message = test_text_model_connection(
|
success, message = test_text_model_connection(
|
||||||
api_key=st_text_api_key,
|
api_key=st_text_api_key,
|
||||||
|
|||||||
@ -24,15 +24,13 @@ def create_vision_analyzer(provider, api_key, model, base_url):
|
|||||||
"""
|
"""
|
||||||
if provider == 'gemini':
|
if provider == 'gemini':
|
||||||
return gemini_analyzer.VisionAnalyzer(model_name=model, api_key=api_key)
|
return gemini_analyzer.VisionAnalyzer(model_name=model, api_key=api_key)
|
||||||
elif provider == 'qwenvl':
|
else:
|
||||||
# 只传入必要的参数
|
# 只传入必要的参数
|
||||||
return qwenvl_analyzer.QwenAnalyzer(
|
return qwenvl_analyzer.QwenAnalyzer(
|
||||||
model_name=model,
|
model_name=model,
|
||||||
api_key=api_key,
|
api_key=api_key,
|
||||||
base_url=base_url
|
base_url=base_url
|
||||||
)
|
)
|
||||||
else:
|
|
||||||
raise ValueError(f"不支持的视觉分析提供商: {provider}")
|
|
||||||
|
|
||||||
|
|
||||||
def get_batch_timestamps(batch_files, prev_batch_files=None):
|
def get_batch_timestamps(batch_files, prev_batch_files=None):
|
||||||
|
|||||||
@ -4,16 +4,12 @@ import json
|
|||||||
import time
|
import time
|
||||||
import asyncio
|
import asyncio
|
||||||
import traceback
|
import traceback
|
||||||
import requests
|
|
||||||
from app.utils import video_processor
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from requests.adapters import HTTPAdapter
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
|
|
||||||
from app.config import config
|
from app.config import config
|
||||||
from app.utils.script_generator import ScriptProcessor
|
from app.utils import utils, video_processor
|
||||||
from app.utils import utils, video_processor, qwenvl_analyzer
|
|
||||||
from webui.tools.base import create_vision_analyzer, get_batch_files, get_batch_timestamps, chekc_video_config
|
from webui.tools.base import create_vision_analyzer, get_batch_files, get_batch_timestamps, chekc_video_config
|
||||||
|
|
||||||
|
|
||||||
@ -111,12 +107,10 @@ def generate_script_docu(params):
|
|||||||
vision_api_key = st.session_state.get('vision_gemini_api_key')
|
vision_api_key = st.session_state.get('vision_gemini_api_key')
|
||||||
vision_model = st.session_state.get('vision_gemini_model_name')
|
vision_model = st.session_state.get('vision_gemini_model_name')
|
||||||
vision_base_url = st.session_state.get('vision_gemini_base_url')
|
vision_base_url = st.session_state.get('vision_gemini_base_url')
|
||||||
elif vision_llm_provider == 'qwenvl':
|
|
||||||
vision_api_key = st.session_state.get('vision_qwenvl_api_key')
|
|
||||||
vision_model = st.session_state.get('vision_qwenvl_model_name', 'qwen-vl-max-latest')
|
|
||||||
vision_base_url = st.session_state.get('vision_qwenvl_base_url')
|
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"不支持的视觉分析提供商: {vision_llm_provider}")
|
vision_api_key = st.session_state.get(f'vision_{vision_llm_provider}_api_key')
|
||||||
|
vision_model = st.session_state.get(f'vision_{vision_llm_provider}_model_name')
|
||||||
|
vision_base_url = st.session_state.get(f'vision_{vision_llm_provider}_base_url')
|
||||||
|
|
||||||
# 创建视觉分析器实例
|
# 创建视觉分析器实例
|
||||||
analyzer = create_vision_analyzer(
|
analyzer = create_vision_analyzer(
|
||||||
@ -354,7 +348,6 @@ def generate_script_docu(params):
|
|||||||
# 整理帧分析数据
|
# 整理帧分析数据
|
||||||
markdown_output = parse_frame_analysis_to_markdown(analysis_json_path)
|
markdown_output = parse_frame_analysis_to_markdown(analysis_json_path)
|
||||||
|
|
||||||
# 生成文案
|
|
||||||
# 生成解说文案
|
# 生成解说文案
|
||||||
narration = generate_narration(
|
narration = generate_narration(
|
||||||
markdown_output,
|
markdown_output,
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user