NarratoAI/config.example.toml

[app]
    project_version="0.6.0"
    # 支持视频理解的大模型提供商
    #   gemini
    #   qwenvl
    vision_llm_provider="qwenvl"
    vision_analysis_prompt = "你是资深视频内容分析专家，擅长分析视频画面信息，分析下面视频画面内容，只输出客观的画面描述不要给任何总结或评价"

    ########## Vision Gemini API Key
    vision_gemini_api_key = ""
    vision_gemini_model_name = "gemini-2.0-flash"

    ########## Vision Qwen API Key (默认使用“硅基流动”的QwenVL模型)
    vision_qwenvl_api_key = ""
    vision_qwenvl_model_name = "Qwen/Qwen2.5-VL-32B-Instruct"
    vision_qwenvl_base_url = "https://api.siliconflow.cn/v1"

    ########### Vision NarratoAPI Key
    narrato_api_key = "ggyY91BAO-_ULvAqKum3XexcyN1G3dP86DEzvjZDcrg"
    narrato_api_url = "https://narratoinsight.scsmtech.cn/api/v1"
    narrato_vision_model = "gemini-1.5-flash"
    narrato_vision_key = ""
    narrato_llm_model = "gpt-4o"
    narrato_llm_key = ""

    # 用于生成文案的大模型支持的提供商 (Supported providers):
    #   openai (默认)
    #   deepseek (默认使用“硅基流动”的模型)
    #   moonshot (月之暗面)
    #   qwen (通义千问)
    #   gemini
    text_llm_provider="deepseek"

    ########## OpenAI API Key
    # Get your API key at https://platform.openai.com/api-keys
    text_openai_api_key = ""
    text_openai_base_url = "https://api.openai.com/v1"
    text_openai_model_name = "gpt-4o-mini"

    ########## DeepSeek API Key
    # 使用 硅基流动 第三方 API Key，使用手机号注册：https://cloud.siliconflow.cn/i/pyOKqFCV
    text_deepseek_api_key = ""
    text_deepseek_base_url = "https://api.siliconflow.cn/v1"
    text_deepseek_model_name = "deepseek-ai/DeepSeek-V3"

    ########## Moonshot API Key
    # Visit https://platform.moonshot.cn/console/api-keys to get your API key.
    text_moonshot_api_key=""
    text_moonshot_base_url = "https://api.moonshot.cn/v1"
    text_moonshot_model_name = "moonshot-v1-8k"

    ########## G4F
    # Visit https://github.com/xtekky/gpt4free to get more details
    # Supported model list: https://github.com/xtekky/gpt4free/blob/main/g4f/models.py
    text_g4f_model_name = "gpt-3.5-turbo"

    ########## Azure API Key
    # Visit https://learn.microsoft.com/zh-cn/azure/ai-services/openai/ to get more details
    # API documentation: https://learn.microsoft.com/zh-cn/azure/ai-services/openai/reference
    text_azure_api_key = ""
    text_azure_base_url=""
    text_azure_model_name="gpt-35-turbo" # replace with your model deployment name
    text_azure_api_version = "2024-02-15-preview"

    ########## Gemini API Key
    text_gemini_api_key=""
    text_gemini_model_name = "gemini-1.5-flash"

    ########## Qwen API Key
    # Visit https://dashscope.console.aliyun.com/apiKey to get your API key
    # Visit below links to get more details
    # https://tongyi.aliyun.com/qianwen/
    # https://help.aliyun.com/zh/dashscope/developer-reference/model-introduction
    text_qwen_api_key = ""
    text_qwen_model_name = "qwen-plus-1127"
    text_qwen_base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"


    # 字幕提供商、可选，支持 whisper 和 faster-whisper-large-v2"whisper"
    # 默认为 faster-whisper-large-v2 模型地址：https://huggingface.co/guillaumekln/faster-whisper-large-v2
    subtitle_provider = "faster-whisper-large-v2"
    subtitle_enabled = true

    # ImageMagick
    # 安装后，将自动检测到 ImageMagick，Windows 除外！
    # 例如，在 Windows 上 "C:\Program Files (x86)\ImageMagick-7.1.1-Q16-HDRI\magick.exe"
    # 下载位置 https://imagemagick.org/archive/binaries/ImageMagick-7.1.1-29-Q16-x64-static.exe
    # imagemagick_path = "C:\\Program Files (x86)\\ImageMagick-7.1.1-Q16\\magick.exe"

    # FFMPEG
    #
    # 通常情况下，ffmpeg 会被自动下载，并且会被自动检测到。
    # 但是如果你的环境有问题，无法自动下载，可能会遇到如下错误：
    #   RuntimeError: No ffmpeg exe could be found.
    #   Install ffmpeg on your system, or set the IMAGEIO_FFMPEG_EXE environment variable.
    # 此时你可以手动下载 ffmpeg 并设置 ffmpeg_path，下载地址：https://www.gyan.dev/ffmpeg/builds/

    # ffmpeg_path = "C:\\Users\\harry\\Downloads\\ffmpeg.exe"
    #########################################################################################

    # 当视频生成成功后，API服务提供的视频下载接入点，默认为当前服务的地址和监听端口
    # 比如 http://127.0.0.1:8080/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
    # 如果你需要使用域名对外提供服务（一般会用nginx做代理），则可以设置为你的域名
    # 比如 https://xxxx.com/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
    # endpoint="https://xxxx.com"

    # When the video is successfully generated, the API service provides a download endpoint for the video, defaulting to the service's current address and listening port.
    # For example, http://127.0.0.1:8080/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
    # If you need to provide the service externally using a domain name (usually done with nginx as a proxy), you can set it to your domain name.
    # For example, https://xxxx.com/tasks/6357f542-a4e1-46a1-b4c9-bf3bd0df5285/final-1.mp4
    # endpoint="https://xxxx.com"
    endpoint=""


    # Video material storage location
    # material_directory = ""                    # Indicates that video materials will be downloaded to the default folder, the default folder is ./storage/cache_videos under the current project
    # material_directory = "/user/harry/videos"  # Indicates that video materials will be downloaded to a specified folder
    # material_directory = "task"                # Indicates that video materials will be downloaded to the current task's folder, this method does not allow sharing of already downloaded video materials

    # 视频素材存放位置
    # material_directory = ""                    #表示将视频素材下载到默认的文件夹，默认文件夹为当前项目下的 ./storage/cache_videos
    # material_directory = "/user/harry/videos"  #表示将视频素材下载到指定的文件夹中
    # material_directory = "task"                #表示将视频素材下载到当前任务的文件夹中，这种方式无法共享已经下载的视频素材

    material_directory = ""

    # 用于任务的状态管理
    enable_redis = false
    redis_host = "localhost"
    redis_port = 6379
    redis_db = 0
    redis_password = ""

    # 文生视频时的最大并发任务数
    max_concurrent_tasks = 5

    # webui界面是否显示配置项
    hide_config = false


[whisper]
    # Only effective when subtitle_provider is "whisper"

    # Run on GPU with FP16
    # model = WhisperModel(model_size, device="cuda", compute_type="float16")

    # Run on GPU with INT8
    # model = WhisperModel(model_size, device="cuda", compute_type="int8_float16")

    # Run on CPU with INT8
    # model = WhisperModel(model_size, device="cpu", compute_type="int8")

    # recommended model_size: "large-v3"
    model_size="faster-whisper-large-v2"
    # 如果要使用 GPU，请设置 device=“cuda”
    device="CPU"
    compute_type="int8"


[proxy]
    ### Use a proxy to access the Pexels API
    ### Format: "http://<username>:<password>@<proxy>:<port>"
    ### Example: "http://user:pass@proxy:1234"
    ### Doc: https://requests.readthedocs.io/en/latest/user/advanced/#proxies

    http = "http://127.0.0.1:7890"
    https = "http://127.0.0.1:7890"

[azure]
    # Azure Speech API Key
    # Get your API key at https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices
    speech_key=""
    speech_region=""

[frames]
    skip_seconds = 0
    # threshold（差异阈值）用于判断两个连续帧之间是否发生了场景切换
    # 较小的阈值（如 20）：更敏感，能捕捉到细微的场景变化，但可能会误判，关键帧图片更多
    # 较大的阈值（如 40）：更保守，只捕捉明显的场景切换，但可能会漏掉渐变场景，关键帧图片更少
    # 默认值 30：在实践中是一个比较平衡的选择
    threshold = 30
    version = "v2"
    # 大模型单次处理的关键帧数量
    vision_batch_size = 5