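# Mirror of https://github.com/linyqh/NarratoAI.git
# Synced 2026-06-17 04:42:05 +00:00
# Last change: add config options for the subtitle-translation batch size and
# maximum concurrency; add Chinese/English i18n strings for the translation UI;
# implement the core subtitle-translation service with batching and concurrent
# execution; add a WebUI page to trigger and monitor subtitle-translation tasks;
# add unit tests covering the whole translation flow.
# 289 lines, 9.8 KiB, TOML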
[app]
project_version = "0.7.8"

# LLM API timeouts (seconds)
llm_vision_timeout = 120 # base timeout for the vision model
# Base timeout for the text model (complex tasks such as narration-script
# generation need more time).
llm_text_timeout = 180
llm_max_retries = 3 # number of API retries
subtitle_translate_batch_size = 20 # subtitle entries processed per translation batch
subtitle_translate_max_workers = 3 # maximum number of translation batches run concurrently

##########################################
# LLM configuration - unified OpenAI-compatible interface
##########################################
# Everything goes through the OpenAI-compatible protocol (/v1/chat/completions).
# Supports OpenAI, DeepSeek, Gemini-compatible gateways, Qwen gateways, SiliconFlow, OpenRouter, etc.

# ===== Vision model =====
vision_llm_provider = "openai"

# Model format: provider/model_name
# Common vision model examples:
# - Gemini: gemini/gemini-2.0-flash-lite (recommended: fast and cheap)
# - Gemini: gemini/gemini-1.5-pro (high accuracy)
# - OpenAI: gpt-4o, gpt-4o-mini
# - Qwen: qwen/qwen2.5-vl-32b-instruct
# - SiliconFlow: siliconflow/Qwen/Qwen2.5-VL-32B-Instruct
vision_openai_model_name = "Qwen/Qwen3.5-122B-A10B"
vision_openai_api_key = "" # API key of the chosen provider
# Optional custom API base URL (may be left empty for the official OpenAI endpoint).
vision_openai_base_url = "https://api.siliconflow.cn/v1"
vision_openai_temperature = 1.0
vision_openai_top_p = 0.95
vision_openai_max_tokens = 65536
vision_openai_thinking_level = "auto" # auto/off/low/medium/high

# ===== Text model =====
text_llm_provider = "openai"

# Common text model examples:
# - DeepSeek: deepseek/deepseek-chat (recommended: good value)
# - DeepSeek: deepseek/deepseek-reasoner (strong reasoning)
# - Gemini: gemini/gemini-2.0-flash (fast)
# - OpenAI: gpt-4o, gpt-4o-mini, gpt-4-turbo
# - Qwen: qwen/qwen-plus, qwen/qwen-turbo
# - SiliconFlow: siliconflow/deepseek-ai/DeepSeek-R1
# - Moonshot: moonshot/moonshot-v1-8k
text_openai_model_name = "Pro/zai-org/GLM-5"
text_openai_api_key = "" # API key of the chosen provider
# Optional custom API base URL (may be left empty for the official OpenAI endpoint).
text_openai_base_url = "https://api.siliconflow.cn/v1"
text_openai_temperature = 1.0
text_openai_top_p = 0.95
text_openai_max_tokens = 65536
text_openai_thinking_level = "auto" # auto/off/low/medium/high

# ===== Tavily web search =====
# Used before short-drama plot analysis to look up public plot, character and
# episode information by drama title.
tavily_api_key = "" # get a key at https://app.tavily.com
tavily_search_depth = "basic" # basic / advanced / fast / ultra-fast
tavily_max_results = 5

# ===== API key reference =====
# Where to obtain API keys for the mainstream LLM providers:
#
# OpenAI: https://platform.openai.com/api-keys
# Gemini: https://makersuite.google.com/app/apikey
# DeepSeek: https://platform.deepseek.com/api_keys
# Qwen (Alibaba): https://bailian.console.aliyun.com/?tab=model#/api-key
# SiliconFlow: https://cloud.siliconflow.cn/account/ak (sign up with a phone number)
# Moonshot: https://platform.moonshot.cn/console/api-keys
# Anthropic: https://console.anthropic.com/settings/keys
# Cohere: https://dashboard.cohere.com/api-keys
# Together AI: https://api.together.xyz/settings/api-keys

##########################################
# Advanced settings (optional)
##########################################

# Controls whether the configuration items are shown in the WebUI.
# NOTE(review): the key name suggests true = hidden - confirm against the consumer.
hide_config = true

# FFmpeg binary path (optional).
# When empty the system PATH is used; the bundled or the local ffmpeg can also
# be picked from the drop-down in the system settings.
ffmpeg_path = ""

# Official OpenAI default endpoint (optional):
# text_openai_base_url = "https://api.openai.com/v1"

##########################################
# TTS (text-to-speech) configuration
##########################################

[azure]
# Azure TTS settings
# Get a key at: https://portal.azure.com
speech_key = ""
speech_region = ""

[tencent]
# Tencent Cloud TTS settings
# Get the credentials at https://console.cloud.tencent.com/cam/capi
secret_id = ""
secret_key = ""
region = "ap-beijing" # service region

[soulvoice]
# SoulVoice TTS API settings
api_key = ""
voice_uri = "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr"
api_url = "https://tts.scsmtech.cn/tts"
model = "FunAudioLLM/CosyVoice2-0.5B"

[tts_qwen]
# Tongyi Qianwen Qwen3 TTS settings
# Get your API key at https://bailian.console.aliyun.com/?tab=model#/api-key
api_key = ""
model_name = "qwen3-tts-flash"

[fun_asr]
# Fun-ASR subtitle transcription settings
# backend = "local"   -> local FunASR-Pack API
# backend = "firered" -> local FireRedASR2-AED-Pack API
# backend = "bailian" -> Alibaba Bailian online fun-asr
auto_transcribe_enabled = false
backend = "local"
api_url = "http://127.0.0.1:7860"
firered_api_url = "http://127.0.0.1:7867"
hotword = ""
enable_spk = false
# For the Alibaba Bailian online fun-asr backend, get an API key at
# https://bailian.console.aliyun.com/?tab=model#/api-key
api_key = ""
model = "fun-asr"

[indextts]
# IndexTTS-1.5 voice-cloning settings
# An open-source zero-shot voice-cloning project that you have to deploy yourself.
# Project: https://github.com/index-tts/index-tts
# Default API URL (local deployment)
api_url = "http://127.0.0.1:8081/tts"

# Default reference audio (optional)
reference_audio_source = "resource"
# reference_audio = "/path/to/reference_audio.wav"

# Inference mode: "普通推理" (normal inference) / "快速推理" (fast inference)
infer_mode = "普通推理"

# Advanced parameters
temperature = 1.0
top_p = 0.8
top_k = 30
do_sample = true
num_beams = 3
repetition_penalty = 10.0

[indextts2]
# IndexTTS-2 voice-cloning settings
# Talks to the IndexTTS2-Pack FastAPI endpoint: POST /tts
# Defaults to the loopback address like the other locally deployed services in
# this file; change the host if the service runs on another machine.
api_url = "http://127.0.0.1:7863/tts"

# Default reference audio (optional); the voice list reuses the IndexTTS-1.5
# resource directory.
reference_audio_source = "resource"
# reference_audio = "/path/to/reference_audio.wav"

# Emotion control: speaker / audio / vector / text
emotion_mode = "speaker"
emotion_audio = ""
emotion_alpha = 0.65
emotion_text = ""
use_random = false
max_text_tokens_per_segment = 120

# 8-dimensional emotion vector, in order:
# happy, angry, sad, afraid, disgusted, melancholic, surprised, calm
vec_happy = 0.0
vec_angry = 0.0
vec_sad = 0.0
vec_afraid = 0.0
vec_disgusted = 0.0
vec_melancholic = 0.0
vec_surprised = 0.0
vec_calm = 0.8

# Advanced generation parameters
temperature = 0.8
top_p = 0.8
top_k = 30
num_beams = 3
repetition_penalty = 10.0
max_mel_tokens = 1500

[omnivoice]
# OmniVoice-Pack speech synthesis settings
# Talks to the OmniVoice-Pack FastAPI endpoint: POST /tts
api_url = "http://127.0.0.1:7866/tts"
language = "zh"

# Generation mode: auto / voice_design / voice_clone
mode = "auto"
instruct = ""

# Used in voice_clone mode; the voice list reuses the IndexTTS-1.5 resource directory.
reference_audio_source = "resource"
reference_audio = ""
ref_text = ""

# Advanced generation parameters
num_step = 32
guidance_scale = 2.0
speed = 1.0
duration = ""
denoise = true
postprocess_output = true
preprocess_prompt = true

[doubaotts]
# Doubao speech TTS settings
# How to apply:
# 1. Open https://console.volcengine.com/iam/keymanage and create an Access Key and a Secret Key
# 2. Open https://www.volcengine.com/product/voice-tech and click "use now"
# 3. In the API service center, find speech synthesis under audio generation and obtain the APPID and Token
ak = ""
sk = ""
appid = ""
token = ""
cluster = "volcano_tts"

# Advanced parameters
volume = 1.0
pitch = 1.0
silence_duration = 0.125

[ui]
# TTS engine selection (indextts, indextts2, omnivoice, edge_tts, qwen3_tts, tencent_tts, doubaotts, azure_speech)
tts_engine = "indextts"

# Edge TTS settings
edge_voice_name = "zh-CN-XiaoyiNeural-Female"
edge_volume = 80
edge_rate = 1.0
edge_pitch = 0

# Azure Speech Services settings
azure_voice_name = "zh-CN-XiaoyiNeural-Female"
azure_volume = 80
azure_rate = 1.0
azure_pitch = 0

# Doubao speech TTS settings
doubaotts_voice_type = "BV700_V2_streaming"
doubaotts_rate = 1.0

# Subtitle mask: covers the subtitles already present in the source video
# before the new subtitles are burned in.
subtitle_mask_enabled = false
subtitle_mask_landscape_x_percent = 10
subtitle_mask_landscape_y_percent = 78
subtitle_mask_landscape_width_percent = 80
subtitle_mask_landscape_height_percent = 14
subtitle_mask_landscape_blur_radius = 18
subtitle_mask_landscape_opacity_percent = 82
subtitle_mask_portrait_x_percent = 8
subtitle_mask_portrait_y_percent = 79
subtitle_mask_portrait_width_percent = 84
subtitle_mask_portrait_height_percent = 16
subtitle_mask_portrait_blur_radius = 26
subtitle_mask_portrait_opacity_percent = 84
subtitle_position_landscape_y_percent = 85
subtitle_position_portrait_y_percent = 82

##########################################
# Proxy and network configuration
##########################################

[proxy]
# HTTP/HTTPS proxy settings (if needed)
# Clash default address: http://127.0.0.1:7890
http = ""
https = ""
enabled = false

##########################################
# Video processing configuration
##########################################

[frames]
# Interval between extracted key frames (seconds)
frame_interval_input = 3

# Number of key frames the LLM processes per request
vision_batch_size = 10

# Maximum number of vision batches run concurrently (OpenAI-compatible providers)
vision_max_concurrency = 2