NarratoAI/config.example.toml
viccy 9f28fcfa98 feat: 添加LLM驱动的字幕翻译功能
新增配置项以配置字幕翻译批次大小和最大并发数
补充中、英文国际化文案支持翻译相关界面
实现核心字幕翻译服务,支持批量处理与并发执行
添加WebUI界面用于触发和监控字幕翻译任务
新增完整单元测试覆盖翻译功能全流程
2026-06-11 10:09:58 +08:00

289 lines
9.8 KiB
TOML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

[app]
project_version="0.7.8"
# LLM API timeout settings (seconds)
llm_vision_timeout = 120 # base timeout for the vision model
llm_text_timeout = 180 # base timeout for the text model (complex tasks such as narration script generation need longer)
llm_max_retries = 3 # number of API retries
subtitle_translate_batch_size = 20 # number of subtitle entries handled per translation batch
subtitle_translate_max_workers = 3 # maximum number of subtitle translation batches run concurrently
##########################################
# 🚀 LLM configuration - unified OpenAI-compatible interface
##########################################
# Everything goes through the OpenAI-compatible protocol (/v1/chat/completions).
# Supports OpenAI, DeepSeek, Gemini-compatible gateways, Qwen gateways, SiliconFlow, OpenRouter, etc.
# ===== Vision model configuration =====
vision_llm_provider = "openai"
# Model format: provider/model_name
# Common vision model examples:
# - Gemini: gemini/gemini-2.0-flash-lite (recommended: fast and low cost)
# - Gemini: gemini/gemini-1.5-pro (high accuracy)
# - OpenAI: gpt-4o, gpt-4o-mini
# - Qwen: qwen/qwen2.5-vl-32b-instruct
# - SiliconFlow: siliconflow/Qwen/Qwen2.5-VL-32B-Instruct
vision_openai_model_name = "Qwen/Qwen3.5-122B-A10B"
vision_openai_api_key = "" # API key for the chosen provider
vision_openai_base_url = "https://api.siliconflow.cn/v1" # optional: custom API base URL (may be left empty for official OpenAI)
vision_openai_temperature = 1.0
vision_openai_top_p = 0.95
vision_openai_max_tokens = 65536
vision_openai_thinking_level = "auto" # auto/off/low/medium/high
# ===== Text model configuration =====
text_llm_provider = "openai"
# Common text model examples:
# - DeepSeek: deepseek/deepseek-chat (recommended: good value for money)
# - DeepSeek: deepseek/deepseek-reasoner (strong reasoning)
# - Gemini: gemini/gemini-2.0-flash (fast)
# - OpenAI: gpt-4o, gpt-4o-mini, gpt-4-turbo
# - Qwen: qwen/qwen-plus, qwen/qwen-turbo
# - SiliconFlow: siliconflow/deepseek-ai/DeepSeek-R1
# - Moonshot: moonshot/moonshot-v1-8k
text_openai_model_name = "Pro/zai-org/GLM-5"
text_openai_api_key = "" # API key for the chosen provider
text_openai_base_url = "https://api.siliconflow.cn/v1" # optional: custom API base URL (may be left empty for official OpenAI)
text_openai_temperature = 1.0
text_openai_top_p = 0.95
text_openai_max_tokens = 65536
text_openai_thinking_level = "auto" # auto/off/low/medium/high
# ===== Tavily web search configuration =====
# Used before short-drama plot understanding to look up public plot / character / episode information by the drama's title
tavily_api_key = "" # get a key at: https://app.tavily.com
tavily_search_depth = "basic" # basic / advanced / fast / ultra-fast
tavily_max_results = 5
# ===== API key reference =====
# Where to obtain API keys for the major LLM providers:
#
# OpenAI: https://platform.openai.com/api-keys
# Gemini: https://aistudio.google.com/app/apikey
# DeepSeek: https://platform.deepseek.com/api_keys
# Qwen (Alibaba): https://bailian.console.aliyun.com/?tab=model#/api-key
# SiliconFlow: https://cloud.siliconflow.cn/account/ak (sign up with a phone number)
# Moonshot: https://platform.moonshot.cn/console/api-keys
# Anthropic: https://console.anthropic.com/settings/keys
# Cohere: https://dashboard.cohere.com/api-keys
# Together AI: https://api.together.xyz/settings/api-keys
##########################################
# 🔧 Advanced configuration (optional)
##########################################
# Controls whether the WebUI shows the configuration items
# NOTE(review): the key is named hide_config, so true presumably hides them — confirm against the WebUI code
hide_config = true
# FFmpeg engine path (optional)
# When empty, the system PATH is used; a bundled or local ffmpeg can also be picked from the dropdown in the system settings.
ffmpeg_path = ""
# Official OpenAI default endpoint (optional):
# text_openai_base_url = "https://api.openai.com/v1"
##########################################
# TTS (text-to-speech) configuration
##########################################
[azure]
# Azure TTS configuration
# Get a key at: https://portal.azure.com
speech_key = ""
speech_region = ""
[tencent]
# Tencent Cloud TTS configuration
# Get credentials at https://console.cloud.tencent.com/cam/capi
secret_id = ""
secret_key = ""
region = "ap-beijing" # region setting
[soulvoice]
# SoulVoice TTS API configuration
api_key = ""
voice_uri = "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr"
api_url = "https://tts.scsmtech.cn/tts"
model = "FunAudioLLM/CosyVoice2-0.5B"
[tts_qwen]
# Tongyi Qianwen (Qwen3) TTS configuration
# Visit https://bailian.console.aliyun.com/?tab=model#/api-key to get your API key
api_key = ""
model_name = "qwen3-tts-flash"
[fun_asr]
# Fun-ASR subtitle transcription configuration
# backend = "local" uses the local FunASR-Pack API; backend = "firered" uses the local FireRedASR2-AED-Pack API; backend = "bailian" uses Alibaba Bailian's online fun-asr
auto_transcribe_enabled = false
backend = "local"
api_url = "http://127.0.0.1:7860"
firered_api_url = "http://127.0.0.1:7867"
hotword = ""
enable_spk = false
# When using Alibaba Bailian's online fun-asr, get an API key at https://bailian.console.aliyun.com/?tab=model#/api-key
api_key = ""
model = "fun-asr"
[indextts]
# IndexTTS-1.5 voice cloning configuration
# An open-source zero-shot voice cloning project that you need to deploy yourself
# Project: https://github.com/index-tts/index-tts
# Default API address (local deployment)
api_url = "http://127.0.0.1:8081/tts"
# Default reference audio (optional)
reference_audio_source = "resource"
# reference_audio = "/path/to/reference_audio.wav"
# Inference mode: 普通推理 (normal inference) / 快速推理 (fast inference)
infer_mode = "普通推理"
# Advanced parameters
temperature = 1.0
top_p = 0.8
top_k = 30
do_sample = true
num_beams = 3
repetition_penalty = 10.0
[indextts2]
# IndexTTS-2 voice cloning configuration
# Supports the IndexTTS2-Pack FastAPI endpoint: POST /tts
# Defaults to the local machine, like the other locally deployed services in this file;
# change the host if the service runs on another machine.
api_url = "http://127.0.0.1:7863/tts"
# Default reference audio (optional); the voice list reuses the IndexTTS-1.5 resource directory
reference_audio_source = "resource"
# reference_audio = "/path/to/reference_audio.wav"
# Emotion control: speaker / audio / vector / text
emotion_mode = "speaker"
emotion_audio = ""
emotion_alpha = 0.65
emotion_text = ""
use_random = false
max_text_tokens_per_segment = 120
# Order of the 8-dimensional emotion vector: happy, angry, sad, afraid, disgusted, melancholic, surprised, calm
vec_happy = 0.0
vec_angry = 0.0
vec_sad = 0.0
vec_afraid = 0.0
vec_disgusted = 0.0
vec_melancholic = 0.0
vec_surprised = 0.0
vec_calm = 0.8
# Advanced generation parameters
temperature = 0.8
top_p = 0.8
top_k = 30
num_beams = 3
repetition_penalty = 10.0
max_mel_tokens = 1500
[omnivoice]
# OmniVoice-Pack speech synthesis configuration
# Supports the OmniVoice-Pack FastAPI endpoint: POST /tts
api_url = "http://127.0.0.1:7866/tts"
language = "zh"
# Generation mode: auto / voice_design / voice_clone
mode = "auto"
instruct = ""
# Used in voice_clone mode; the voice list reuses the IndexTTS-1.5 resource directory
reference_audio_source = "resource"
reference_audio = ""
ref_text = ""
# Advanced generation parameters
num_step = 32
guidance_scale = 2.0
speed = 1.0
duration = ""
denoise = true
postprocess_output = true
preprocess_prompt = true
[doubaotts]
# Doubao speech TTS configuration
# How to apply:
# 1. Open https://console.volcengine.com/iam/keymanage and create an Access Key and Secret Key
# 2. Open https://www.volcengine.com/product/voice-tech and click 立即使用 (Use now)
# 3. In the API service center, find 语音合成 (speech synthesis) under 音频生成 (audio generation) to get the APPID and Token
ak = ""
sk = ""
appid = ""
token = ""
cluster = "volcano_tts"
# Advanced parameters
volume = 1.0
pitch = 1.0
silence_duration = 0.125
[ui]
# TTS engine selection (indextts, indextts2, omnivoice, edge_tts, qwen3_tts, tencent_tts, doubaotts, azure_speech)
tts_engine = "indextts"
# Edge TTS settings
edge_voice_name = "zh-CN-XiaoyiNeural-Female"
edge_volume = 80
edge_rate = 1.0
edge_pitch = 0
# Azure Speech Services settings
azure_voice_name = "zh-CN-XiaoyiNeural-Female"
azure_volume = 80
azure_rate = 1.0
azure_pitch = 0
# Doubao speech TTS settings
doubaotts_voice_type = "BV700_V2_streaming"
doubaotts_rate = 1.0
# Subtitle mask settings: cover the source video's built-in subtitles before burning in new subtitles
subtitle_mask_enabled = false
subtitle_mask_landscape_x_percent = 10
subtitle_mask_landscape_y_percent = 78
subtitle_mask_landscape_width_percent = 80
subtitle_mask_landscape_height_percent = 14
subtitle_mask_landscape_blur_radius = 18
subtitle_mask_landscape_opacity_percent = 82
subtitle_mask_portrait_x_percent = 8
subtitle_mask_portrait_y_percent = 79
subtitle_mask_portrait_width_percent = 84
subtitle_mask_portrait_height_percent = 16
subtitle_mask_portrait_blur_radius = 26
subtitle_mask_portrait_opacity_percent = 84
subtitle_position_landscape_y_percent = 85
subtitle_position_portrait_y_percent = 82
##########################################
# Proxy and network configuration
##########################################
[proxy]
# HTTP/HTTPS proxy settings (if needed)
# Clash default address: http://127.0.0.1:7890
http = ""
https = ""
enabled = false
##########################################
# Video processing configuration
##########################################
[frames]
# Interval between extracted keyframes (seconds)
frame_interval_input = 3
# Number of keyframes the LLM processes per request
vision_batch_size = 10
# Maximum number of concurrent vision batches (OpenAI-compatible providers)
vision_max_concurrency = 2