NarratoAI/config.example.toml
viccy d147fe66e4 feat(tts): 新增IndexTTS-2语音合成引擎支持
实现兼容IndexTTS2-Pack API的完整TTS调用流程,包含音频下载、错误重试等处理
重构原有IndexTTS-1.5代码,抽象通用逻辑以同时兼容indextts和indextts2两个引擎
新增IndexTTS-2的WebUI配置界面,支持情感控制与高级生成参数调整
更新配置示例文件与中英多语言文案,完善配置迁移逻辑兼容旧版配置
新增对应单元测试覆盖参数处理与配置迁移流程
2026-06-06 14:31:09 +08:00

252 lines
8.5 KiB
TOML
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

[app]
project_version = "0.7.8"
# LLM API timeout settings (seconds)
llm_vision_timeout = 120 # base timeout for the vision model
llm_text_timeout = 180 # base timeout for the text model (complex tasks such as narration script generation need longer)
llm_max_retries = 3 # number of API retries
##########################################
# 🚀 LLM configuration - unified OpenAI-compatible interface
##########################################
# All providers are accessed through the OpenAI-compatible protocol (/v1/chat/completions).
# Supports OpenAI, DeepSeek, Gemini-compatible gateways, Qwen gateways, SiliconFlow, OpenRouter, etc.
# ===== Vision model configuration =====
vision_llm_provider = "openai"
# Model format: provider/model_name
# Common vision model examples:
# - Gemini: gemini/gemini-2.0-flash-lite (recommended; fast and low cost)
# - Gemini: gemini/gemini-1.5-pro (high accuracy)
# - OpenAI: gpt-4o, gpt-4o-mini
# - Qwen: qwen/qwen2.5-vl-32b-instruct
# - SiliconFlow: siliconflow/Qwen/Qwen2.5-VL-32B-Instruct
vision_openai_model_name = "Qwen/Qwen3.5-122B-A10B"
vision_openai_api_key = "" # fill in the API key of the corresponding provider
vision_openai_base_url = "https://api.siliconflow.cn/v1" # optional: custom API base URL (may be left empty for official OpenAI)
vision_openai_temperature = 1.0
vision_openai_top_p = 0.95
vision_openai_max_tokens = 65536
vision_openai_thinking_level = "auto" # auto/off/low/medium/high
# ===== Text model configuration =====
text_llm_provider = "openai"
# Common text model examples:
# - DeepSeek: deepseek/deepseek-chat (recommended; good value for money)
# - DeepSeek: deepseek/deepseek-reasoner (strong reasoning)
# - Gemini: gemini/gemini-2.0-flash (fast)
# - OpenAI: gpt-4o, gpt-4o-mini, gpt-4-turbo
# - Qwen: qwen/qwen-plus, qwen/qwen-turbo
# - SiliconFlow: siliconflow/deepseek-ai/DeepSeek-R1
# - Moonshot: moonshot/moonshot-v1-8k
text_openai_model_name = "Pro/zai-org/GLM-5"
text_openai_api_key = "" # fill in the API key of the corresponding provider
text_openai_base_url = "https://api.siliconflow.cn/v1" # optional: custom API base URL (may be left empty for official OpenAI)
text_openai_temperature = 1.0
text_openai_top_p = 0.95
text_openai_max_tokens = 65536
text_openai_thinking_level = "auto" # auto/off/low/medium/high
# ===== API key reference =====
# Where to obtain API keys for the mainstream LLM providers:
#
# OpenAI: https://platform.openai.com/api-keys
# Gemini: https://makersuite.google.com/app/apikey
# DeepSeek: https://platform.deepseek.com/api_keys
# Qwen (Alibaba): https://bailian.console.aliyun.com/?tab=model#/api-key
# SiliconFlow: https://cloud.siliconflow.cn/account/ak (sign up with a phone number)
# Moonshot: https://platform.moonshot.cn/console/api-keys
# Anthropic: https://console.anthropic.com/settings/keys
# Cohere: https://dashboard.cohere.com/api-keys
# Together AI: https://api.together.xyz/settings/api-keys
##########################################
# 🔧 Advanced configuration (optional)
##########################################
# Controls whether the WebUI shows the configuration items.
# NOTE(review): the key is named hide_config, so `true` presumably hides them - confirm against the WebUI code.
hide_config = true
# Official OpenAI default endpoint (optional):
# text_openai_base_url = "https://api.openai.com/v1"
##########################################
# TTS (文本转语音) 配置
##########################################
[azure]
# Azure TTS configuration
# Get the key at: https://portal.azure.com
speech_key = ""
speech_region = ""
[tencent]
# Tencent Cloud TTS configuration
# Visit https://console.cloud.tencent.com/cam/capi to obtain the keys
secret_id = ""
secret_key = ""
region = "ap-beijing" # region setting
[soulvoice]
# SoulVoice TTS API configuration
api_key = ""
voice_uri = "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr"
api_url = "https://tts.scsmtech.cn/tts"
model = "FunAudioLLM/CosyVoice2-0.5B"
[tts_qwen]
# Tongyi Qianwen (Qwen3) TTS configuration
# Visit https://bailian.console.aliyun.com/?tab=model#/api-key to obtain your API key
api_key = ""
model_name = "qwen3-tts-flash"
[fun_asr]
# Fun-ASR subtitle transcription configuration
# backend = "local" uses the local FunASR-Pack API; backend = "bailian" uses the Alibaba Bailian online fun-asr service
auto_transcribe_enabled = false
backend = "local"
api_url = "http://127.0.0.1:7860"
hotword = ""
enable_spk = false
# When using the Alibaba Bailian online fun-asr service, visit https://bailian.console.aliyun.com/?tab=model#/api-key to obtain an API key
api_key = ""
model = "fun-asr"
[indextts]
# IndexTTS-1.5 voice cloning configuration
# This is an open-source zero-shot voice cloning project that you must deploy yourself
# Project page: https://github.com/index-tts/index-tts
# Default API address (local deployment)
api_url = "http://127.0.0.1:8081/tts"
# Default reference audio (optional)
reference_audio_source = "resource"
# reference_audio = "/path/to/reference_audio.wav"
# Inference mode: "普通推理" (normal inference) / "快速推理" (fast inference)
infer_mode = "普通推理"
# Advanced parameters
temperature = 1.0
top_p = 0.8
top_k = 30
do_sample = true
num_beams = 3
repetition_penalty = 10.0
[indextts2]
# IndexTTS-2 voice cloning configuration
# Supports the IndexTTS2-Pack FastAPI endpoint: POST /tts
# Default API address (local deployment); replace the host if the service runs on another machine
api_url = "http://127.0.0.1:7863/tts"
# Default reference audio (optional); the voice list reuses the IndexTTS-1.5 resource directory
reference_audio_source = "resource"
# reference_audio = "/path/to/reference_audio.wav"
# Emotion control mode: speaker / audio / vector / text
emotion_mode = "speaker"
emotion_audio = ""
emotion_alpha = 0.65
emotion_text = ""
use_random = false
max_text_tokens_per_segment = 120
# 8-dimensional emotion vector, in order: happy, angry, sad, afraid, disgusted, melancholic, surprised, calm
vec_happy = 0.0
vec_angry = 0.0
vec_sad = 0.0
vec_afraid = 0.0
vec_disgusted = 0.0
vec_melancholic = 0.0
vec_surprised = 0.0
vec_calm = 0.8
# Advanced generation parameters
temperature = 0.8
top_p = 0.8
top_k = 30
num_beams = 3
repetition_penalty = 10.0
max_mel_tokens = 1500
[doubaotts]
# Doubao Speech TTS configuration
# How to apply:
# 1. Open https://console.volcengine.com/iam/keymanage and create an Access Key and Secret Key
# 2. Open https://www.volcengine.com/product/voice-tech and click "Use now"
# 3. In the API service center, find Speech Synthesis under Audio Generation and get the APPID and Token
ak = ""
sk = ""
appid = ""
token = ""
cluster = "volcano_tts"
# Advanced parameters
volume = 1.0
pitch = 1.0
silence_duration = 0.125
[ui]
# TTS engine selection (indextts, indextts2, edge_tts, qwen3_tts, tencent_tts, doubaotts, azure_speech)
tts_engine = "indextts"
# Edge TTS settings
edge_voice_name = "zh-CN-XiaoyiNeural-Female"
edge_volume = 80
edge_rate = 1.0
edge_pitch = 0
# Azure Speech Services settings
azure_voice_name = "zh-CN-XiaoyiNeural-Female"
azure_volume = 80
azure_rate = 1.0
azure_pitch = 0
# Doubao Speech TTS settings
doubaotts_voice_type = "BV700_V2_streaming"
doubaotts_rate = 1.0
# Subtitle mask settings: used to cover the source video's own subtitles before burning in new subtitles
subtitle_mask_enabled = false
subtitle_mask_landscape_x_percent = 10
subtitle_mask_landscape_y_percent = 78
subtitle_mask_landscape_width_percent = 80
subtitle_mask_landscape_height_percent = 14
subtitle_mask_landscape_blur_radius = 18
subtitle_mask_landscape_opacity_percent = 82
subtitle_mask_portrait_x_percent = 8
subtitle_mask_portrait_y_percent = 79
subtitle_mask_portrait_width_percent = 84
subtitle_mask_portrait_height_percent = 16
subtitle_mask_portrait_blur_radius = 26
subtitle_mask_portrait_opacity_percent = 84
subtitle_position_landscape_y_percent = 85
subtitle_position_portrait_y_percent = 82
##########################################
# 代理和网络配置
##########################################
[proxy]
# HTTP/HTTPS proxy settings (if needed)
# clash default address: http://127.0.0.1:7890
http = ""
https = ""
enabled = false
##########################################
# 视频处理配置
##########################################
[frames]
# Interval between extracted key frames (seconds)
frame_interval_input = 3
# Number of key frames the LLM processes in a single request
vision_batch_size = 10
# Maximum number of concurrent vision batches (OpenAI-compatible providers)
vision_max_concurrency = 2