feat: support Doubao TTS API key auth

This commit is contained in:
viccy 2026-07-02 11:35:23 +08:00
parent d02c848977
commit 1b7bd79654
9 changed files with 207 additions and 72 deletions

View File

@ -33,6 +33,7 @@ NarratoAI is an automated video narration tool that provides an all-in-one solut
</div>
## Latest News
- 2026.07.02 Released version 0.8.4 with Doubao TTS API Key setup and legacy credential compatibility
- 2026.04.03 Released version 0.7.8, refactored the documentary frame-analysis pipeline with a shared service and improved extraction, caching, vision batching, and narration generation
- 2025.05.11 Released new version 0.6.0, supports **short drama commentary** and optimized editing process
- 2025.03.06 Released new version 0.5.2, supports DeepSeek R1 and DeepSeek V3 models for short drama mixing

View File

@ -41,6 +41,7 @@ NarratoAI 是一款自动化影视解说工具,基于 LLM 实现文案撰写
本项目仅供学习和研究使用,不得商用。如需商业授权,请联系作者。
## 最新资讯
- 2026.07.02 发布新版本 0.8.4,升级豆包语音 TTS 新版 API Key 配置并保留旧版凭据兼容
- 2026.06.10 发布新版本 0.8.1,**大版本更新**,优化多个核心流程
- 2026.04.27 发布新版本 0.7.9,新增 **Fun-ASR一键转录字幕**
- 2026.04.03 发布新版本 0.7.8,重构纪录片逐帧分析链路,统一共享服务并优化抽帧、缓存、视觉并发与文案生成流程

View File

@ -0,0 +1,116 @@
import base64
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
from app.services import voice
class FakeDoubaoResponse:
    """Canned HTTP response used in place of a real ``requests.post`` result.

    Provides only the three members defined here: ``status_code``, ``text``
    and ``json()``.
    """

    # Class-level constants: every instance reports the same status and body text.
    status_code = 200
    text = "OK"

    def json(self):
        """Return a payload with ``code`` 3000 and base64-encoded audio in ``data``.

        The ``data`` value is the ASCII base64 encoding of ``b"mp3-bytes"``.
        """
        encoded_audio = base64.b64encode(b"mp3-bytes").decode("ascii")
        return {
            "code": 3000,
            "data": encoded_audio,
        }
class DoubaoTtsTests(unittest.TestCase):
    """Exercise ``voice.doubaotts_tts`` with API-key and legacy credentials.

    Each test rewrites the shared ``voice.config.doubaotts`` and
    ``voice.config.proxy`` dictionaries in place, so copies of both are taken
    in ``setUp`` and written back in ``tearDown``.
    """

    def setUp(self):
        """Snapshot the Doubao TTS and proxy settings before a test mutates them."""
        self.original_doubaotts = dict(voice.config.doubaotts)
        self.original_proxy = dict(voice.config.proxy)

    def tearDown(self):
        """Write the settings captured in ``setUp`` back into the shared dicts."""
        voice.config.doubaotts.clear()
        voice.config.doubaotts.update(self.original_doubaotts)
        voice.config.proxy.clear()
        voice.config.proxy.update(self.original_proxy)

    @staticmethod
    def _use_config(doubaotts_cfg):
        """Replace the Doubao TTS settings with *doubaotts_cfg* and disable the proxy.

        The dictionaries are cleared and refilled in place rather than rebound.
        """
        voice.config.doubaotts.clear()
        voice.config.doubaotts.update(doubaotts_cfg)
        voice.config.proxy.clear()
        voice.config.proxy.update({"enabled": False})

    @staticmethod
    def _read_output(output_file):
        """Return the bytes written to *output_file*, or ``b""`` if it is missing.

        Falling back to empty bytes lets the later ``assertEqual`` report a
        normal test failure instead of the read raising ``FileNotFoundError``.
        """
        return output_file.read_bytes() if output_file.exists() else b""

    def test_api_key_auth_does_not_require_legacy_appid_or_token(self):
        """With only an API key set, the request uses ``X-Api-Key`` and no appid/token."""
        self._use_config(
            {
                "api_key": "db-api-key",
                "cluster": "volcano_tts",
                "volume": 1.2,
                "pitch": 0.9,
                "silence_duration": 0.25,
            }
        )

        with tempfile.TemporaryDirectory() as temp_dir:
            output_file = Path(temp_dir) / "doubao.mp3"
            sub_maker = object()
            with patch("requests.post", return_value=FakeDoubaoResponse()) as post:
                with patch("app.services.voice.new_sub_maker", return_value=sub_maker):
                    result = voice.doubaotts_tts(
                        text=" 你好,豆包新版鉴权。 ",
                        voice_name="BV700_V2_streaming",
                        voice_file=str(output_file),
                        speed=1.25,
                    )
            # Read before the temporary directory is removed.
            output_bytes = self._read_output(output_file)

        self.assertIs(result, sub_maker)
        self.assertEqual(output_bytes, b"mp3-bytes")

        _, kwargs = post.call_args
        self.assertEqual(kwargs["headers"]["X-Api-Key"], "db-api-key")
        self.assertNotIn("Authorization", kwargs["headers"])
        self.assertEqual(kwargs["json"]["app"], {"cluster": "volcano_tts"})
        # The input text carries surrounding spaces; the request is expected without them.
        self.assertEqual(kwargs["json"]["request"]["text"], "你好,豆包新版鉴权。")
        self.assertEqual(kwargs["json"]["audio"]["voice_type"], "BV700_V2_streaming")
        self.assertEqual(kwargs["json"]["audio"]["speed_ratio"], 1.25)
        self.assertEqual(kwargs["json"]["audio"]["volume_ratio"], 1.2)
        self.assertEqual(kwargs["json"]["audio"]["pitch_ratio"], 0.9)
        self.assertEqual(kwargs["json"]["audio"]["silence_duration"], 0.25)

    def test_legacy_token_auth_still_sends_appid_and_token(self):
        """Without an API key, appid/token go in the payload and the token in ``Authorization``."""
        self._use_config(
            {
                "appid": "legacy-appid",
                "token": "legacy-token",
                "cluster": "volcano_tts",
            }
        )

        with tempfile.TemporaryDirectory() as temp_dir:
            output_file = Path(temp_dir) / "doubao.mp3"
            with patch("requests.post", return_value=FakeDoubaoResponse()) as post:
                result = voice.doubaotts_tts(
                    text="旧版鉴权仍然可用",
                    voice_name="BV700_streaming",
                    voice_file=str(output_file),
                    speed=1.0,
                )
            # Read before the temporary directory is removed.
            output_bytes = self._read_output(output_file)

        self.assertIsNotNone(result)
        self.assertEqual(output_bytes, b"mp3-bytes")

        _, kwargs = post.call_args
        self.assertEqual(kwargs["headers"]["Authorization"], "Bearer;legacy-token")
        self.assertNotIn("X-Api-Key", kwargs["headers"])
        self.assertEqual(
            kwargs["json"]["app"],
            {
                "appid": "legacy-appid",
                "token": "legacy-token",
                "cluster": "volcano_tts",
            },
        )
# Run the test cases in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()

View File

@ -1150,14 +1150,13 @@ def doubaotts_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.
"""
# 读取配置
doubaotts_cfg = getattr(config, "doubaotts", {}) or {}
api_key = (doubaotts_cfg.get("api_key", "") or doubaotts_cfg.get("apikey", "")).strip()
appid = doubaotts_cfg.get("appid", "")
token = doubaotts_cfg.get("token", "")
ak = doubaotts_cfg.get("ak", "")
sk = doubaotts_cfg.get("sk", "")
cluster = doubaotts_cfg.get("cluster", "volcano_tts")
if not appid or not token:
logger.error("豆包语音 TTS 配置未完成")
if not api_key and (not appid or not token):
logger.error("豆包语音 TTS 配置未完成,请配置 API Key或填写旧版 AppID 和 Token")
return None
# 准备参数
@ -1174,12 +1173,15 @@ def doubaotts_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.
pitch = doubaotts_cfg.get("pitch", 1.0)
silence_duration = doubaotts_cfg.get("silence_duration", 0.125)
payload = {
"app": {
app_payload = {"cluster": cluster}
if not api_key:
app_payload.update({
"appid": appid,
"token": token,
"cluster": cluster
},
})
payload = {
"app": app_payload,
"user": {
"uid": "NarratoAI"
},
@ -1206,11 +1208,14 @@ def doubaotts_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.
# API 地址
url = "https://openspeech.bytedance.com/api/v1/tts"
# 构建请求头使用Bearer Token认证
# 构建请求头。新版控制台优先使用 API Key,旧配置继续使用 Token。
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer;{token}"
}
if api_key:
headers["X-Api-Key"] = api_key
else:
headers["Authorization"] = f"Bearer;{token}"
for i in range(3):
try:

View File

@ -1,5 +1,5 @@
[app]
project_version="0.7.8"
project_version="0.8.4"
# LLM API 超时配置(秒)
llm_vision_timeout = 120 # 视觉模型基础超时时间
@ -221,10 +221,14 @@
[doubaotts]
# 豆包语音 TTS 配置
# 新版配置优先填写 API Key,旧版 appid/token 配置仍兼容
# 申请流程:
# 1. 打开 https://console.volcengine.com/iam/keymanage 新建 Access Key 和 Secret Key
# 2. 打开 https://www.volcengine.com/product/voice-tech 点击立即使用
# 3. 在 API 服务中心找到音频生成下面的语音合成,获取 APPID 和 Token
# 1. 打开火山引擎豆包语音控制台
# 2. 进入 API Key 管理并创建 API Key
# 3. 确认已开通豆包语音合成服务
api_key = ""
# 旧版配置(兼容保留)
ak = ""
sk = ""
appid = ""

View File

@ -1 +1 @@
0.8.3
0.8.4

View File

@ -1464,42 +1464,55 @@ def render_omnivoice_tts_settings(tr):
def render_doubaotts_settings(tr):
"""渲染豆包语音 TTS 设置"""
# AK 输入
ak = st.text_input(
"Access Key",
value=config.doubaotts.get("ak", ""),
help=tr("Volcengine Access Key Help")
)
# SK 输入
sk = st.text_input(
"Secret Key",
value=config.doubaotts.get("sk", ""),
api_key = st.text_input(
"API Key",
value=config.doubaotts.get("api_key", ""),
type="password",
help=tr("Volcengine Secret Key Help")
help=tr("Doubao API Key Help")
)
ak = config.doubaotts.get("ak", "")
sk = config.doubaotts.get("sk", "")
appid = config.doubaotts.get("appid", "")
token = config.doubaotts.get("token", "")
cluster = config.doubaotts.get("cluster", "volcano_tts")
# AppID 输入
appid = st.text_input(
"AppID",
value=config.doubaotts.get("appid", ""),
help=tr("Doubao AppID Help")
)
with st.expander(tr("Doubao Legacy Credentials"), expanded=False):
# AK 输入
ak = st.text_input(
"Access Key",
value=ak,
help=tr("Volcengine Access Key Help")
)
# Token 输入
token = st.text_input(
"Token",
value=config.doubaotts.get("token", ""),
type="password",
help=tr("Doubao Token Help")
)
# SK 输入
sk = st.text_input(
"Secret Key",
value=sk,
type="password",
help=tr("Volcengine Secret Key Help")
)
# 集群配置
cluster = st.text_input(
tr("Cluster"),
value=config.doubaotts.get("cluster", "volcano_tts"),
help=tr("Doubao Cluster Help")
)
# AppID 输入
appid = st.text_input(
"AppID",
value=appid,
help=tr("Doubao AppID Help")
)
# Token 输入
token = st.text_input(
"Token",
value=token,
type="password",
help=tr("Doubao Token Help")
)
# 集群配置
cluster = st.text_input(
tr("Cluster"),
value=cluster,
help=tr("Doubao Cluster Help")
)
# 音色选择
# 在线音色列表(从文档中提取)
@ -1676,6 +1689,7 @@ def render_doubaotts_settings(tr):
st.info(tr("Doubao TTS Fill Credentials Notice"))
# 保存配置
config.doubaotts["api_key"] = api_key
config.doubaotts["ak"] = ak
config.doubaotts["sk"] = sk
config.doubaotts["appid"] = appid
@ -1690,20 +1704,10 @@ def render_doubaotts_settings(tr):
st.session_state['voice_rate'] = voice_rate # 确保语速参数被保存到session state
# 显示配置状态
if ak and sk and appid and token:
if api_key or (appid and token):
st.success(tr("Doubao TTS configured"))
else:
missing = []
if not ak:
missing.append("Access Key")
if not sk:
missing.append("Secret Key")
if not appid:
missing.append("AppID")
if not token:
missing.append("Token")
if missing:
st.warning(tr("Please configure missing fields").format(fields=', '.join(missing)))
st.warning(tr("Please configure missing fields").format(fields="API Key / AppID + Token"))
def render_voice_preview_new(tr, selected_engine):

View File

@ -652,6 +652,8 @@
"OmniVoice Usage Instructions": "**OmniVoice-Pack speech synthesis**\n\n1. **Automatic voice**: set the API URL and language, then synthesize directly.\n2. **Voice design**: fill instruct with the desired gender, pitch, accent, or style.\n3. **Reference-audio clone**: upload or choose reference audio and fill its matching transcript.\n\n**Notes**:\n- The default service URL is http://127.0.0.1:7866/tts\n- Reference-audio cloning requires reference text when the service has no ASR model loaded\n- OmniVoice returns WAV audio, and NarratoAI estimates subtitle segment timing from the audio duration",
"Volcengine Access Key Help": "Volcengine Access Key",
"Volcengine Secret Key Help": "Volcengine Secret Key",
"Doubao API Key Help": "New Doubao Speech API Key. This field is preferred and does not require AppID or Token.",
"Doubao Legacy Credentials": "Legacy AppID / Token Credentials",
"Doubao AppID Help": "Doubao TTS application AppID",
"Doubao Token Help": "Doubao TTS application Token",
"Cluster": "Cluster",
@ -664,13 +666,13 @@
"Sentence Silence Duration Help": "Adjust sentence-end silence duration (0.0-2.0 seconds)",
"Doubao TTS API Key Application Process": "Doubao TTS API Key Application Process",
"Application Steps": "Application Steps",
"Doubao TTS Step 1": "1. Open [https://console.volcengine.com/iam/keymanage](https://console.volcengine.com/iam/keymanage)",
"Doubao TTS Step 2": "2. Create a new Access Key and Secret Key",
"Doubao TTS Step 3": "3. Open [https://www.volcengine.com/product/voice-tech](https://www.volcengine.com/product/voice-tech)",
"Doubao TTS Step 4": "4. Click Start Now",
"Doubao TTS Step 5": "5. In the left API Service Center, find Speech Synthesis under Audio Generation (note: Speech Synthesis, not the speech synthesis large model)",
"Doubao TTS Step 6": "6. Scroll to the bottom to get the APPID and Access Token",
"Doubao TTS Fill Credentials Notice": "Fill the Access Key, Secret Key, AppID, and Token above.",
"Doubao TTS Step 1": "1. Open the Volcengine Doubao Speech console",
"Doubao TTS Step 2": "2. Open API Key management and create an API Key",
"Doubao TTS Step 3": "3. Make sure Doubao speech synthesis is enabled",
"Doubao TTS Step 4": "4. Copy the API Key into the API Key field above",
"Doubao TTS Step 5": "5. The default cluster is volcano_tts and usually does not need changes",
"Doubao TTS Step 6": "6. Legacy AppID/Token users can keep using the compatibility fields",
"Doubao TTS Fill Credentials Notice": "The new setup only requires an API Key. Legacy AppID/Token credentials remain supported.",
"Doubao TTS configured": "Doubao TTS is configured",
"Please configure missing fields": "Please configure: {fields}",
"Preview Voice Synthesis": "Preview Voice Synthesis",

View File

@ -591,6 +591,8 @@
"OmniVoice Usage Instructions": "**OmniVoice-Pack 语音合成**\n\n1. **自动音色**:只需要设置 API 地址和语言,可直接合成。\n2. **指令音色**:填写 instruct 描述想要的性别、音高、口音或风格。\n3. **参考音频克隆**:上传或选择参考音频,并填写该音频对应文本。\n\n**注意事项**\n- 当前默认服务地址为 http://127.0.0.1:7866/tts\n- 参考音频克隆在服务未加载 ASR 模型时必须填写参考文本\n- OmniVoice 返回 WAV 音频,系统会按音频时长估算字幕段落",
"Volcengine Access Key Help": "火山引擎 Access Key",
"Volcengine Secret Key Help": "火山引擎 Secret Key",
"Doubao API Key Help": "新版豆包语音 API Key,优先使用该字段,无需填写 AppID 和 Token",
"Doubao Legacy Credentials": "旧版 AppID / Token 配置(兼容)",
"Doubao AppID Help": "豆包语音应用 AppID",
"Doubao Token Help": "豆包语音应用 Token",
"Cluster": "集群",
@ -603,13 +605,13 @@
"Sentence Silence Duration Help": "调节句尾静音时长 (0.0-2.0 秒)",
"Doubao TTS API Key Application Process": "豆包语音 TTS API Key申请流程",
"Application Steps": "申请步骤",
"Doubao TTS Step 1": "1. 打开 [https://console.volcengine.com/iam/keymanage](https://console.volcengine.com/iam/keymanage)",
"Doubao TTS Step 2": "2. 新建 Access Key 和 Secret Key",
"Doubao TTS Step 3": "3. 打开 [https://www.volcengine.com/product/voice-tech](https://www.volcengine.com/product/voice-tech)",
"Doubao TTS Step 4": "4. 点击立即使用",
"Doubao TTS Step 5": "5. 在最左边的 API 服务中心找到音频生成下面的语音合成(注意:是语音合成,不是语音合成大模型)",
"Doubao TTS Step 6": "6. 翻到最下面获取 APPID 和 Access Token",
"Doubao TTS Fill Credentials Notice": "请将获取到的 Access Key、Secret Key、AppID 和 Token 填写到上方的配置中",
"Doubao TTS Step 1": "1. 打开火山引擎豆包语音控制台",
"Doubao TTS Step 2": "2. 进入 API Key 管理并创建 API Key",
"Doubao TTS Step 3": "3. 确认已开通豆包语音合成服务",
"Doubao TTS Step 4": "4. 复制 API Key 并填写到上方 API Key 输入框",
"Doubao TTS Step 5": "5. 默认集群使用 volcano_tts,通常无需修改",
"Doubao TTS Step 6": "6. 旧版 AppID/Token 用户可继续在兼容配置中填写原凭据",
"Doubao TTS Fill Credentials Notice": "新版配置只需要填写 API Key,旧版 AppID/Token 仍保留兼容",
"Doubao TTS configured": "豆包语音 TTS 配置已设置",
"Please configure missing fields": "请配置: {fields}",
"Preview Voice Synthesis": "试听语音合成",