mirror of
https://github.com/linyqh/NarratoAI.git
synced 2026-07-02 12:25:35 +00:00
feat: support Doubao TTS API key auth
This commit is contained in:
parent
d02c848977
commit
1b7bd79654
@ -33,6 +33,7 @@ NarratoAI is an automated video narration tool that provides an all-in-one solut
|
||||
</div>
|
||||
|
||||
## Latest News
|
||||
- 2026.07.02 Released version 0.8.4 with Doubao TTS API Key setup and legacy credential compatibility
|
||||
- 2026.04.03 Released version 0.7.8, refactored the documentary frame-analysis pipeline with a shared service and improved extraction, caching, vision batching, and narration generation
|
||||
- 2025.05.11 Released new version 0.6.0, adding support for **short drama commentary** and optimizing the editing process
|
||||
- 2025.03.06 Released new version 0.5.2, adding support for DeepSeek R1 and DeepSeek V3 models for short drama mixing
|
||||
|
||||
@ -41,6 +41,7 @@ NarratoAI 是一款自动化影视解说工具,基于 LLM 实现文案撰写
|
||||
本项目仅供学习和研究使用,不得商用。如需商业授权,请联系作者。
|
||||
|
||||
## 最新资讯
|
||||
- 2026.07.02 发布新版本 0.8.4,升级豆包语音 TTS 新版 API Key 配置并保留旧版凭据兼容
|
||||
- 2026.06.10 发布新版本 0.8.1,**大版本更新**,优化多个核心流程
|
||||
- 2026.04.27 发布新版本 0.7.9,新增 **Fun-ASR一键转录字幕**
|
||||
- 2026.04.03 发布新版本 0.7.8,重构纪录片逐帧分析链路,统一共享服务并优化抽帧、缓存、视觉并发与文案生成流程
|
||||
|
||||
116
app/services/test_doubaotts_tts_unittest.py
Normal file
116
app/services/test_doubaotts_tts_unittest.py
Normal file
@ -0,0 +1,116 @@
|
||||
import base64
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from app.services import voice
|
||||
|
||||
|
||||
class FakeDoubaoResponse:
    """Canned stand-in for the object returned by ``requests.post``.

    Exposes only the attributes and method defined here: ``status_code``,
    ``text`` and ``json()``.
    """

    status_code = 200
    text = "OK"

    def json(self):
        """Return a fixed response body carrying base64-encoded fake audio.

        Returns:
            dict: ``code`` is 3000 and ``data`` is the ASCII base64 encoding
            of the bytes ``b"mp3-bytes"``.
        """
        return {
            "code": 3000,
            "data": base64.b64encode(b"mp3-bytes").decode("ascii"),
        }
|
||||
|
||||
|
||||
class DoubaoTtsTests(unittest.TestCase):
    """Check the HTTP request that ``voice.doubaotts_tts`` builds per auth mode.

    ``requests.post`` is patched in every test, so no network traffic occurs.
    """

    def setUp(self):
        # Snapshot the config sections that the tests overwrite.
        self.original_doubaotts = dict(voice.config.doubaotts)
        self.original_proxy = dict(voice.config.proxy)

    def tearDown(self):
        # Restore by mutating the existing mappings in place (clear + update)
        # rather than rebinding the attributes on ``voice.config``.
        voice.config.doubaotts.clear()
        voice.config.doubaotts.update(self.original_doubaotts)
        voice.config.proxy.clear()
        voice.config.proxy.update(self.original_proxy)

    def _use_doubao_config(self, settings):
        """Replace the doubaotts section with *settings* and disable the proxy."""
        voice.config.doubaotts.clear()
        voice.config.doubaotts.update(settings)
        voice.config.proxy.clear()
        voice.config.proxy.update({"enabled": False})

    def _read_output(self, output_file):
        """Return the bytes written to *output_file*, or ``b""`` if it is absent.

        Returning empty bytes lets a missing file surface as a failed byte
        comparison instead of a ``FileNotFoundError``.
        """
        return output_file.read_bytes() if output_file.exists() else b""

    def _posted_kwargs(self, post):
        """Return the keyword arguments of the last ``requests.post`` call.

        Fails the test with a clear message when the mock was never called;
        unpacking ``post.call_args`` would otherwise raise ``TypeError``
        because it is ``None`` for an uncalled mock.
        """
        self.assertTrue(post.called, "doubaotts_tts did not call requests.post")
        _, kwargs = post.call_args
        return kwargs

    def test_api_key_auth_does_not_require_legacy_appid_or_token(self):
        """With only ``api_key`` set, the request uses the ``X-Api-Key`` header."""
        self._use_doubao_config(
            {
                "api_key": "db-api-key",
                "cluster": "volcano_tts",
                "volume": 1.2,
                "pitch": 0.9,
                "silence_duration": 0.25,
            }
        )

        with tempfile.TemporaryDirectory() as temp_dir:
            output_file = Path(temp_dir) / "doubao.mp3"
            sub_maker = object()

            with patch("requests.post", return_value=FakeDoubaoResponse()) as post, patch(
                "app.services.voice.new_sub_maker", return_value=sub_maker
            ):
                result = voice.doubaotts_tts(
                    text=" 你好,豆包新版鉴权。 ",
                    voice_name="BV700_V2_streaming",
                    voice_file=str(output_file),
                    speed=1.25,
                )
            # Read before the temporary directory is removed.
            output_bytes = self._read_output(output_file)

        self.assertIs(result, sub_maker)
        self.assertEqual(output_bytes, b"mp3-bytes")

        kwargs = self._posted_kwargs(post)
        self.assertEqual(kwargs["headers"]["X-Api-Key"], "db-api-key")
        self.assertNotIn("Authorization", kwargs["headers"])
        self.assertEqual(kwargs["json"]["app"], {"cluster": "volcano_tts"})
        # The surrounding whitespace of the input text is expected to be stripped.
        self.assertEqual(kwargs["json"]["request"]["text"], "你好,豆包新版鉴权。")
        self.assertEqual(kwargs["json"]["audio"]["voice_type"], "BV700_V2_streaming")
        self.assertEqual(kwargs["json"]["audio"]["speed_ratio"], 1.25)
        self.assertEqual(kwargs["json"]["audio"]["volume_ratio"], 1.2)
        self.assertEqual(kwargs["json"]["audio"]["pitch_ratio"], 0.9)
        self.assertEqual(kwargs["json"]["audio"]["silence_duration"], 0.25)

    def test_legacy_token_auth_still_sends_appid_and_token(self):
        """With only ``appid``/``token`` set, the request keeps the Bearer header."""
        self._use_doubao_config(
            {
                "appid": "legacy-appid",
                "token": "legacy-token",
                "cluster": "volcano_tts",
            }
        )

        with tempfile.TemporaryDirectory() as temp_dir:
            output_file = Path(temp_dir) / "doubao.mp3"

            with patch("requests.post", return_value=FakeDoubaoResponse()) as post:
                result = voice.doubaotts_tts(
                    text="旧版鉴权仍然可用",
                    voice_name="BV700_streaming",
                    voice_file=str(output_file),
                    speed=1.0,
                )
            # Read before the temporary directory is removed.
            output_bytes = self._read_output(output_file)

        self.assertIsNotNone(result)
        self.assertEqual(output_bytes, b"mp3-bytes")

        kwargs = self._posted_kwargs(post)
        self.assertEqual(kwargs["headers"]["Authorization"], "Bearer;legacy-token")
        self.assertNotIn("X-Api-Key", kwargs["headers"])
        self.assertEqual(
            kwargs["json"]["app"],
            {
                "appid": "legacy-appid",
                "token": "legacy-token",
                "cluster": "volcano_tts",
            },
        )
|
||||
|
||||
|
||||
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
@ -1150,14 +1150,13 @@ def doubaotts_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.
|
||||
"""
|
||||
# 读取配置
|
||||
doubaotts_cfg = getattr(config, "doubaotts", {}) or {}
|
||||
api_key = (doubaotts_cfg.get("api_key", "") or doubaotts_cfg.get("apikey", "")).strip()
|
||||
appid = doubaotts_cfg.get("appid", "")
|
||||
token = doubaotts_cfg.get("token", "")
|
||||
ak = doubaotts_cfg.get("ak", "")
|
||||
sk = doubaotts_cfg.get("sk", "")
|
||||
cluster = doubaotts_cfg.get("cluster", "volcano_tts")
|
||||
|
||||
if not appid or not token:
|
||||
logger.error("豆包语音 TTS 配置未完成")
|
||||
if not api_key and (not appid or not token):
|
||||
logger.error("豆包语音 TTS 配置未完成,请配置 API Key,或填写旧版 AppID 和 Token")
|
||||
return None
|
||||
|
||||
# 准备参数
|
||||
@ -1174,12 +1173,15 @@ def doubaotts_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.
|
||||
pitch = doubaotts_cfg.get("pitch", 1.0)
|
||||
silence_duration = doubaotts_cfg.get("silence_duration", 0.125)
|
||||
|
||||
payload = {
|
||||
"app": {
|
||||
app_payload = {"cluster": cluster}
|
||||
if not api_key:
|
||||
app_payload.update({
|
||||
"appid": appid,
|
||||
"token": token,
|
||||
"cluster": cluster
|
||||
},
|
||||
})
|
||||
|
||||
payload = {
|
||||
"app": app_payload,
|
||||
"user": {
|
||||
"uid": "NarratoAI"
|
||||
},
|
||||
@ -1206,11 +1208,14 @@ def doubaotts_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.
|
||||
# API 地址
|
||||
url = "https://openspeech.bytedance.com/api/v1/tts"
|
||||
|
||||
# 构建请求头(使用Bearer Token认证)
|
||||
# 构建请求头。新版控制台优先使用 API Key,旧配置继续使用 Token。
|
||||
headers = {
|
||||
"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer;{token}"
|
||||
}
|
||||
if api_key:
|
||||
headers["X-Api-Key"] = api_key
|
||||
else:
|
||||
headers["Authorization"] = f"Bearer;{token}"
|
||||
|
||||
for i in range(3):
|
||||
try:
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
[app]
|
||||
project_version="0.7.8"
|
||||
project_version="0.8.4"
|
||||
|
||||
# LLM API 超时配置(秒)
|
||||
llm_vision_timeout = 120 # 视觉模型基础超时时间
|
||||
@ -221,10 +221,14 @@
|
||||
|
||||
[doubaotts]
|
||||
# 豆包语音 TTS 配置
|
||||
# 新版配置优先填写 API Key;旧版 appid/token 配置仍兼容
|
||||
# 申请流程:
|
||||
# 1. 打开 https://console.volcengine.com/iam/keymanage 新建 Access Key 和 Secret Key
|
||||
# 2. 打开 https://www.volcengine.com/product/voice-tech 点击立即使用
|
||||
# 3. 在 API 服务中心找到音频生成下面的语音合成,获取 APPID 和 Token
|
||||
# 1. 打开火山引擎豆包语音控制台
|
||||
# 2. 进入 API Key 管理并创建 API Key
|
||||
# 3. 确认已开通豆包语音合成服务
|
||||
api_key = ""
|
||||
|
||||
# 旧版配置(兼容保留)
|
||||
ak = ""
|
||||
sk = ""
|
||||
appid = ""
|
||||
|
||||
@ -1 +1 @@
|
||||
0.8.3
|
||||
0.8.4
|
||||
|
||||
@ -1464,42 +1464,55 @@ def render_omnivoice_tts_settings(tr):
|
||||
|
||||
def render_doubaotts_settings(tr):
|
||||
"""渲染豆包语音 TTS 设置"""
|
||||
# AK 输入
|
||||
ak = st.text_input(
|
||||
"Access Key",
|
||||
value=config.doubaotts.get("ak", ""),
|
||||
help=tr("Volcengine Access Key Help")
|
||||
)
|
||||
|
||||
# SK 输入
|
||||
sk = st.text_input(
|
||||
"Secret Key",
|
||||
value=config.doubaotts.get("sk", ""),
|
||||
api_key = st.text_input(
|
||||
"API Key",
|
||||
value=config.doubaotts.get("api_key", ""),
|
||||
type="password",
|
||||
help=tr("Volcengine Secret Key Help")
|
||||
help=tr("Doubao API Key Help")
|
||||
)
|
||||
ak = config.doubaotts.get("ak", "")
|
||||
sk = config.doubaotts.get("sk", "")
|
||||
appid = config.doubaotts.get("appid", "")
|
||||
token = config.doubaotts.get("token", "")
|
||||
cluster = config.doubaotts.get("cluster", "volcano_tts")
|
||||
|
||||
# AppID 输入
|
||||
appid = st.text_input(
|
||||
"AppID",
|
||||
value=config.doubaotts.get("appid", ""),
|
||||
help=tr("Doubao AppID Help")
|
||||
)
|
||||
with st.expander(tr("Doubao Legacy Credentials"), expanded=False):
|
||||
# AK 输入
|
||||
ak = st.text_input(
|
||||
"Access Key",
|
||||
value=ak,
|
||||
help=tr("Volcengine Access Key Help")
|
||||
)
|
||||
|
||||
# Token 输入
|
||||
token = st.text_input(
|
||||
"Token",
|
||||
value=config.doubaotts.get("token", ""),
|
||||
type="password",
|
||||
help=tr("Doubao Token Help")
|
||||
)
|
||||
# SK 输入
|
||||
sk = st.text_input(
|
||||
"Secret Key",
|
||||
value=sk,
|
||||
type="password",
|
||||
help=tr("Volcengine Secret Key Help")
|
||||
)
|
||||
|
||||
# 集群配置
|
||||
cluster = st.text_input(
|
||||
tr("Cluster"),
|
||||
value=config.doubaotts.get("cluster", "volcano_tts"),
|
||||
help=tr("Doubao Cluster Help")
|
||||
)
|
||||
# AppID 输入
|
||||
appid = st.text_input(
|
||||
"AppID",
|
||||
value=appid,
|
||||
help=tr("Doubao AppID Help")
|
||||
)
|
||||
|
||||
# Token 输入
|
||||
token = st.text_input(
|
||||
"Token",
|
||||
value=token,
|
||||
type="password",
|
||||
help=tr("Doubao Token Help")
|
||||
)
|
||||
|
||||
# 集群配置
|
||||
cluster = st.text_input(
|
||||
tr("Cluster"),
|
||||
value=cluster,
|
||||
help=tr("Doubao Cluster Help")
|
||||
)
|
||||
|
||||
# 音色选择
|
||||
# 在线音色列表(从文档中提取)
|
||||
@ -1676,6 +1689,7 @@ def render_doubaotts_settings(tr):
|
||||
st.info(tr("Doubao TTS Fill Credentials Notice"))
|
||||
|
||||
# 保存配置
|
||||
config.doubaotts["api_key"] = api_key
|
||||
config.doubaotts["ak"] = ak
|
||||
config.doubaotts["sk"] = sk
|
||||
config.doubaotts["appid"] = appid
|
||||
@ -1690,20 +1704,10 @@ def render_doubaotts_settings(tr):
|
||||
st.session_state['voice_rate'] = voice_rate # 确保语速参数被保存到session state
|
||||
|
||||
# 显示配置状态
|
||||
if ak and sk and appid and token:
|
||||
if api_key or (appid and token):
|
||||
st.success(tr("Doubao TTS configured"))
|
||||
else:
|
||||
missing = []
|
||||
if not ak:
|
||||
missing.append("Access Key")
|
||||
if not sk:
|
||||
missing.append("Secret Key")
|
||||
if not appid:
|
||||
missing.append("AppID")
|
||||
if not token:
|
||||
missing.append("Token")
|
||||
if missing:
|
||||
st.warning(tr("Please configure missing fields").format(fields=', '.join(missing)))
|
||||
st.warning(tr("Please configure missing fields").format(fields="API Key / AppID + Token"))
|
||||
|
||||
|
||||
def render_voice_preview_new(tr, selected_engine):
|
||||
|
||||
@ -652,6 +652,8 @@
|
||||
"OmniVoice Usage Instructions": "**OmniVoice-Pack speech synthesis**\n\n1. **Automatic voice**: set the API URL and language, then synthesize directly.\n2. **Voice design**: fill instruct with the desired gender, pitch, accent, or style.\n3. **Reference-audio clone**: upload or choose reference audio and fill its matching transcript.\n\n**Notes**:\n- The default service URL is http://127.0.0.1:7866/tts\n- Reference-audio cloning requires reference text when the service has no ASR model loaded\n- OmniVoice returns WAV audio, and NarratoAI estimates subtitle segment timing from the audio duration",
|
||||
"Volcengine Access Key Help": "Volcengine Access Key",
|
||||
"Volcengine Secret Key Help": "Volcengine Secret Key",
|
||||
"Doubao API Key Help": "New Doubao Speech API Key. This field is preferred and does not require AppID or Token.",
|
||||
"Doubao Legacy Credentials": "Legacy AppID / Token Credentials",
|
||||
"Doubao AppID Help": "Doubao TTS application AppID",
|
||||
"Doubao Token Help": "Doubao TTS application Token",
|
||||
"Cluster": "Cluster",
|
||||
@ -664,13 +666,13 @@
|
||||
"Sentence Silence Duration Help": "Adjust sentence-end silence duration (0.0-2.0 seconds)",
|
||||
"Doubao TTS API Key Application Process": "Doubao TTS API Key Application Process",
|
||||
"Application Steps": "Application Steps",
|
||||
"Doubao TTS Step 1": "1. Open [https://console.volcengine.com/iam/keymanage](https://console.volcengine.com/iam/keymanage)",
|
||||
"Doubao TTS Step 2": "2. Create a new Access Key and Secret Key",
|
||||
"Doubao TTS Step 3": "3. Open [https://www.volcengine.com/product/voice-tech](https://www.volcengine.com/product/voice-tech)",
|
||||
"Doubao TTS Step 4": "4. Click Start Now",
|
||||
"Doubao TTS Step 5": "5. In the left API Service Center, find Speech Synthesis under Audio Generation (note: Speech Synthesis, not the speech synthesis large model)",
|
||||
"Doubao TTS Step 6": "6. Scroll to the bottom to get the APPID and Access Token",
|
||||
"Doubao TTS Fill Credentials Notice": "Fill the Access Key, Secret Key, AppID, and Token above.",
|
||||
"Doubao TTS Step 1": "1. Open the Volcengine Doubao Speech console",
|
||||
"Doubao TTS Step 2": "2. Open API Key management and create an API Key",
|
||||
"Doubao TTS Step 3": "3. Make sure Doubao speech synthesis is enabled",
|
||||
"Doubao TTS Step 4": "4. Copy the API Key into the API Key field above",
|
||||
"Doubao TTS Step 5": "5. The default cluster is volcano_tts and usually does not need changes",
|
||||
"Doubao TTS Step 6": "6. Legacy AppID/Token users can keep using the compatibility fields",
|
||||
"Doubao TTS Fill Credentials Notice": "The new setup only requires an API Key. Legacy AppID/Token credentials remain supported.",
|
||||
"Doubao TTS configured": "Doubao TTS is configured",
|
||||
"Please configure missing fields": "Please configure: {fields}",
|
||||
"Preview Voice Synthesis": "Preview Voice Synthesis",
|
||||
|
||||
@ -591,6 +591,8 @@
|
||||
"OmniVoice Usage Instructions": "**OmniVoice-Pack 语音合成**\n\n1. **自动音色**:只需要设置 API 地址和语言,可直接合成。\n2. **指令音色**:填写 instruct 描述想要的性别、音高、口音或风格。\n3. **参考音频克隆**:上传或选择参考音频,并填写该音频对应文本。\n\n**注意事项**:\n- 当前默认服务地址为 http://127.0.0.1:7866/tts\n- 参考音频克隆在服务未加载 ASR 模型时必须填写参考文本\n- OmniVoice 返回 WAV 音频,系统会按音频时长估算字幕段落",
|
||||
"Volcengine Access Key Help": "火山引擎 Access Key",
|
||||
"Volcengine Secret Key Help": "火山引擎 Secret Key",
|
||||
"Doubao API Key Help": "新版豆包语音 API Key;优先使用该字段,无需填写 AppID 和 Token",
|
||||
"Doubao Legacy Credentials": "旧版 AppID / Token 配置(兼容)",
|
||||
"Doubao AppID Help": "豆包语音应用 AppID",
|
||||
"Doubao Token Help": "豆包语音应用 Token",
|
||||
"Cluster": "集群",
|
||||
@ -603,13 +605,13 @@
|
||||
"Sentence Silence Duration Help": "调节句尾静音时长 (0.0-2.0 秒)",
|
||||
"Doubao TTS API Key Application Process": "豆包语音 TTS API Key申请流程",
|
||||
"Application Steps": "申请步骤",
|
||||
"Doubao TTS Step 1": "1. 打开 [https://console.volcengine.com/iam/keymanage](https://console.volcengine.com/iam/keymanage)",
|
||||
"Doubao TTS Step 2": "2. 新建 Access Key 和 Secret Key",
|
||||
"Doubao TTS Step 3": "3. 打开 [https://www.volcengine.com/product/voice-tech](https://www.volcengine.com/product/voice-tech)",
|
||||
"Doubao TTS Step 4": "4. 点击立即使用",
|
||||
"Doubao TTS Step 5": "5. 在最左边的 API 服务中心找到音频生成下面的语音合成(注意:是语音合成,不是语音合成大模型)",
|
||||
"Doubao TTS Step 6": "6. 翻到最下面获取 APPID 和 Access Token",
|
||||
"Doubao TTS Fill Credentials Notice": "请将获取到的 Access Key、Secret Key、AppID 和 Token 填写到上方的配置中",
|
||||
"Doubao TTS Step 1": "1. 打开火山引擎豆包语音控制台",
|
||||
"Doubao TTS Step 2": "2. 进入 API Key 管理并创建 API Key",
|
||||
"Doubao TTS Step 3": "3. 确认已开通豆包语音合成服务",
|
||||
"Doubao TTS Step 4": "4. 复制 API Key 并填写到上方 API Key 输入框",
|
||||
"Doubao TTS Step 5": "5. 默认集群使用 volcano_tts,通常无需修改",
|
||||
"Doubao TTS Step 6": "6. 旧版 AppID/Token 用户可继续在兼容配置中填写原凭据",
|
||||
"Doubao TTS Fill Credentials Notice": "新版配置只需要填写 API Key;旧版 AppID/Token 仍保留兼容",
|
||||
"Doubao TTS configured": "豆包语音 TTS 配置已设置",
|
||||
"Please configure missing fields": "请配置: {fields}",
|
||||
"Preview Voice Synthesis": "试听语音合成",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user