diff --git a/README-en.md b/README-en.md index 9e9d481..d6082a7 100644 --- a/README-en.md +++ b/README-en.md @@ -33,6 +33,7 @@ NarratoAI is an automated video narration tool that provides an all-in-one solut ## Latest News +- 2026.07.02 Released version 0.8.4 with Doubao TTS API Key setup and legacy credential compatibility - 2026.04.03 Released version 0.7.8, refactored the documentary frame-analysis pipeline with a shared service and improved extraction, caching, vision batching, and narration generation - 2025.05.11 Released new version 0.6.0, supports **short drama commentary** and optimized editing process - 2025.03.06 Released new version 0.5.2, supports DeepSeek R1 and DeepSeek V3 models for short drama mixing diff --git a/README.md b/README.md index 515ab26..fdfca96 100644 --- a/README.md +++ b/README.md @@ -41,6 +41,7 @@ NarratoAI 是一款自动化影视解说工具,基于 LLM 实现文案撰写 本项目仅供学习和研究使用,不得商用。如需商业授权,请联系作者。 ## 最新资讯 +- 2026.07.02 发布新版本 0.8.4,升级豆包语音 TTS 新版 API Key 配置并保留旧版凭据兼容 - 2026.06.10 发布新版本 0.8.1,**大版本更新**,优化多个核心流程 - 2026.04.27 发布新版本 0.7.9,新增 **Fun-ASR一键转录字幕** - 2026.04.03 发布新版本 0.7.8,重构纪录片逐帧分析链路,统一共享服务并优化抽帧、缓存、视觉并发与文案生成流程 diff --git a/app/services/test_doubaotts_tts_unittest.py b/app/services/test_doubaotts_tts_unittest.py new file mode 100644 index 0000000..6f7c122 --- /dev/null +++ b/app/services/test_doubaotts_tts_unittest.py @@ -0,0 +1,116 @@ +import base64 +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +from app.services import voice + + +class FakeDoubaoResponse: + status_code = 200 + text = "OK" + + def json(self): + return { + "code": 3000, + "data": base64.b64encode(b"mp3-bytes").decode("ascii"), + } + + +class DoubaoTtsTests(unittest.TestCase): + def setUp(self): + self.original_doubaotts = dict(voice.config.doubaotts) + self.original_proxy = dict(voice.config.proxy) + + def tearDown(self): + voice.config.doubaotts.clear() + voice.config.doubaotts.update(self.original_doubaotts) + voice.config.proxy.clear() + 
voice.config.proxy.update(self.original_proxy) + + def test_api_key_auth_does_not_require_legacy_appid_or_token(self): + voice.config.doubaotts.clear() + voice.config.doubaotts.update( + { + "api_key": "db-api-key", + "cluster": "volcano_tts", + "volume": 1.2, + "pitch": 0.9, + "silence_duration": 0.25, + } + ) + voice.config.proxy.clear() + voice.config.proxy.update({"enabled": False}) + + with tempfile.TemporaryDirectory() as temp_dir: + output_file = Path(temp_dir) / "doubao.mp3" + sub_maker = object() + + with patch("requests.post", return_value=FakeDoubaoResponse()) as post, patch( + "app.services.voice.new_sub_maker", return_value=sub_maker + ): + result = voice.doubaotts_tts( + text=" 你好,豆包新版鉴权。 ", + voice_name="BV700_V2_streaming", + voice_file=str(output_file), + speed=1.25, + ) + output_bytes = output_file.read_bytes() if output_file.exists() else b"" + + self.assertIs(result, sub_maker) + self.assertEqual(output_bytes, b"mp3-bytes") + + _, kwargs = post.call_args + self.assertEqual(kwargs["headers"]["X-Api-Key"], "db-api-key") + self.assertNotIn("Authorization", kwargs["headers"]) + self.assertEqual(kwargs["json"]["app"], {"cluster": "volcano_tts"}) + self.assertEqual(kwargs["json"]["request"]["text"], "你好,豆包新版鉴权。") + self.assertEqual(kwargs["json"]["audio"]["voice_type"], "BV700_V2_streaming") + self.assertEqual(kwargs["json"]["audio"]["speed_ratio"], 1.25) + self.assertEqual(kwargs["json"]["audio"]["volume_ratio"], 1.2) + self.assertEqual(kwargs["json"]["audio"]["pitch_ratio"], 0.9) + self.assertEqual(kwargs["json"]["audio"]["silence_duration"], 0.25) + + def test_legacy_token_auth_still_sends_appid_and_token(self): + voice.config.doubaotts.clear() + voice.config.doubaotts.update( + { + "appid": "legacy-appid", + "token": "legacy-token", + "cluster": "volcano_tts", + } + ) + voice.config.proxy.clear() + voice.config.proxy.update({"enabled": False}) + + with tempfile.TemporaryDirectory() as temp_dir: + output_file = Path(temp_dir) / "doubao.mp3" + + 
with patch("requests.post", return_value=FakeDoubaoResponse()) as post: + result = voice.doubaotts_tts( + text="旧版鉴权仍然可用", + voice_name="BV700_streaming", + voice_file=str(output_file), + speed=1.0, + ) + output_bytes = output_file.read_bytes() + + self.assertIsNotNone(result) + self.assertEqual(output_bytes, b"mp3-bytes") + + _, kwargs = post.call_args + self.assertEqual(kwargs["headers"]["Authorization"], "Bearer;legacy-token") + self.assertNotIn("X-Api-Key", kwargs["headers"]) + self.assertEqual( + kwargs["json"]["app"], + { + "appid": "legacy-appid", + "token": "legacy-token", + "cluster": "volcano_tts", + }, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/app/services/voice.py b/app/services/voice.py index 476c2fe..d003151 100644 --- a/app/services/voice.py +++ b/app/services/voice.py @@ -1150,14 +1150,13 @@ def doubaotts_tts(text: str, voice_name: str, voice_file: str, speed: float = 1. """ # 读取配置 doubaotts_cfg = getattr(config, "doubaotts", {}) or {} + api_key = str(doubaotts_cfg.get("api_key") or "").strip() or str(doubaotts_cfg.get("apikey") or "").strip() appid = doubaotts_cfg.get("appid", "") token = doubaotts_cfg.get("token", "") - ak = doubaotts_cfg.get("ak", "") - sk = doubaotts_cfg.get("sk", "") cluster = doubaotts_cfg.get("cluster", "volcano_tts") - if not appid or not token: - logger.error("豆包语音 TTS 配置未完成") + if not api_key and (not appid or not token): + logger.error("豆包语音 TTS 配置未完成,请配置 API Key,或填写旧版 AppID 和 Token") return None # 准备参数 @@ -1174,12 +1173,15 @@ def doubaotts_tts(text: str, voice_name: str, voice_file: str, speed: float = 1. 
pitch = doubaotts_cfg.get("pitch", 1.0) silence_duration = doubaotts_cfg.get("silence_duration", 0.125) - payload = { - "app": { + app_payload = {"cluster": cluster} + if not api_key: + app_payload.update({ "appid": appid, "token": token, - "cluster": cluster - }, + }) + + payload = { + "app": app_payload, "user": { "uid": "NarratoAI" }, @@ -1206,11 +1208,14 @@ def doubaotts_tts(text: str, voice_name: str, voice_file: str, speed: float = 1. # API 地址 url = "https://openspeech.bytedance.com/api/v1/tts" - # 构建请求头(使用Bearer Token认证) + # 构建请求头。新版控制台优先使用 API Key,旧配置继续使用 Token。 headers = { "Content-Type": "application/json", - "Authorization": f"Bearer;{token}" } + if api_key: + headers["X-Api-Key"] = api_key + else: + headers["Authorization"] = f"Bearer;{token}" for i in range(3): try: diff --git a/config.example.toml b/config.example.toml index c80a3dc..db6f34d 100644 --- a/config.example.toml +++ b/config.example.toml @@ -1,5 +1,5 @@ [app] - project_version="0.7.8" + project_version="0.8.4" # LLM API 超时配置(秒) llm_vision_timeout = 120 # 视觉模型基础超时时间 @@ -221,10 +221,14 @@ [doubaotts] # 豆包语音 TTS 配置 + # 新版配置优先填写 API Key;旧版 appid/token 配置仍兼容 # 申请流程: - # 1. 打开 https://console.volcengine.com/iam/keymanage 新建 Access Key 和 Secret Key - # 2. 打开 https://www.volcengine.com/product/voice-tech 点击立即使用 - # 3. 在 API 服务中心找到音频生成下面的语音合成,获取 APPID 和 Token + # 1. 打开火山引擎豆包语音控制台 + # 2. 进入 API Key 管理并创建 API Key + # 3. 
确认已开通豆包语音合成服务 + api_key = "" + + # 旧版配置(兼容保留) ak = "" sk = "" appid = "" diff --git a/project_version b/project_version index fab77af..b60d719 100644 --- a/project_version +++ b/project_version @@ -1 +1 @@ -0.8.3 \ No newline at end of file +0.8.4 diff --git a/webui/components/audio_settings.py b/webui/components/audio_settings.py index 31464de..dd64434 100644 --- a/webui/components/audio_settings.py +++ b/webui/components/audio_settings.py @@ -1464,42 +1464,55 @@ def render_omnivoice_tts_settings(tr): def render_doubaotts_settings(tr): """渲染豆包语音 TTS 设置""" - # AK 输入 - ak = st.text_input( - "Access Key", - value=config.doubaotts.get("ak", ""), - help=tr("Volcengine Access Key Help") - ) - - # SK 输入 - sk = st.text_input( - "Secret Key", - value=config.doubaotts.get("sk", ""), + api_key = st.text_input( + "API Key", + value=config.doubaotts.get("api_key", "") or config.doubaotts.get("apikey", ""), type="password", - help=tr("Volcengine Secret Key Help") + help=tr("Doubao API Key Help") ) + ak = config.doubaotts.get("ak", "") + sk = config.doubaotts.get("sk", "") + appid = config.doubaotts.get("appid", "") + token = config.doubaotts.get("token", "") + cluster = config.doubaotts.get("cluster", "volcano_tts") - # AppID 输入 - appid = st.text_input( - "AppID", - value=config.doubaotts.get("appid", ""), - help=tr("Doubao AppID Help") - ) + with st.expander(tr("Doubao Legacy Credentials"), expanded=False): + # AK 输入 + ak = st.text_input( + "Access Key", + value=ak, + help=tr("Volcengine Access Key Help") + ) - # Token 输入 - token = st.text_input( - "Token", - value=config.doubaotts.get("token", ""), - type="password", - help=tr("Doubao Token Help") - ) + # SK 输入 + sk = st.text_input( + "Secret Key", + value=sk, + type="password", + help=tr("Volcengine Secret Key Help") + ) - # 集群配置 - cluster = st.text_input( - tr("Cluster"), - value=config.doubaotts.get("cluster", "volcano_tts"), - help=tr("Doubao Cluster Help") - ) + # AppID 输入 + appid = st.text_input( + "AppID", + value=appid, + help=tr("Doubao AppID Help") + ) 
+ + # Token 输入 + token = st.text_input( + "Token", + value=token, + type="password", + help=tr("Doubao Token Help") + ) + + # 集群配置 + cluster = st.text_input( + tr("Cluster"), + value=cluster, + help=tr("Doubao Cluster Help") + ) # 音色选择 # 在线音色列表(从文档中提取) @@ -1676,6 +1689,7 @@ def render_doubaotts_settings(tr): st.info(tr("Doubao TTS Fill Credentials Notice")) # 保存配置 + config.doubaotts["api_key"] = api_key.strip() config.doubaotts["ak"] = ak config.doubaotts["sk"] = sk config.doubaotts["appid"] = appid @@ -1690,20 +1704,10 @@ def render_doubaotts_settings(tr): st.session_state['voice_rate'] = voice_rate # 确保语速参数被保存到session state # 显示配置状态 - if ak and sk and appid and token: + if api_key.strip() or (appid and token): st.success(tr("Doubao TTS configured")) else: - missing = [] - if not ak: - missing.append("Access Key") - if not sk: - missing.append("Secret Key") - if not appid: - missing.append("AppID") - if not token: - missing.append("Token") - if missing: - st.warning(tr("Please configure missing fields").format(fields=', '.join(missing))) + st.warning(tr("Please configure missing fields").format(fields="API Key / AppID + Token")) def render_voice_preview_new(tr, selected_engine): diff --git a/webui/i18n/en.json b/webui/i18n/en.json index 00dedb5..306a738 100644 --- a/webui/i18n/en.json +++ b/webui/i18n/en.json @@ -652,6 +652,8 @@ "OmniVoice Usage Instructions": "**OmniVoice-Pack speech synthesis**\n\n1. **Automatic voice**: set the API URL and language, then synthesize directly.\n2. **Voice design**: fill instruct with the desired gender, pitch, accent, or style.\n3. 
**Reference-audio clone**: upload or choose reference audio and fill its matching transcript.\n\n**Notes**:\n- The default service URL is http://127.0.0.1:7866/tts\n- Reference-audio cloning requires reference text when the service has no ASR model loaded\n- OmniVoice returns WAV audio, and NarratoAI estimates subtitle segment timing from the audio duration", "Volcengine Access Key Help": "Volcengine Access Key", "Volcengine Secret Key Help": "Volcengine Secret Key", + "Doubao API Key Help": "New Doubao Speech API Key. This field is preferred and does not require AppID or Token.", + "Doubao Legacy Credentials": "Legacy AppID / Token Credentials", "Doubao AppID Help": "Doubao TTS application AppID", "Doubao Token Help": "Doubao TTS application Token", "Cluster": "Cluster", @@ -664,13 +666,13 @@ "Sentence Silence Duration Help": "Adjust sentence-end silence duration (0.0-2.0 seconds)", "Doubao TTS API Key Application Process": "Doubao TTS API Key Application Process", "Application Steps": "Application Steps", - "Doubao TTS Step 1": "1. Open [https://console.volcengine.com/iam/keymanage](https://console.volcengine.com/iam/keymanage)", - "Doubao TTS Step 2": "2. Create a new Access Key and Secret Key", - "Doubao TTS Step 3": "3. Open [https://www.volcengine.com/product/voice-tech](https://www.volcengine.com/product/voice-tech)", - "Doubao TTS Step 4": "4. Click Start Now", - "Doubao TTS Step 5": "5. In the left API Service Center, find Speech Synthesis under Audio Generation (note: Speech Synthesis, not the speech synthesis large model)", - "Doubao TTS Step 6": "6. Scroll to the bottom to get the APPID and Access Token", - "Doubao TTS Fill Credentials Notice": "Fill the Access Key, Secret Key, AppID, and Token above.", + "Doubao TTS Step 1": "1. Open the Volcengine Doubao Speech console", + "Doubao TTS Step 2": "2. Open API Key management and create an API Key", + "Doubao TTS Step 3": "3. Make sure Doubao speech synthesis is enabled", + "Doubao TTS Step 4": "4. 
Copy the API Key into the API Key field above", + "Doubao TTS Step 5": "5. The default cluster is volcano_tts and usually does not need changes", + "Doubao TTS Step 6": "6. Legacy AppID/Token users can keep using the compatibility fields", + "Doubao TTS Fill Credentials Notice": "The new setup only requires an API Key. Legacy AppID/Token credentials remain supported.", "Doubao TTS configured": "Doubao TTS is configured", "Please configure missing fields": "Please configure: {fields}", "Preview Voice Synthesis": "Preview Voice Synthesis", diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json index b711fc0..fbfbd66 100644 --- a/webui/i18n/zh.json +++ b/webui/i18n/zh.json @@ -591,6 +591,8 @@ "OmniVoice Usage Instructions": "**OmniVoice-Pack 语音合成**\n\n1. **自动音色**:只需要设置 API 地址和语言,可直接合成。\n2. **指令音色**:填写 instruct 描述想要的性别、音高、口音或风格。\n3. **参考音频克隆**:上传或选择参考音频,并填写该音频对应文本。\n\n**注意事项**:\n- 当前默认服务地址为 http://127.0.0.1:7866/tts\n- 参考音频克隆在服务未加载 ASR 模型时必须填写参考文本\n- OmniVoice 返回 WAV 音频,系统会按音频时长估算字幕段落", "Volcengine Access Key Help": "火山引擎 Access Key", "Volcengine Secret Key Help": "火山引擎 Secret Key", + "Doubao API Key Help": "新版豆包语音 API Key;优先使用该字段,无需填写 AppID 和 Token", + "Doubao Legacy Credentials": "旧版 AppID / Token 配置(兼容)", "Doubao AppID Help": "豆包语音应用 AppID", "Doubao Token Help": "豆包语音应用 Token", "Cluster": "集群", @@ -603,13 +605,13 @@ "Sentence Silence Duration Help": "调节句尾静音时长 (0.0-2.0 秒)", "Doubao TTS API Key Application Process": "豆包语音 TTS API Key申请流程", "Application Steps": "申请步骤", - "Doubao TTS Step 1": "1. 打开 [https://console.volcengine.com/iam/keymanage](https://console.volcengine.com/iam/keymanage)", - "Doubao TTS Step 2": "2. 新建 Access Key 和 Secret Key", - "Doubao TTS Step 3": "3. 打开 [https://www.volcengine.com/product/voice-tech](https://www.volcengine.com/product/voice-tech)", - "Doubao TTS Step 4": "4. 点击立即使用", - "Doubao TTS Step 5": "5. 在最左边的 API 服务中心找到音频生成下面的语音合成(注意:是语音合成,不是语音合成大模型)", - "Doubao TTS Step 6": "6. 
翻到最下面获取 APPID 和 Access Token", - "Doubao TTS Fill Credentials Notice": "请将获取到的 Access Key、Secret Key、AppID 和 Token 填写到上方的配置中", + "Doubao TTS Step 1": "1. 打开火山引擎豆包语音控制台", + "Doubao TTS Step 2": "2. 进入 API Key 管理并创建 API Key", + "Doubao TTS Step 3": "3. 确认已开通豆包语音合成服务", + "Doubao TTS Step 4": "4. 复制 API Key 并填写到上方 API Key 输入框", + "Doubao TTS Step 5": "5. 默认集群使用 volcano_tts,通常无需修改", + "Doubao TTS Step 6": "6. 旧版 AppID/Token 用户可继续在兼容配置中填写原凭据", + "Doubao TTS Fill Credentials Notice": "新版配置只需要填写 API Key;旧版 AppID/Token 仍保留兼容", "Doubao TTS configured": "豆包语音 TTS 配置已设置", "Please configure missing fields": "请配置: {fields}", "Preview Voice Synthesis": "试听语音合成",