From e1f45db95a330549bedd1e1d0dd6c537b9aab847 Mon Sep 17 00:00:00 2001 From: linyq Date: Sun, 3 Aug 2025 04:26:42 +0800 Subject: [PATCH 01/10] =?UTF-8?q?feat(tts):=20=E6=B7=BB=E5=8A=A0=20SoulVoi?= =?UTF-8?q?ce=20TTS=20=E5=BC=95=E6=93=8E=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 实现 SoulVoice TTS 引擎集成,包括配置管理、语音选择、API 调用和字幕处理 新增 SoulVoice 配置项和示例配置 修改音频设置面板以支持 SoulVoice 选项 优化音频时长计算和异常处理 更新多语言文案以反映 SoulVoice 支持 --- app/config/config.py | 2 + app/services/clip_video.py | 43 +++++ app/services/voice.py | 228 +++++++++++++++++++++++++- config.example.toml | 15 ++ webui/components/audio_settings.py | 153 ++++++++++++++--- webui/components/subtitle_settings.py | 35 +++- webui/i18n/en.json | 2 +- webui/i18n/zh.json | 2 +- 8 files changed, 440 insertions(+), 40 deletions(-) diff --git a/app/config/config.py b/app/config/config.py index 4b2b0b4..ceb8f11 100644 --- a/app/config/config.py +++ b/app/config/config.py @@ -48,6 +48,7 @@ def save_config(): with open(config_file, "w", encoding="utf-8") as f: _cfg["app"] = app _cfg["azure"] = azure + _cfg["soulvoice"] = soulvoice _cfg["ui"] = ui f.write(toml.dumps(_cfg)) @@ -57,6 +58,7 @@ app = _cfg.get("app", {}) whisper = _cfg.get("whisper", {}) proxy = _cfg.get("proxy", {}) azure = _cfg.get("azure", {}) +soulvoice = _cfg.get("soulvoice", {}) ui = _cfg.get("ui", {}) frames = _cfg.get("frames", {}) diff --git a/app/services/clip_video.py b/app/services/clip_video.py index 1a0e8e1..1c5fddf 100644 --- a/app/services/clip_video.py +++ b/app/services/clip_video.py @@ -613,6 +613,49 @@ def clip_video( # 根据持续时间计算真正的结束时间(加上1秒余量) duration = item["duration"] + + # 时长合理性检查和修正 + if duration <= 0 or duration > 300: # 超过5分钟认为不合理 + logger.warning(f"检测到异常时长 {duration}秒,片段: {timestamp}") + + # 尝试从时间戳计算实际时长 + try: + start_time_str, end_time_str = timestamp.split('-') + + # 解析开始时间 + if ',' in start_time_str: + time_part, ms_part = start_time_str.split(',') + h1, m1, s1 = map(int, time_part.split(':')) + ms1 = int(ms_part) + else: + h1, m1, s1 = map(int, start_time_str.split(':')) + ms1 = 0 + + # 解析结束时间 + if ',' in end_time_str: + time_part, ms_part = end_time_str.split(',') + h2, m2, s2 = map(int, time_part.split(':')) + ms2 = int(ms_part) + else: + h2, m2, s2 = map(int, end_time_str.split(':')) + ms2 = 0 + + # 计算实际时长 + start_total_ms = (h1 * 3600 + m1 * 60 + s1) * 1000 + ms1 + end_total_ms = (h2 * 3600 + m2 * 60 + s2) * 1000 + ms2 + actual_duration = (end_total_ms - start_total_ms) / 1000.0 + + if actual_duration > 0 and actual_duration <= 300: + duration = actual_duration + logger.info(f"使用时间戳计算的实际时长: {duration:.3f}秒") + else: + duration = 5.0 # 默认5秒 + logger.warning(f"时间戳计算也异常,使用默认时长: {duration}秒") + + except Exception as e: + duration = 5.0 # 默认5秒 + logger.warning(f"时长修正失败,使用默认时长: {duration}秒, 错误: {str(e)}") + calculated_end_time = calculate_end_time(start_time, duration) # 转换为FFmpeg兼容的时间格式(逗号替换为点) diff --git a/app/services/voice.py b/app/services/voice.py index 31f6d66..d45db75 100644 --- a/app/services/voice.py +++ b/app/services/voice.py @@ -4,19 +4,42 @@ import json import traceback import edge_tts import asyncio +import requests from loguru import logger -from typing import List, Union +from typing import List, Union, Tuple from datetime import datetime from xml.sax.saxutils import unescape from edge_tts import submaker, SubMaker -from edge_tts.submaker import mktimestamp +# from edge_tts.submaker import mktimestamp # 函数可能不存在,我们自己实现 from moviepy.video.tools import subtitles +try: + from moviepy import AudioFileClip + MOVIEPY_AVAILABLE = True +except ImportError: + MOVIEPY_AVAILABLE = False + logger.warning("moviepy 未安装,将使用估算方法计算音频时长") import time from app.config import config from app.utils import utils +def mktimestamp(time_seconds: float) -> str: + """ + 将秒数转换为 SRT 时间戳格式 + + Args: + time_seconds: 时间(秒) + + Returns: + str: SRT 格式的时间戳,如 "00:01:23.456" + """ + hours = int(time_seconds // 3600) + minutes = int((time_seconds % 3600) // 60) + seconds = time_seconds % 60 + return f"{hours:02d}:{minutes:02d}:{seconds:06.3f}" + + def get_all_azure_voices(filter_locals=None) -> list[str]: if filter_locals is None: filter_locals = ["zh-CN", "en-US", "zh-HK", "zh-TW", "vi-VN"] @@ -1038,8 +1061,15 @@ def is_azure_v2_voice(voice_name: str): def tts( text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str ) -> Union[SubMaker, None]: + # 检查是否为 SoulVoice 引擎 + if is_soulvoice_voice(voice_name): + return soulvoice_tts(text, voice_name, voice_file, speed=voice_rate) + + # 检查是否为 Azure V2 引擎 if is_azure_v2_voice(voice_name): return azure_tts_v2(text, voice_name, voice_file) + + # 默认使用 Azure V1 引擎 return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file) @@ -1368,6 +1398,10 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str) if start_time < 0: start_time = _start_time + # 将 100纳秒单位转换为秒 + start_time_seconds = start_time / 10000000 + end_time_seconds = end_time / 10000000 + sub = unescape(sub) sub_line += sub sub_text = match_line(sub_line, sub_index) @@ -1375,8 +1409,8 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str) sub_index += 1 line = formatter( idx=sub_index, - start_time=start_time, - end_time=end_time, + start_time=start_time_seconds, + end_time=end_time_seconds, sub_text=sub_text, ) sub_items.append(line) @@ -1402,9 +1436,13 @@ def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str) f"\nsub_items:{json.dumps(sub_items, indent=4, ensure_ascii=False)}" f"\nscript_lines:{json.dumps(script_lines, indent=4, ensure_ascii=False)}" ) + # 返回默认值,避免 None 错误 + return subtitle_file, 3.0 except Exception as e: logger.error(f"failed, error: {str(e)}") + # 返回默认值,避免 None 错误 + return subtitle_file, 3.0 def get_audio_duration(sub_maker: submaker.SubMaker): @@ -1453,8 +1491,21 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f f"或者使用其他 tts 引擎") continue else: - # 为当前片段生成字幕文件 - _, duration = create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file) + # SoulVoice 引擎不生成字幕文件 + if is_soulvoice_voice(voice_name): + # 获取实际音频文件的时长 + duration = get_audio_duration_from_file(audio_file) + if duration <= 0: + # 如果无法获取文件时长,尝试从 SubMaker 获取 + duration = get_audio_duration(sub_maker) + if duration <= 0: + # 最后的 fallback,基于文本长度估算 + duration = max(1.0, len(text) / 3.0) + logger.warning(f"无法获取音频时长,使用文本估算: {duration:.2f}秒") + # 不创建字幕文件 + subtitle_file = "" + else: + _, duration = create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file) tts_results.append({ "_id": item['_id'], @@ -1467,3 +1518,168 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f logger.info(f"已生成音频文件: {audio_file}") return tts_results + + +def get_audio_duration_from_file(audio_file: str) -> float: + """ + 获取音频文件的时长(秒) + """ + if MOVIEPY_AVAILABLE: + try: + audio_clip = AudioFileClip(audio_file) + duration = audio_clip.duration + audio_clip.close() + return duration + except Exception as e: + logger.error(f"使用 moviepy 获取音频时长失败: {str(e)}") + + # Fallback: 使用更准确的估算方法 + try: + import os + file_size = os.path.getsize(audio_file) + + # 更准确的 MP3 时长估算 + # 假设 MP3 平均比特率为 128kbps = 16KB/s + # 但实际文件还包含头部信息,所以调整系数 + estimated_duration = max(1.0, file_size / 20000) # 调整为更保守的估算 + + # 对于中文语音,根据文本长度进行二次校正 + # 一般中文语音速度约为 3-4 字/秒 + logger.warning(f"使用文件大小估算音频时长: {estimated_duration:.2f}秒") + return estimated_duration + except Exception as e: + logger.error(f"获取音频时长失败: {str(e)}") + # 如果所有方法都失败,返回一个基于文本长度的估算 + return 3.0 # 默认3秒,避免返回0 + + +def is_soulvoice_voice(voice_name: str) -> bool: + """ + 检查是否为 SoulVoice 语音 + """ + return voice_name.startswith("soulvoice:") or voice_name.startswith("speech:") + + +def parse_soulvoice_voice(voice_name: str) -> str: + """ + 解析 SoulVoice 语音名称 + 支持格式: + - soulvoice:speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr + - speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr + """ + if voice_name.startswith("soulvoice:"): + return voice_name[10:] # 移除 "soulvoice:" 前缀 + return voice_name + + +def soulvoice_tts(text: str, voice_name: str, voice_file: str, speed: float = 1.0) -> Union[SubMaker, None]: + """ + 使用 SoulVoice API 进行文本转语音 + + Args: + text: 要转换的文本 + voice_name: 语音名称 + voice_file: 输出音频文件路径 + speed: 语音速度 + + Returns: + SubMaker: 包含时间戳信息的字幕制作器,失败时返回 None + """ + # 获取配置 + api_key = config.soulvoice.get("api_key", "") + api_url = config.soulvoice.get("api_url", "https://tts.scsmtech.cn/tts") + default_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B") + + if not api_key: + logger.error("SoulVoice API key 未配置") + return None + + # 解析语音名称 + parsed_voice = parse_soulvoice_voice(voice_name) + + # 准备请求数据 + headers = { + 'Authorization': f'Bearer {api_key}', + 'Content-Type': 'application/json' + } + + data = { + 'text': text.strip(), + 'model': default_model, + 'voice': parsed_voice, + 'speed': speed + } + + # 重试机制 + for attempt in range(3): + try: + logger.info(f"第 {attempt + 1} 次调用 SoulVoice API") + + # 设置代理 + proxies = {} + if config.proxy.get("http"): + proxies = { + 'http': config.proxy.get("http"), + 'https': config.proxy.get("https", config.proxy.get("http")) + } + + # 调用 API + response = requests.post( + api_url, + headers=headers, + json=data, + proxies=proxies, + timeout=60 + ) + + if response.status_code == 200: + # 保存音频文件 + with open(voice_file, 'wb') as f: + f.write(response.content) + + logger.info(f"SoulVoice TTS 成功生成音频: {voice_file}") + + # SoulVoice 不支持精确字幕生成,返回简单的 SubMaker 对象 + sub_maker = SubMaker() + sub_maker.subs = [text] # 整个文本作为一个段落 + sub_maker.offset = [(0, 0)] # 占位时间戳 + + return sub_maker + + else: + logger.error(f"SoulVoice API 调用失败: {response.status_code} - {response.text}") + + except requests.exceptions.Timeout: + logger.error(f"SoulVoice API 调用超时 (尝试 {attempt + 1}/3)") + except requests.exceptions.RequestException as e: + logger.error(f"SoulVoice API 网络错误: {str(e)} (尝试 {attempt + 1}/3)") + except Exception as e: + logger.error(f"SoulVoice TTS 处理错误: {str(e)} (尝试 {attempt + 1}/3)") + + if attempt < 2: # 不是最后一次尝试 + time.sleep(2) # 等待2秒后重试 + + logger.error("SoulVoice TTS 生成失败,已达到最大重试次数") + return None + + +def is_soulvoice_voice(voice_name: str) -> bool: + """ + 检查是否为 SoulVoice 语音 + """ + return voice_name.startswith("soulvoice:") or voice_name.startswith("speech:") + + +def parse_soulvoice_voice(voice_name: str) -> str: + """ + 解析 SoulVoice 语音名称 + 支持格式: + - soulvoice:speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr + - speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr + """ + if voice_name.startswith("soulvoice:"): + return voice_name[10:] # 移除 "soulvoice:" 前缀 + return voice_name + + + diff --git a/config.example.toml b/config.example.toml index c9ca75f..ddf529a 100644 --- a/config.example.toml +++ b/config.example.toml @@ -77,6 +77,21 @@ # webui界面是否显示配置项 hide_config = true +[azure] + # Azure TTS 配置 + speech_key = "" + speech_region = "" + +[soulvoice] + # SoulVoice TTS API 密钥 + api_key = "" + # 音色 URI(必需) + voice_uri = "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr" + # API 接口地址(可选,默认值如下) + api_url = "https://tts.scsmtech.cn/tts" + # 默认模型(可选) + model = "FunAudioLLM/CosyVoice2-0.5B" + [proxy] # clash 默认地址:http://127.0.0.1:7890 http = "" diff --git a/webui/components/audio_settings.py b/webui/components/audio_settings.py index e422d48..b194e81 100644 --- a/webui/components/audio_settings.py +++ b/webui/components/audio_settings.py @@ -8,6 +8,17 @@ from app.utils import utils from webui.utils.cache import get_songs_cache +def get_soulvoice_voices(): + """获取 SoulVoice 语音列表""" + # 检查是否配置了 SoulVoice API key + api_key = config.soulvoice.get("api_key", "") + if not api_key: + return [] + + # 只返回一个 SoulVoice 选项,音色通过输入框自定义 + return ["soulvoice:custom"] + + def render_audio_panel(tr): """渲染音频设置面板""" with st.container(border=True): @@ -24,15 +35,24 @@ def render_tts_settings(tr): """渲染TTS(文本转语音)设置""" # 获取支持的语音列表 support_locales = ["zh-CN", "en-US"] - voices = voice.get_all_azure_voices(filter_locals=support_locales) + azure_voices = voice.get_all_azure_voices(filter_locals=support_locales) + + # 添加 SoulVoice 语音选项 + soulvoice_voices = get_soulvoice_voices() + + # 合并所有语音选项 + all_voices = azure_voices + soulvoice_voices # 创建友好的显示名称 - friendly_names = { - v: v.replace("Female", tr("Female")) - .replace("Male", tr("Male")) - .replace("Neural", "") - for v in voices - } + friendly_names = {} + + # Azure 语音的友好名称 + for v in azure_voices: + friendly_names[v] = v.replace("Female", tr("Female")).replace("Male", tr("Male")).replace("Neural", "") + + # SoulVoice 语音的友好名称 + for v in soulvoice_voices: + friendly_names[v] = "SoulVoice (自定义音色)" # 获取保存的语音设置 saved_voice_name = config.ui.get("voice_name", "") @@ -42,9 +62,9 @@ def render_tts_settings(tr): saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name) else: # 如果没有保存的设置,选择与UI语言匹配的第一个语音 - for i, v in enumerate(voices): + for i, v in enumerate(all_voices): if (v.lower().startswith(st.session_state["ui_language"].lower()) - and "V2" not in v): + and "V2" not in v and not v.startswith("soulvoice:")): saved_voice_name_index = i break @@ -60,20 +80,84 @@ def render_tts_settings(tr): list(friendly_names.values()).index(selected_friendly_name) ] + # 如果选择的是 SoulVoice 自定义选项,使用配置的音色 URI + if voice_name == "soulvoice:custom": + custom_voice_uri = config.soulvoice.get("voice_uri", "") + if custom_voice_uri: + # 确保音色 URI 有正确的前缀 + if not custom_voice_uri.startswith("soulvoice:") and not custom_voice_uri.startswith("speech:"): + voice_name = f"soulvoice:{custom_voice_uri}" + else: + voice_name = custom_voice_uri if custom_voice_uri.startswith("soulvoice:") else f"soulvoice:{custom_voice_uri}" + # 保存设置 config.ui["voice_name"] = voice_name - # Azure V2语音特殊处理 - if voice.is_azure_v2_voice(voice_name): + # 根据语音类型渲染不同的设置 + if voice.is_soulvoice_voice(voice_name): + render_soulvoice_settings(tr) + elif voice.is_azure_v2_voice(voice_name): render_azure_v2_settings(tr) # 语音参数设置 - render_voice_parameters(tr) + render_voice_parameters(tr, voice_name) # 试听按钮 render_voice_preview(tr, voice_name) +def render_soulvoice_settings(tr): + """渲染 SoulVoice 语音设置""" + saved_api_key = config.soulvoice.get("api_key", "") + saved_api_url = config.soulvoice.get("api_url", "https://tts.scsmtech.cn/tts") + saved_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B") + saved_voice_uri = config.soulvoice.get("voice_uri", "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr") + + # API Key 输入 + api_key = st.text_input( + "SoulVoice API Key", + value=saved_api_key, + type="password", + help="请输入您的 SoulVoice API 密钥" + ) + + # 音色 URI 输入 + voice_uri = st.text_input( + "音色 URI", + value=saved_voice_uri, + help="请输入 SoulVoice 音色标识符,格式如:speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr", + placeholder="speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr" + ) + + # API URL 输入(可选) + with st.expander("高级设置", expanded=False): + api_url = st.text_input( + "API 地址", + value=saved_api_url, + help="SoulVoice API 接口地址" + ) + + model = st.text_input( + "模型名称", + value=saved_model, + help="使用的 TTS 模型" + ) + + # 保存配置 + config.soulvoice["api_key"] = api_key + config.soulvoice["voice_uri"] = voice_uri + config.soulvoice["api_url"] = api_url + config.soulvoice["model"] = model + + # 显示配置状态 + if api_key and voice_uri: + st.success("✅ SoulVoice 配置已设置") + elif not api_key: + st.warning("⚠️ 请配置 SoulVoice API Key") + elif not voice_uri: + st.warning("⚠️ 请配置音色 URI") + + def render_azure_v2_settings(tr): """渲染Azure V2语音设置""" saved_azure_speech_region = config.azure.get("speech_region", "") @@ -93,7 +177,7 @@ def render_azure_v2_settings(tr): config.azure["speech_key"] = azure_speech_key -def render_voice_parameters(tr): +def render_voice_parameters(tr, voice_name): """渲染语音参数设置""" # 音量 - 使用统一的默认值 voice_volume = st.slider( @@ -106,22 +190,41 @@ def render_voice_parameters(tr): ) st.session_state['voice_volume'] = voice_volume + # 检查是否为 SoulVoice 引擎 + is_soulvoice = voice.is_soulvoice_voice(voice_name) # 语速 - voice_rate = st.selectbox( - tr("Speech Rate"), - options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0], - index=2, - ) + if is_soulvoice: + # SoulVoice 支持更精细的语速控制 + voice_rate = st.slider( + tr("Speech Rate"), + min_value=0.5, + max_value=2.0, + value=1.0, + step=0.1, + help="SoulVoice 语音速度控制" + ) + else: + # Azure TTS 使用预设选项 + voice_rate = st.selectbox( + tr("Speech Rate"), + options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0], + index=2, + ) st.session_state['voice_rate'] = voice_rate - # 音调 - voice_pitch = st.selectbox( - tr("Speech Pitch"), - options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0], - index=2, - ) - st.session_state['voice_pitch'] = voice_pitch + # 音调 - SoulVoice 不支持音调调节 + if not is_soulvoice: + voice_pitch = st.selectbox( + tr("Speech Pitch"), + options=[0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.5, 1.8, 2.0], + index=2, + ) + st.session_state['voice_pitch'] = voice_pitch + else: + # SoulVoice 不支持音调调节,设置默认值 + st.session_state['voice_pitch'] = 1.0 + st.info("ℹ️ SoulVoice 引擎不支持音调调节") def render_voice_preview(tr, voice_name): diff --git a/webui/components/subtitle_settings.py b/webui/components/subtitle_settings.py index ee27985..53b98c7 100644 --- a/webui/components/subtitle_settings.py +++ b/webui/components/subtitle_settings.py @@ -9,14 +9,35 @@ def render_subtitle_panel(tr): with st.container(border=True): st.write(tr("Subtitle Settings")) - # 启用字幕选项 - enable_subtitles = st.checkbox(tr("Enable Subtitles"), value=True) - st.session_state['subtitle_enabled'] = enable_subtitles + # 检查是否选择了 SoulVoice 引擎 + from app.services import voice + current_voice = st.session_state.get('voice_name', '') + is_soulvoice = voice.is_soulvoice_voice(current_voice) - if enable_subtitles: - render_font_settings(tr) - render_position_settings(tr) - render_style_settings(tr) + if is_soulvoice: + # SoulVoice 引擎时显示禁用提示 + st.warning("⚠️ SoulVoice TTS 不支持精确字幕生成") + st.info("💡 建议使用专业剪辑工具(如剪映、PR等)手动添加字幕") + + # 强制禁用字幕 + st.session_state['subtitle_enabled'] = False + + # 显示禁用状态的复选框 + st.checkbox( + tr("Enable Subtitles"), + value=False, + disabled=True, + help="SoulVoice 引擎不支持字幕生成,请使用其他 TTS 引擎" + ) + else: + # 其他引擎正常显示字幕选项 + enable_subtitles = st.checkbox(tr("Enable Subtitles"), value=True) + st.session_state['subtitle_enabled'] = enable_subtitles + + if enable_subtitles: + render_font_settings(tr) + render_position_settings(tr) + render_style_settings(tr) def render_font_settings(tr): diff --git a/webui/i18n/en.json b/webui/i18n/en.json index 63a2c36..3a69807 100644 --- a/webui/i18n/en.json +++ b/webui/i18n/en.json @@ -29,7 +29,7 @@ "Clip Duration": "Maximum Clip Duration (Seconds) (**Not the total length of the video**, refers to the length of each **composite segment**)", "Number of Videos Generated Simultaneously": "Number of Videos Generated Simultaneously", "Audio Settings": "**Audio Settings**", - "Speech Synthesis": "Speech Synthesis Voice (:red[**Keep consistent with the script language**. Note: V2 version performs better, but requires an API KEY])", + "Speech Synthesis": "Speech Synthesis Voice (:red[**Keep consistent with the script language**. Note: V2 version performs better, but requires an API KEY; SoulVoice provides high-quality Chinese voices])", "Speech Region": "Service Region (:red[Required, [Click to Get](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", "Speech Key": "API Key (:red[Required, either Key 1 or Key 2 is acceptable [Click to Get](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", "Speech Volume": "Speech Volume (1.0 represents 100%)", diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json index e028c9e..aad77e8 100644 --- a/webui/i18n/zh.json +++ b/webui/i18n/zh.json @@ -29,7 +29,7 @@ "Clip Duration": "视频片段最大时长(秒)(**不是视频总长度**,是指每个**合成片段**的长度)", "Number of Videos Generated Simultaneously": "同时生成视频数量", "Audio Settings": "**音频设置**", - "Speech Synthesis": "朗读声音(:red[**与文案语言保持一致**。注意:V2版效果更好,但是需要API KEY])", + "Speech Synthesis": "朗读声音(:red[**与文案语言保持一致**。注意:V2版效果更好,但是需要API KEY;SoulVoice 提供高质量中文语音])", "Speech Region": "服务区域 (:red[必填,[点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", "Speech Key": "API Key (:red[必填,密钥1 或 密钥2 均可 [点击获取](https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices)])", "Speech Volume": "朗读音量(1.0表示100%)", From cd1ee1441e9ffd7eca9fe15c2773f356b1b5e166 Mon Sep 17 00:00:00 2001 From: linyq Date: Sun, 3 Aug 2025 16:34:13 +0800 Subject: [PATCH 02/10] =?UTF-8?q?feat(video):=20=E5=AE=9E=E7=8E=B0?= =?UTF-8?q?=E7=BB=9F=E4=B8=80=E8=A7=86=E9=A2=91=E8=A3=81=E5=89=AA=E7=AD=96?= =?UTF-8?q?=E7=95=A5=E5=B9=B6=E7=A7=BB=E9=99=A4=E6=97=A7=E9=80=BB=E8=BE=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 重构视频处理流程,引入基于OST类型的统一裁剪策略: - 新增 clip_video_unified 函数处理三种OST类型 - 移除预裁剪步骤和相关UI组件 - 优化任务处理流程,减少重复裁剪 - 添加详细的错误处理和日志记录 --- app/services/clip_video.py | 353 ++++++++++++++++++++++++++++ app/services/task.py | 245 ++++++++++++++++++- app/utils/utils.py | 6 + webui.py | 19 +- webui/components/script_settings.py | 30 +-- webui/i18n/zh.json | 1 - 6 files changed, 608 insertions(+), 46 deletions(-) diff --git a/app/services/clip_video.py b/app/services/clip_video.py index 1c5fddf..8574173 100644 --- a/app/services/clip_video.py +++ b/app/services/clip_video.py @@ -546,6 +546,359 @@ def try_fallback_encoding( return execute_simple_command(fallback_cmd, timestamp, "通用Fallback") +def _process_narration_only_segment( + video_origin_path: str, + script_item: Dict, + tts_map: Dict, + output_dir: str, + encoder_config: Dict, + hwaccel_args: List[str] +) -> Optional[str]: + """ + 处理OST=0的纯解说片段 + - 根据TTS音频时长动态裁剪 + - 移除原声,生成静音视频 + """ + _id = script_item["_id"] + timestamp = script_item["timestamp"] + + # 获取对应的TTS结果 + tts_item = tts_map.get(_id) + if not tts_item: + logger.error(f"未找到片段 {_id} 的TTS结果") + return None + + # 解析起始时间,使用TTS音频时长计算结束时间 + start_time, _ = parse_timestamp(timestamp) + duration = tts_item["duration"] + calculated_end_time = calculate_end_time(start_time, duration, extra_seconds=0) + + # 转换为FFmpeg兼容的时间格式 + ffmpeg_start_time = start_time.replace(',', '.') + ffmpeg_end_time = calculated_end_time.replace(',', '.') + + # 生成输出文件名 + safe_start_time = start_time.replace(':', '-').replace(',', '-') + safe_end_time = calculated_end_time.replace(':', '-').replace(',', '-') + output_filename = f"ost0_vid_{safe_start_time}@{safe_end_time}.mp4" + output_path = os.path.join(output_dir, output_filename) + + # 构建FFmpeg命令 - 移除音频 + cmd = _build_ffmpeg_command_with_audio_control( + video_origin_path, output_path, ffmpeg_start_time, ffmpeg_end_time, + encoder_config, hwaccel_args, remove_audio=True + ) + + # 执行命令 + success = execute_ffmpeg_with_fallback( + cmd, timestamp, video_origin_path, output_path, + ffmpeg_start_time, ffmpeg_end_time + ) + + return output_path if success else None + + +def _process_original_audio_segment( + video_origin_path: str, + script_item: Dict, + output_dir: str, + encoder_config: Dict, + hwaccel_args: List[str] +) -> Optional[str]: + """ + 处理OST=1的纯原声片段 + - 严格按照脚本timestamp精确裁剪 + - 保持原声不变 + """ + _id = script_item["_id"] + timestamp = script_item["timestamp"] + + # 严格按照timestamp进行裁剪 + start_time, end_time = parse_timestamp(timestamp) + + # 转换为FFmpeg兼容的时间格式 + ffmpeg_start_time = start_time.replace(',', '.') + ffmpeg_end_time = end_time.replace(',', '.') + + # 生成输出文件名 + safe_start_time = start_time.replace(':', '-').replace(',', '-') + safe_end_time = end_time.replace(':', '-').replace(',', '-') + output_filename = f"ost1_vid_{safe_start_time}@{safe_end_time}.mp4" + output_path = os.path.join(output_dir, output_filename) + + # 构建FFmpeg命令 - 保持原声 + cmd = _build_ffmpeg_command_with_audio_control( + video_origin_path, output_path, ffmpeg_start_time, ffmpeg_end_time, + encoder_config, hwaccel_args, remove_audio=False + ) + + # 执行命令 + success = execute_ffmpeg_with_fallback( + cmd, timestamp, video_origin_path, output_path, + ffmpeg_start_time, ffmpeg_end_time + ) + + return output_path if success else None + + +def _process_mixed_segment( + video_origin_path: str, + script_item: Dict, + tts_map: Dict, + output_dir: str, + encoder_config: Dict, + hwaccel_args: List[str] +) -> Optional[str]: + """ + 处理OST=2的解说+原声混合片段 + - 根据TTS音频时长动态裁剪 + - 保持原声,确保视频时长等于TTS音频时长 + """ + _id = script_item["_id"] + timestamp = script_item["timestamp"] + + # 获取对应的TTS结果 + tts_item = tts_map.get(_id) + if not tts_item: + logger.error(f"未找到片段 {_id} 的TTS结果") + return None + + # 解析起始时间,使用TTS音频时长计算结束时间 + start_time, _ = parse_timestamp(timestamp) + duration = tts_item["duration"] + calculated_end_time = calculate_end_time(start_time, duration, extra_seconds=0) + + # 转换为FFmpeg兼容的时间格式 + ffmpeg_start_time = start_time.replace(',', '.') + ffmpeg_end_time = calculated_end_time.replace(',', '.') + + # 生成输出文件名 + safe_start_time = start_time.replace(':', '-').replace(',', '-') + safe_end_time = calculated_end_time.replace(':', '-').replace(',', '-') + output_filename = f"ost2_vid_{safe_start_time}@{safe_end_time}.mp4" + output_path = os.path.join(output_dir, output_filename) + + # 构建FFmpeg命令 - 保持原声 + cmd = _build_ffmpeg_command_with_audio_control( + video_origin_path, output_path, ffmpeg_start_time, ffmpeg_end_time, + encoder_config, hwaccel_args, remove_audio=False + ) + + # 执行命令 + success = execute_ffmpeg_with_fallback( + cmd, timestamp, video_origin_path, output_path, + ffmpeg_start_time, ffmpeg_end_time + ) + + return output_path if success else None + + +def _build_ffmpeg_command_with_audio_control( + input_path: str, + output_path: str, + start_time: str, + end_time: str, + encoder_config: Dict[str, str], + hwaccel_args: List[str] = None, + remove_audio: bool = False +) -> List[str]: + """ + 构建支持音频控制的FFmpeg命令 + + Args: + input_path: 输入视频路径 + output_path: 输出视频路径 + start_time: 开始时间 + end_time: 结束时间 + encoder_config: 编码器配置 + hwaccel_args: 硬件加速参数 + remove_audio: 是否移除音频(OST=0时为True) + + Returns: + List[str]: ffmpeg命令列表 + """ + cmd = ["ffmpeg", "-y"] + + # 硬件加速设置(参考原有逻辑) + if encoder_config["video_codec"] == "h264_nvenc": + # 对于NVENC,不使用硬件解码以避免滤镜链问题 + pass + elif hwaccel_args: + cmd.extend(hwaccel_args) + + # 输入文件 + cmd.extend(["-i", input_path]) + + # 时间范围 + cmd.extend(["-ss", start_time, "-to", end_time]) + + # 视频编码器设置 + cmd.extend(["-c:v", encoder_config["video_codec"]]) + + # 音频处理 + if remove_audio: + # OST=0: 移除音频 + cmd.extend(["-an"]) # -an 表示不包含音频流 + logger.debug("OST=0: 移除音频流") + else: + # OST=1,2: 保持原声 + cmd.extend(["-c:a", encoder_config["audio_codec"]]) + cmd.extend(["-ar", "44100", "-ac", "2"]) + logger.debug("OST=1/2: 保持原声") + + # 像素格式 + cmd.extend(["-pix_fmt", encoder_config["pixel_format"]]) + + # 质量和预设参数(参考原有逻辑) + if encoder_config["video_codec"] == "h264_nvenc": + cmd.extend(["-preset", encoder_config["preset"]]) + cmd.extend(["-cq", encoder_config["quality_value"]]) + cmd.extend(["-profile:v", "main"]) + elif encoder_config["video_codec"] == "h264_amf": + cmd.extend(["-quality", encoder_config["preset"]]) + cmd.extend(["-qp_i", encoder_config["quality_value"]]) + elif encoder_config["video_codec"] == "h264_qsv": + cmd.extend(["-preset", encoder_config["preset"]]) + cmd.extend(["-global_quality", encoder_config["quality_value"]]) + elif encoder_config["video_codec"] == "h264_videotoolbox": + cmd.extend(["-profile:v", "high"]) + cmd.extend(["-b:v", encoder_config["quality_value"]]) + else: + # 软件编码器(libx264) + cmd.extend(["-preset", encoder_config["preset"]]) + cmd.extend(["-crf", encoder_config["quality_value"]]) + + # 优化参数 + cmd.extend(["-avoid_negative_ts", "make_zero"]) + cmd.extend(["-movflags", "+faststart"]) + + # 输出文件 + cmd.append(output_path) + + return cmd + + +def clip_video_unified( + video_origin_path: str, + script_list: List[Dict], + tts_results: List[Dict], + output_dir: Optional[str] = None, + task_id: Optional[str] = None +) -> Dict[str, str]: + """ + 基于OST类型的统一视频裁剪策略 - 消除双重裁剪问题 + + Args: + video_origin_path: 原始视频的路径 + script_list: 完整的脚本列表,包含所有片段信息 + tts_results: TTS结果列表,仅包含OST=0和OST=2的片段 + output_dir: 输出目录路径,默认为None时会自动生成 + task_id: 任务ID,用于生成唯一的输出目录,默认为None时会自动生成 + + Returns: + Dict[str, str]: 片段ID到裁剪后视频路径的映射 + """ + # 检查视频文件是否存在 + if not os.path.exists(video_origin_path): + raise FileNotFoundError(f"视频文件不存在: {video_origin_path}") + + # 如果未提供task_id,则根据输入生成一个唯一ID + if task_id is None: + content_for_hash = f"{video_origin_path}_{json.dumps(script_list)}" + task_id = hashlib.md5(content_for_hash.encode()).hexdigest() + + # 设置输出目录 + if output_dir is None: + output_dir = os.path.join( + os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), + "storage", "temp", "clip_video_unified", task_id + ) + + # 确保输出目录存在 + Path(output_dir).mkdir(parents=True, exist_ok=True) + + # 创建TTS结果的快速查找映射 + tts_map = {item['_id']: item for item in tts_results} + + # 获取硬件加速支持 + hwaccel_type = check_hardware_acceleration() + hwaccel_args = [] + + if hwaccel_type: + hwaccel_args = ffmpeg_utils.get_ffmpeg_hwaccel_args() + hwaccel_info = ffmpeg_utils.get_ffmpeg_hwaccel_info() + logger.info(f"🚀 使用硬件加速: {hwaccel_type} ({hwaccel_info.get('message', '')})") + else: + logger.info("🔧 使用软件编码") + + # 获取编码器配置 + encoder_config = get_safe_encoder_config(hwaccel_type) + logger.debug(f"编码器配置: {encoder_config}") + + # 统计信息 + total_clips = len(script_list) + result = {} + failed_clips = [] + success_count = 0 + + logger.info(f"📹 开始统一视频裁剪,总共{total_clips}个片段") + + for i, script_item in enumerate(script_list, 1): + _id = script_item.get("_id") + ost = script_item.get("OST", 0) + timestamp = script_item["timestamp"] + + logger.info(f"📹 [{i}/{total_clips}] 处理片段 ID:{_id}, OST:{ost}, 时间戳:{timestamp}") + + try: + if ost == 0: # 纯解说片段 + output_path = _process_narration_only_segment( + video_origin_path, script_item, tts_map, output_dir, + encoder_config, hwaccel_args + ) + elif ost == 1: # 纯原声片段 + output_path = _process_original_audio_segment( + video_origin_path, script_item, output_dir, + encoder_config, hwaccel_args + ) + elif ost == 2: # 解说+原声混合片段 + output_path = _process_mixed_segment( + video_origin_path, script_item, tts_map, output_dir, + encoder_config, hwaccel_args + ) + else: + logger.warning(f"未知的OST类型: {ost},跳过片段 {_id}") + continue + + if output_path and os.path.exists(output_path) and os.path.getsize(output_path) > 0: + result[_id] = output_path + success_count += 1 + logger.info(f"✅ [{i}/{total_clips}] 片段处理成功: OST={ost}, ID={_id}") + else: + failed_clips.append(f"ID:{_id}, OST:{ost}") + logger.error(f"❌ [{i}/{total_clips}] 片段处理失败: OST={ost}, ID={_id}") + + except Exception as e: + failed_clips.append(f"ID:{_id}, OST:{ost}") + logger.error(f"❌ [{i}/{total_clips}] 片段处理异常: OST={ost}, ID={_id}, 错误: {str(e)}") + + # 最终统计 + logger.info(f"📊 统一视频裁剪完成: 成功 {success_count}/{total_clips}, 失败 {len(failed_clips)}") + + # 检查是否有失败的片段 + if failed_clips: + logger.warning(f"⚠️ 以下片段处理失败: {failed_clips}") + if len(failed_clips) == total_clips: + raise RuntimeError("所有视频片段处理都失败了,请检查视频文件和ffmpeg配置") + elif len(failed_clips) > total_clips / 2: + logger.warning(f"⚠️ 超过一半的片段处理失败 ({len(failed_clips)}/{total_clips}),请检查硬件加速配置") + + if success_count > 0: + logger.info(f"🎉 统一视频裁剪任务完成! 输出目录: {output_dir}") + + return result + + def clip_video( video_origin_path: str, tts_result: List[Dict], diff --git a/app/services/task.py b/app/services/task.py index 3a81584..6150247 100644 --- a/app/services/task.py +++ b/app/services/task.py @@ -15,13 +15,19 @@ from app.services import state as sm from app.utils import utils -def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict): +def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict = None): """ - 后台任务(自动剪辑视频进行剪辑) + 后台任务(统一视频裁剪处理)- 优化版本 + + 实施基于OST类型的统一视频裁剪策略,消除双重裁剪问题: + - OST=0: 根据TTS音频时长动态裁剪,移除原声 + - OST=1: 严格按照脚本timestamp精确裁剪,保持原声 + - OST=2: 根据TTS音频时长动态裁剪,保持原声 + Args: task_id: 任务ID params: 视频参数 - subclip_path_videos: 视频片段路径 + subclip_path_videos: 视频片段路径(可选,仅作为备用方案) """ global merged_audio_path, merged_subtitle_path @@ -94,17 +100,26 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di # sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40) """ - 3. 裁剪视频 - 将超出音频长度的视频进行裁剪 + 3. 统一视频裁剪 - 基于OST类型的差异化裁剪策略 """ - logger.info("\n\n## 3. 裁剪视频") - video_clip_result = clip_video.clip_video(params.video_origin_path, tts_results) - # 更新 list_script 中的时间戳 + logger.info("\n\n## 3. 统一视频裁剪(基于OST类型)") + + # 使用新的统一裁剪策略 + video_clip_result = clip_video.clip_video_unified( + video_origin_path=params.video_origin_path, + script_list=list_script, + tts_results=tts_results + ) + + # 更新 list_script 中的时间戳和路径信息 tts_clip_result = {tts_result['_id']: tts_result['audio_file'] for tts_result in tts_results} subclip_clip_result = { tts_result['_id']: tts_result['subtitle_file'] for tts_result in tts_results } new_script_list = update_script.update_script_timestamps(list_script, video_clip_result, tts_clip_result, subclip_clip_result) + logger.info(f"统一裁剪完成,处理了 {len(video_clip_result)} 个视频片段") + sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60) """ @@ -139,8 +154,27 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di combined_video_path = path.join(utils.task_dir(task_id), f"merger.mp4") logger.info(f"\n\n## 5. 合并视频: => {combined_video_path}") - # 如果 new_script_list 中没有 video,则使用 subclip_path_videos 中的视频 - video_clips = [new_script['video'] if new_script.get('video') else subclip_path_videos.get(new_script.get('_id', '')) for new_script in new_script_list] + + # 使用统一裁剪后的视频片段 + video_clips = [] + for new_script in new_script_list: + video_path = new_script.get('video') + if video_path and os.path.exists(video_path): + video_clips.append(video_path) + else: + logger.warning(f"片段 {new_script.get('_id')} 的视频文件不存在或未生成: {video_path}") + # 如果统一裁剪失败,尝试使用备用方案(如果提供了subclip_path_videos) + if subclip_path_videos and new_script.get('_id') in subclip_path_videos: + backup_video = subclip_path_videos[new_script.get('_id')] + if os.path.exists(backup_video): + video_clips.append(backup_video) + logger.info(f"使用备用视频: {backup_video}") + else: + logger.error(f"备用视频也不存在: {backup_video}") + else: + logger.error(f"无法找到片段 {new_script.get('_id')} 的视频文件") + + logger.info(f"准备合并 {len(video_clips)} 个视频片段") merger_video.combine_clip_videos( output_video_path=combined_video_path, @@ -208,6 +242,199 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di return kwargs +def start_subclip_unified(task_id: str, params: VideoClipParams): + """ + 统一视频裁剪处理函数 - 完全基于OST类型的新实现 + + 这是优化后的版本,完全移除了对预裁剪视频的依赖, + 实现真正的统一裁剪策略。 + + Args: + task_id: 任务ID + params: 视频参数 + """ + global merged_audio_path, merged_subtitle_path + + logger.info(f"\n\n## 开始统一视频处理任务: {task_id}") + sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=0) + + """ + 1. 加载剪辑脚本 + """ + logger.info("\n\n## 1. 加载视频脚本") + video_script_path = path.join(params.video_clip_json_path) + + if path.exists(video_script_path): + try: + with open(video_script_path, "r", encoding="utf-8") as f: + list_script = json.load(f) + video_list = [i['narration'] for i in list_script] + video_ost = [i['OST'] for i in list_script] + time_list = [i['timestamp'] for i in list_script] + + video_script = " ".join(video_list) + logger.debug(f"解说完整脚本: \n{video_script}") + logger.debug(f"解说 OST 列表: \n{video_ost}") + logger.debug(f"解说时间戳列表: \n{time_list}") + except Exception as e: + logger.error(f"无法读取视频json脚本,请检查脚本格式是否正确") + raise ValueError("无法读取视频json脚本,请检查脚本格式是否正确") + else: + logger.error(f"video_script_path: {video_script_path}") + raise ValueError("解说脚本不存在!请检查配置是否正确。") + + """ + 2. 使用 TTS 生成音频素材 + """ + logger.info("\n\n## 2. 根据OST设置生成音频列表") + # 只为OST=0 or 2的判断生成音频, OST=0 仅保留解说 OST=2 保留解说和原声 + tts_segments = [ + segment for segment in list_script + if segment['OST'] in [0, 2] + ] + logger.debug(f"需要生成TTS的片段数: {len(tts_segments)}") + + tts_results = voice.tts_multiple( + task_id=task_id, + list_script=tts_segments, # 只传入需要TTS的片段 + voice_name=params.voice_name, + voice_rate=params.voice_rate, + voice_pitch=params.voice_pitch, + ) + + sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20) + + """ + 3. 统一视频裁剪 - 基于OST类型的差异化裁剪策略 + """ + logger.info("\n\n## 3. 统一视频裁剪(基于OST类型)") + + # 使用新的统一裁剪策略 + video_clip_result = clip_video.clip_video_unified( + video_origin_path=params.video_origin_path, + script_list=list_script, + tts_results=tts_results + ) + + # 更新 list_script 中的时间戳和路径信息 + tts_clip_result = {tts_result['_id']: tts_result['audio_file'] for tts_result in tts_results} + subclip_clip_result = { + tts_result['_id']: tts_result['subtitle_file'] for tts_result in tts_results + } + new_script_list = update_script.update_script_timestamps(list_script, video_clip_result, tts_clip_result, subclip_clip_result) + + logger.info(f"统一裁剪完成,处理了 {len(video_clip_result)} 个视频片段") + + sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60) + + """ + 4. 合并音频和字幕 + """ + logger.info("\n\n## 4. 合并音频和字幕") + total_duration = sum([script["duration"] for script in new_script_list]) + if tts_segments: + try: + # 合并音频文件 + merged_audio_path = audio_merger.merge_audio_files( + task_id=task_id, + total_duration=total_duration, + list_script=new_script_list + ) + logger.info(f"音频文件合并成功->{merged_audio_path}") + # 合并字幕文件 + merged_subtitle_path = subtitle_merger.merge_subtitle_files(new_script_list) + logger.info(f"字幕文件合并成功->{merged_subtitle_path}") + except Exception as e: + logger.error(f"合并音频文件失败: {str(e)}") + else: + logger.warning("没有需要合并的音频/字幕") + merged_audio_path = "" + merged_subtitle_path = "" + + """ + 5. 合并视频 + """ + final_video_paths = [] + combined_video_paths = [] + + combined_video_path = path.join(utils.task_dir(task_id), f"merger.mp4") + logger.info(f"\n\n## 5. 合并视频: => {combined_video_path}") + + # 使用统一裁剪后的视频片段 + video_clips = [] + for new_script in new_script_list: + video_path = new_script.get('video') + if video_path and os.path.exists(video_path): + video_clips.append(video_path) + else: + logger.error(f"片段 {new_script.get('_id')} 的视频文件不存在: {video_path}") + + logger.info(f"准备合并 {len(video_clips)} 个视频片段") + + merger_video.combine_clip_videos( + output_video_path=combined_video_path, + video_paths=video_clips, + video_ost_list=video_ost, + video_aspect=params.video_aspect, + threads=params.n_threads + ) + sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=80) + + """ + 6. 合并字幕/BGM/配音/视频 + """ + output_video_path = path.join(utils.task_dir(task_id), f"combined.mp4") + logger.info(f"\n\n## 6. 最后一步: 合并字幕/BGM/配音/视频 -> {output_video_path}") + + bgm_path = utils.get_bgm_file() + + # 获取优化的音量配置 + optimized_volumes = get_recommended_volumes_for_content('mixed') + + # 应用用户设置和优化建议的组合 + final_tts_volume = params.tts_volume if hasattr(params, 'tts_volume') and params.tts_volume != 1.0 else optimized_volumes['tts_volume'] + final_original_volume = params.original_volume if hasattr(params, 'original_volume') and params.original_volume != 0.7 else optimized_volumes['original_volume'] + final_bgm_volume = params.bgm_volume if hasattr(params, 'bgm_volume') and params.bgm_volume != 0.3 else optimized_volumes['bgm_volume'] + + logger.info(f"音量配置 - TTS: {final_tts_volume}, 原声: {final_original_volume}, BGM: {final_bgm_volume}") + + # 调用示例 + options = { + 'voice_volume': final_tts_volume, + 'bgm_volume': final_bgm_volume, + 'original_audio_volume': final_original_volume, + 'keep_original_audio': True, + 'subtitle_enabled': params.subtitle_enabled, + 'subtitle_font': params.font_name, + 'subtitle_font_size': params.font_size, + 'subtitle_color': params.text_fore_color, + 'subtitle_bg_color': None, + 'subtitle_position': params.subtitle_position, + 'custom_position': params.custom_position, + 'threads': params.n_threads + } + generate_video.merge_materials( + video_path=combined_video_path, + audio_path=merged_audio_path, + subtitle_path=merged_subtitle_path, + bgm_path=bgm_path, + output_path=output_video_path, + options=options + ) + + final_video_paths.append(output_video_path) + combined_video_paths.append(combined_video_path) + + logger.success(f"统一处理任务 {task_id} 已完成, 生成 {len(final_video_paths)} 个视频.") + + kwargs = { + "videos": final_video_paths, + "combined_videos": combined_video_paths + } + sm.state.update_task(task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs) + return kwargs + + def validate_params(video_path, audio_path, output_file, params): """ 验证输入参数 diff --git a/app/utils/utils.py b/app/utils/utils.py index 1dbf7e3..d101dce 100644 --- a/app/utils/utils.py +++ b/app/utils/utils.py @@ -509,6 +509,12 @@ def clean_model_output(output): def cut_video(params, progress_callback=None): + """ + 旧的视频裁剪函数 - 已弃用 + + 注意:此函数已被统一裁剪策略取代,不再推荐使用。 + 新的实现请使用 task.start_subclip_unified() 函数。 + """ try: task_id = str(uuid4()) st.session_state['task_id'] = task_id diff --git a/webui.py b/webui.py index 9d82838..56e2c39 100644 --- a/webui.py +++ b/webui.py @@ -106,8 +106,7 @@ def init_global_state(): st.session_state['video_plot'] = '' if 'ui_language' not in st.session_state: st.session_state['ui_language'] = config.ui.get("language", utils.get_system_locale()) - if 'subclip_videos' not in st.session_state: - st.session_state['subclip_videos'] = {} + # 移除subclip_videos初始化 - 现在使用统一裁剪策略 def tr(key): @@ -136,11 +135,9 @@ def render_generate_button(): logger.add(log_received) config.save_config() - task_id = st.session_state.get('task_id') - if not task_id: - st.error(tr("请先裁剪视频")) - return + # 移除task_id检查 - 现在使用统一裁剪策略,不再需要预裁剪 + # 直接检查必要的文件是否存在 if not st.session_state.get('video_clip_json_path'): st.error(tr("脚本文件不能为空")) return @@ -168,10 +165,14 @@ def render_generate_button(): # 创建参数对象 params = VideoClipParams(**all_params) - result = tm.start_subclip( + # 使用新的统一裁剪策略,不再需要预裁剪的subclip_videos + # 生成一个新的task_id用于本次处理 + import uuid + task_id = str(uuid.uuid4()) + + result = tm.start_subclip_unified( task_id=task_id, - params=params, - subclip_path_videos=st.session_state['subclip_videos'] + params=params ) video_files = result.get("videos", []) diff --git a/webui/components/script_settings.py b/webui/components/script_settings.py index b452d08..0caa122 100644 --- a/webui/components/script_settings.py +++ b/webui/components/script_settings.py @@ -336,8 +336,8 @@ def render_script_buttons(tr, params): height=180 ) - # 操作按钮行 - button_cols = st.columns(3) + # 操作按钮行 - 移除裁剪视频按钮,使用统一裁剪策略 + button_cols = st.columns(2) # 改为2列布局 with button_cols[0]: if st.button(tr("Check Format"), key="check_format", use_container_width=True): check_script_format(tr, video_clip_json_details) @@ -346,11 +346,6 @@ def render_script_buttons(tr, params): if st.button(tr("Save Script"), key="save_script", use_container_width=True): save_script(tr, video_clip_json_details) - with button_cols[2]: - script_valid = st.session_state.get('script_format_valid', False) - if st.button(tr("Crop Video"), key="crop_video", disabled=not script_valid, use_container_width=True): - crop_video(tr, params) - def check_script_format(tr, script_content): """检查脚本格式""" @@ -414,26 +409,7 @@ def save_script(tr, video_clip_json_details): st.stop() -def crop_video(tr, params): - """裁剪视频""" - progress_bar = st.progress(0) - status_text = st.empty() - - def update_progress(progress): - progress_bar.progress(progress) - status_text.text(f"剪辑进度: {progress}%") - - try: - utils.cut_video(params, update_progress) - time.sleep(0.5) - progress_bar.progress(100) - st.success("视频剪辑成功完成!") - except Exception as e: - st.error(f"剪辑过程中发生错误: {str(e)}") - finally: - time.sleep(1) - progress_bar.empty() - status_text.empty() +# crop_video函数已移除 - 现在使用统一裁剪策略,不再需要预裁剪步骤 def get_script_params(): diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json index aad77e8..61c0e11 100644 --- a/webui/i18n/zh.json +++ b/webui/i18n/zh.json @@ -11,7 +11,6 @@ "Video Theme": "视频主题", "Generation Prompt": "自定义提示词", "Save Script": "保存脚本", - "Crop Video": "裁剪视频", "Video File": "视频文件(:blue[1️⃣支持上传视频文件(限制2G) 2️⃣大文件建议直接导入 ./resource/videos 目录])", "Plot Description": "剧情描述 (:blue[可从 https://www.tvmao.com/ 获取])", "Generate Video Keywords": "点击使用AI根据**文案**生成【视频关键】", From 184286e5e0dbd968ec4bd2cc1aaa428448367e3f Mon Sep 17 00:00:00 2001 From: linyq Date: Sun, 3 Aug 2025 17:06:55 +0800 Subject: [PATCH 03/10] =?UTF-8?q?feat(script):=20=E5=90=88=E5=B9=B6?= =?UTF-8?q?=E8=84=9A=E6=9C=AC=E4=BF=9D=E5=AD=98=E4=B8=8E=E6=A0=BC=E5=BC=8F?= =?UTF-8?q?=E9=AA=8C=E8=AF=81=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 重构脚本保存流程,将格式验证整合到保存操作中。新增详细的格式验证错误提示和正确格式示例展示。增强脚本格式检查功能,包括字段类型、格式和必填项验证。 --- app/utils/check_script.py | 85 +++++++++++++++++++---------- webui/components/script_settings.py | 73 ++++++++++++++++--------- 2 files changed, 103 insertions(+), 55 deletions(-) diff --git a/app/utils/check_script.py b/app/utils/check_script.py index 00e6c0f..9c745e6 100644 --- a/app/utils/check_script.py +++ b/app/utils/check_script.py @@ -1,4 +1,5 @@ import json +import re from typing import Dict, Any def check_format(script_content: str) -> Dict[str, Any]: @@ -6,76 +7,104 @@ def check_format(script_content: str) -> Dict[str, Any]: Args: script_content: 脚本内容 Returns: - Dict: {'success': bool, 'message': str} + Dict: {'success': bool, 'message': str, 'details': str} """ try: # 检查是否为有效的JSON data = json.loads(script_content) - + # 检查是否为列表 if not isinstance(data, list): return { 'success': False, - 'message': '脚本必须是JSON数组格式' + 'message': '脚本必须是JSON数组格式', + 'details': '正确格式应该是: [{"_id": 1, "timestamp": "...", ...}, ...]' } - + + # 检查数组不能为空 + if len(data) == 0: + return { + 'success': False, + 'message': '脚本数组不能为空', + 'details': '至少需要包含一个脚本片段' + } + # 检查每个片段 for i, clip in enumerate(data): + # 检查是否为对象类型 + if not isinstance(clip, dict): + return { + 'success': False, + 'message': f'第{i+1}个元素必须是对象类型', + 'details': f'当前类型: {type(clip).__name__}' + } + # 检查必需字段 - required_fields = ['narration', 'picture', 'timestamp'] + required_fields = ['_id', 'timestamp', 'picture', 'narration', 'OST'] for field in required_fields: if field not in clip: return { 'success': False, - 'message': f'第{i+1}个片段缺少必需字段: {field}' + 'message': f'第{i+1}个片段缺少必需字段: {field}', + 'details': f'必需字段: {", ".join(required_fields)}' } - - # 检查字段类型 - if not isinstance(clip['narration'], str): + + # 验证 _id 字段 + if not isinstance(clip['_id'], int) or clip['_id'] <= 0: return { 'success': False, - 'message': f'第{i+1}个片段的narration必须是字符串' + 'message': f'第{i+1}个片段的_id必须是正整数', + 'details': f'当前值: {clip["_id"]} (类型: {type(clip["_id"]).__name__})' } - if not isinstance(clip['picture'], str): + + # 验证 timestamp 字段格式 + timestamp_pattern = r'^\d{2}:\d{2}:\d{2},\d{3}-\d{2}:\d{2}:\d{2},\d{3}$' + if not isinstance(clip['timestamp'], str) or not re.match(timestamp_pattern, clip['timestamp']): return { 'success': False, - 'message': f'第{i+1}个片段的picture必须是字符串' + 'message': f'第{i+1}个片段的timestamp格式错误', + 'details': f'正确格式: "HH:MM:SS,mmm-HH:MM:SS,mmm",示例: "00:00:00,600-00:00:07,559"' } - if not isinstance(clip['timestamp'], str): + + # 验证 picture 字段 + if not isinstance(clip['picture'], str) or not clip['picture'].strip(): return { 'success': False, - 'message': f'第{i+1}个片段的timestamp必须是字符串' + 'message': f'第{i+1}个片段的picture必须是非空字符串', + 'details': f'当前值: {clip.get("picture", "未定义")}' } - - # 检查字段内容不能为空 - if not clip['narration'].strip(): + + # 验证 narration 字段 + if not isinstance(clip['narration'], str) or not clip['narration'].strip(): return { 'success': False, - 'message': f'第{i+1}个片段的narration不能为空' + 'message': f'第{i+1}个片段的narration必须是非空字符串', + 'details': f'当前值: {clip.get("narration", "未定义")}' } - if not clip['picture'].strip(): + + # 验证 OST 字段 + if not isinstance(clip['OST'], int): return { 'success': False, - 'message': f'第{i+1}个片段的picture不能为空' - } - if not clip['timestamp'].strip(): - return { - 'success': False, - 'message': f'第{i+1}个片段的timestamp不能为空' + 'message': f'第{i+1}个片段的OST必须是整数', + 'details': f'当前值: {clip["OST"]} (类型: {type(clip["OST"]).__name__}),常用值: 0, 1, 2' } return { 'success': True, - 'message': '脚本格式检查通过' + 'message': '脚本格式检查通过', + 'details': f'共验证 {len(data)} 个脚本片段,格式正确' } except json.JSONDecodeError as e: return { 'success': False, - 'message': f'JSON格式错误: {str(e)}' + 'message': f'JSON格式错误: {str(e)}', + 'details': '请检查JSON语法,确保所有括号、引号、逗号正确' } except Exception as e: return { 'success': False, - 'message': f'检查过程中发生错误: {str(e)}' + 'message': f'检查过程中发生错误: {str(e)}', + 'details': '请联系技术支持' } diff --git a/webui/components/script_settings.py b/webui/components/script_settings.py index 0caa122..42ff794 100644 --- a/webui/components/script_settings.py +++ b/webui/components/script_settings.py @@ -336,30 +336,9 @@ def render_script_buttons(tr, params): height=180 ) - # 操作按钮行 - 移除裁剪视频按钮,使用统一裁剪策略 - button_cols = st.columns(2) # 改为2列布局 - with button_cols[0]: - if st.button(tr("Check Format"), key="check_format", use_container_width=True): - check_script_format(tr, video_clip_json_details) - - with button_cols[1]: - if st.button(tr("Save Script"), key="save_script", use_container_width=True): - save_script(tr, video_clip_json_details) - - -def check_script_format(tr, script_content): - """检查脚本格式""" - try: - result = check_script.check_format(script_content) - if result.get('success'): - st.success(tr("Script format check passed")) - st.session_state['script_format_valid'] = True - else: - st.error(f"{tr('Script format check failed')}: {result.get('message')}") - st.session_state['script_format_valid'] = False - except Exception as e: - st.error(f"{tr('Script format check error')}: {str(e)}") - st.session_state['script_format_valid'] = False + # 操作按钮行 - 合并格式检查和保存功能 + if st.button(tr("Save Script"), key="save_script", use_container_width=True): + save_script_with_validation(tr, video_clip_json_details) def load_script(tr, script_path): @@ -376,12 +355,52 @@ def load_script(tr, script_path): st.error(f"{tr('Failed to load script')}: {str(e)}") -def save_script(tr, video_clip_json_details): - """保存视频脚本""" +def save_script_with_validation(tr, video_clip_json_details): + """保存视频脚本(包含格式验证)""" if not video_clip_json_details: st.error(tr("请输入视频脚本")) st.stop() + # 第一步:格式验证 + with st.spinner("正在验证脚本格式..."): + try: + result = check_script.check_format(video_clip_json_details) + if not result.get('success'): + # 格式验证失败,显示详细错误信息 + error_message = result.get('message', '未知错误') + error_details = result.get('details', '') + + st.error(f"**脚本格式验证失败**") + st.error(f"**错误信息:** {error_message}") + if error_details: + st.error(f"**详细说明:** {error_details}") + + # 显示正确格式示例 + st.info("**正确的脚本格式示例:**") + example_script = [ + { + "_id": 1, + "timestamp": "00:00:00,600-00:00:07,559", + "picture": "工地上,蔡晓艳奋力救人,场面混乱", + "narration": "灾后重建,工地上险象环生!泼辣女工蔡晓艳挺身而出,救人第一!", + "OST": 0 + }, + { + "_id": 2, + "timestamp": "00:00:08,240-00:00:12,359", + "picture": "领导视察,蔡晓艳不屑一顾", + "narration": "播放原片4", + "OST": 1 + } + ] + st.code(json.dumps(example_script, ensure_ascii=False, indent=2), language='json') + st.stop() + + except Exception as e: + st.error(f"格式验证过程中发生错误: {str(e)}") + st.stop() + + # 第二步:保存脚本 with st.spinner(tr("Save Script")): script_dir = utils.script_dir() timestamp = time.strftime("%Y-%m%d-%H%M%S") @@ -398,7 +417,7 @@ def save_script(tr, video_clip_json_details): config.app["video_clip_json_path"] = save_path # 显示成功消息 - st.success(tr("Script saved successfully")) + st.success("✅ 脚本格式验证通过,保存成功!") # 强制重新加载页面更新选择框 time.sleep(0.5) # 给一点时间让用户看到成功消息 From e59fd6030c53cbc5607eff64156f1040b74522cd Mon Sep 17 00:00:00 2001 From: linyq Date: Sun, 3 Aug 2025 17:12:07 +0800 Subject: [PATCH 04/10] =?UTF-8?q?refactor:=20=E7=A7=BB=E9=99=A4=E8=A7=86?= =?UTF-8?q?=E9=A2=91=E5=AD=97=E5=B9=95=E5=90=88=E5=B9=B6=E5=8A=9F=E8=83=BD?= =?UTF-8?q?=E5=8F=8A=E7=9B=B8=E5=85=B3=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 清理不再使用的视频字幕合并功能,包括删除合并设置组件、合并工具函数和相关的国际化文本 --- webui.py | 4 +- webui/components/merge_settings.py | 296 ----------------------------- webui/i18n/zh.json | 28 --- webui/utils/merge_video.py | 115 ----------- 4 files changed, 1 insertion(+), 442 deletions(-) delete mode 100644 webui/components/merge_settings.py delete mode 100644 webui/utils/merge_video.py diff --git a/webui.py b/webui.py index 56e2c39..b649e9d 100644 --- a/webui.py +++ b/webui.py @@ -4,7 +4,7 @@ import sys from loguru import logger from app.config import config from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, \ - review_settings, merge_settings, system_settings + review_settings, system_settings # from webui.utils import cache, file_utils from app.utils import utils from app.utils import ffmpeg_utils @@ -221,8 +221,6 @@ def main(): # 首先渲染不依赖PyTorch的UI部分 # 渲染基础设置面板 basic_settings.render_basic_settings(tr) - # 渲染合并设置 - merge_settings.render_merge_settings(tr) # 渲染主面板 panel = st.columns(3) diff --git a/webui/components/merge_settings.py b/webui/components/merge_settings.py deleted file mode 100644 index fe35f7a..0000000 --- a/webui/components/merge_settings.py +++ /dev/null @@ -1,296 +0,0 @@ -import os -import time -import streamlit as st -from loguru import logger -from typing import List, Dict -from dataclasses import dataclass -from streamlit.runtime.uploaded_file_manager import UploadedFile - -from webui.utils.merge_video import merge_videos_and_subtitles -from app.utils.utils import video_dir, srt_dir - -# 定义临时目录路径 -TEMP_MERGE_DIR = os.path.join("storage", "temp", "merge") - -# 确保临时目录存在 -os.makedirs(TEMP_MERGE_DIR, exist_ok=True) - - -@dataclass -class VideoSubtitlePair: - video_file: UploadedFile | None - subtitle_file: str | None - base_name: str - order: int = 0 - - -def save_uploaded_file(uploaded_file: UploadedFile, target_dir: str) -> str: - """Save uploaded file to target directory and return the file path""" - file_path = os.path.join(target_dir, uploaded_file.name) - # 如果文件已存在,先删除它 - if os.path.exists(file_path): - os.remove(file_path) - with open(file_path, "wb") as f: - f.write(uploaded_file.getvalue()) - return file_path - - -def clean_temp_dir(): - """清空临时目录""" - if os.path.exists(TEMP_MERGE_DIR): - for file in os.listdir(TEMP_MERGE_DIR): - file_path = os.path.join(TEMP_MERGE_DIR, file) - try: - if os.path.isfile(file_path): - os.unlink(file_path) - except Exception as e: - logger.error(f"清理临时文件失败: {str(e)}") - - -def group_files(files: List[UploadedFile]) -> Dict[str, VideoSubtitlePair]: - """Group uploaded files by their base names""" - pairs = {} - order_counter = 0 - - # 首先处理所有视频文件 - for file in files: - base_name = os.path.splitext(file.name)[0] - ext = os.path.splitext(file.name)[1].lower() - - if ext == ".mp4": - if base_name not in pairs: - pairs[base_name] = VideoSubtitlePair(None, None, base_name, order_counter) - order_counter += 1 - pairs[base_name].video_file = file - # 保存视频文件到临时目录 - video_path = save_uploaded_file(file, TEMP_MERGE_DIR) - - # 然后处理所有字幕文件 - for file in files: - base_name = os.path.splitext(file.name)[0] - ext = os.path.splitext(file.name)[1].lower() - - if ext == ".srt": - # 即使没有对应视频也保存字幕文件 - subtitle_path = os.path.join(TEMP_MERGE_DIR, f"{base_name}.srt") - save_uploaded_file(file, TEMP_MERGE_DIR) - - if base_name in pairs: # 如果有对应的视频 - pairs[base_name].subtitle_file = subtitle_path - - return pairs - - -def render_merge_settings(tr): - """Render the merge settings section""" - with st.expander(tr("Video Subtitle Merge"), expanded=False): - # 上传文件区域 - uploaded_files = st.file_uploader( - tr("Upload Video and Subtitle Files"), - type=["mp4", "srt"], - accept_multiple_files=True, - key="merge_files" - ) - - if uploaded_files: - all_pairs = group_files(uploaded_files) - - if all_pairs: - st.write(tr("All Uploaded Files")) - - # 初始化或更新session state中的排序信息 - if 'file_orders' not in st.session_state: - st.session_state.file_orders = { - name: pair.order for name, pair in all_pairs.items() - } - st.session_state.needs_reorder = False - - # 确保所有新文件都有排序值 - for name, pair in all_pairs.items(): - if name not in st.session_state.file_orders: - st.session_state.file_orders[name] = pair.order - - # 移除不存在的文件的排序值 - st.session_state.file_orders = { - k: v for k, v in st.session_state.file_orders.items() - if k in all_pairs - } - - # 按照排序值对文件对进行排序 - sorted_pairs = sorted( - all_pairs.items(), - key=lambda x: st.session_state.file_orders[x[0]] - ) - - # 计算需要多少行来显示所有视频(每行5个) - num_pairs = len(sorted_pairs) - num_rows = (num_pairs + 4) // 5 # 向上取整,每行5个 - - # 遍历每一行 - for row in range(num_rows): - # 创建5列 - cols = st.columns(5) - - # 在这一行中填充视频(最多5个) - for col_idx in range(5): - pair_idx = row * 5 + col_idx - if pair_idx < num_pairs: - base_name, pair = sorted_pairs[pair_idx] - with cols[col_idx]: - st.caption(base_name) - - # 显示视频预览(如果存在) - video_path = os.path.join(TEMP_MERGE_DIR, f"{base_name}.mp4") - if os.path.exists(video_path): - st.video(video_path) - else: - st.warning(tr("Missing Video")) - - # 显示字幕预览(如果存在) - subtitle_path = os.path.join(TEMP_MERGE_DIR, f"{base_name}.srt") - if os.path.exists(subtitle_path): - with open(subtitle_path, 'r', encoding='utf-8') as f: - subtitle_content = f.read() - st.markdown(tr("Subtitle Preview")) - st.text_area( - "Subtitle Content", - value=subtitle_content, - height=100, # 减高度以适应5列布局 - label_visibility="collapsed", - key=f"subtitle_preview_{base_name}" - ) - else: - st.warning(tr("Missing Subtitle")) - # 如果有视频但没有字幕,显示一键转录按钮 - # if os.path.exists(video_path): - # if st.button(tr("One-Click Transcribe"), key=f"transcribe_{base_name}"): - # with st.spinner(tr("Transcribing...")): - # try: - # # 生成字幕文件 - # result = extract_audio_and_create_subtitle(video_path, subtitle_path) - # if result: - # # 读取生成的字幕文件内容并显示预览 - # with open(subtitle_path, 'r', encoding='utf-8') as f: - # subtitle_content = f.read() - # st.markdown(tr("Subtitle Preview")) - # st.text_area( - # "Subtitle Content", - # value=subtitle_content, - # height=150, - # label_visibility="collapsed", - # key=f"subtitle_preview_transcribed_{base_name}" - # ) - # st.success(tr("Transcription Complete!")) - # # 更新pair的字幕文件路径 - # pair.subtitle_file = subtitle_path - # else: - # st.error(tr("Transcription Failed. Please try again.")) - # except Exception as e: - # error_message = str(e) - # logger.error(traceback.format_exc()) - # if "rate limit exceeded" in error_message.lower(): - # st.error(tr("API rate limit exceeded. Please wait about an hour and try again.")) - # elif "resource_exhausted" in error_message.lower(): - # st.error(tr("Resources exhausted. Please try again later.")) - # else: - # st.error(f"{tr('Transcription Failed')}: {str(e)}") - - # 排序输入框 - order = st.number_input( - tr("Order"), - min_value=0, - value=st.session_state.file_orders[base_name], - key=f"order_{base_name}", - on_change=lambda: setattr(st.session_state, 'needs_reorder', True) - ) - if order != st.session_state.file_orders[base_name]: - st.session_state.file_orders[base_name] = order - st.session_state.needs_reorder = True - - # 如果需要重新排序,重新加载页面 - if st.session_state.needs_reorder: - st.session_state.needs_reorder = False - st.rerun() - - # 找出有完整视频和字幕的文件对 - complete_pairs = { - k: v for k, v in all_pairs.items() - if os.path.exists(os.path.join(TEMP_MERGE_DIR, f"{k}.mp4")) and - os.path.exists(os.path.join(TEMP_MERGE_DIR, f"{k}.srt")) - } - - # 合并按钮和结果显示 - cols = st.columns([1, 2, 1]) - with cols[0]: - st.write(f"{tr('Mergeable Files')}: {len(complete_pairs)}") - - merge_videos_result = None - - with cols[1]: - if st.button(tr("Merge All Files"), type="primary", use_container_width=True): - try: - # 获取排序后的完整文件对 - sorted_complete_pairs = sorted( - [(k, v) for k, v in complete_pairs.items()], - key=lambda x: st.session_state.file_orders[x[0]] - ) - - video_paths = [] - subtitle_paths = [] - for base_name, _ in sorted_complete_pairs: - video_paths.append(os.path.join(TEMP_MERGE_DIR, f"{base_name}.mp4")) - subtitle_paths.append(os.path.join(TEMP_MERGE_DIR, f"{base_name}.srt")) - - # 获取输出文件路径 - output_video = os.path.join(video_dir(), f"merged_video_{time.strftime('%M%S')}.mp4") - output_subtitle = os.path.join(srt_dir(), f"merged_subtitle_{time.strftime('%M%S')}.srt") - - with st.spinner(tr("Merging files...")): - # 合并文件 - merge_videos_and_subtitles( - video_paths, - subtitle_paths, - output_video, - output_subtitle - ) - - success = True - error_msg = "" - - # 检查输出文件是否成功生成 - if not os.path.exists(output_video): - success = False - error_msg += tr("Failed to generate merged video. ") - if not os.path.exists(output_subtitle): - success = False - error_msg += tr("Failed to generate merged subtitle. ") - - if success: - # 显示成功消息 - st.success(tr("Merge completed!")) - merge_videos_result = (output_video, output_subtitle) - # 清理临时目录 - clean_temp_dir() - else: - st.error(error_msg) - - except Exception as e: - error_message = str(e) - if "moviepy" in error_message.lower(): - st.error(tr("Error processing video files. Please check if the videos are valid MP4 files.")) - # elif "pysrt" in error_message.lower(): - # st.error(tr("Error processing subtitle files. Please check if the subtitles are valid SRT files.")) - else: - st.error(f"{tr('Error during merge')}: {error_message}") - - # 合并结果预览放在合并按钮下方 - if merge_videos_result: - st.markdown(f"

{tr('Merge Result Preview')}

", unsafe_allow_html=True) - # 使用列布局使视频居中 - col1, col2, col3 = st.columns([1,2,1]) - with col2: - st.video(merge_videos_result[0]) - st.code(f"{tr('Video Path')}: {merge_videos_result[0]}") - st.code(f"{tr('Subtitle Path')}: {merge_videos_result[1]}") - else: - st.warning(tr("No Files Found")) diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json index 61c0e11..d137ccf 100644 --- a/webui/i18n/zh.json +++ b/webui/i18n/zh.json @@ -136,31 +136,6 @@ "Script Uploaded Successfully": "脚本上传成功", "Invalid JSON format": "无效的JSON格式", "Upload failed": "上传失败", - "Video Subtitle Merge": "**合并视频与字幕**", - "Upload Video and Subtitle Files": "上传视频和字幕文件", - "Matched File Pairs": "已匹配的文件对", - "Merge All Files": "合并所有文件", - "Merge Function Not Implemented": "合并功能待实现", - "No Matched Pairs Found": "未找到匹配的文件对", - "Missing Subtitle": "缺少对应的字幕文件, 请使用其他软件完成字幕转录,比如剪映等", - "Missing Video": "缺少对应的视频文件", - "All Uploaded Files": "所有上传的文件", - "Order": "排序序号", - "Reorder": "重新排序", - "Merging files...": "正在合并文件...", - "Merge completed!": "合并完成!", - "Download Merged Video": "下载合并后的视频", - "Download Merged Subtitle": "下载合并后的字幕", - "Error during merge": "合并过程中出错", - "Failed to generate merged video.": "生成合并视频失败。", - "Failed to generate merged subtitle.": "生成合并字幕失败。", - "Error reading merged video file": "读取合并后的视频文件时出错", - "Error reading merged subtitle file": "读取合并后的字幕文件时出错", - "Error processing video files. Please check if the videos are valid MP4 files.": "处理视频文件时出错。请检查视频是否为有效的MP4文件。", - "Error processing subtitle files. Please check if the subtitles are valid SRT files.": "处理字幕文件时出错。请检查字幕是否为有效的SRT文件。", - "Preview Merged Video": "预览合并后的视频", - "Video Path": "视频路径", - "Subtitle Path": "字幕路径", "Enable Proxy": "启用代理", "QwenVL model is available": "QwenVL 模型可用", "QwenVL model is not available": "QwenVL 模型不可用", @@ -183,9 +158,6 @@ "API rate limit exceeded. Please wait about an hour and try again.": "API 调用次数已达到限制,请等待约一小时后再试。", "Resources exhausted. Please try again later.": "资源已耗尽,请稍后再试。", "Transcription Failed": "转录失败", - "Mergeable Files": "可合并文件数", - "Subtitle Content": "字幕内容", - "Merge Result Preview": "合并结果预览", "Short Generate": "短剧混剪 (高燃剪辑)", "Generate Short Video Script": "AI生成短剧混剪脚本", "Adjust the volume of the original audio": "调整原始音频的音量", diff --git a/webui/utils/merge_video.py b/webui/utils/merge_video.py deleted file mode 100644 index 9d21de3..0000000 --- a/webui/utils/merge_video.py +++ /dev/null @@ -1,115 +0,0 @@ -""" -合并视频和字幕文件 -""" -import os -import pysrt -from moviepy import VideoFileClip, concatenate_videoclips - - -def get_video_duration(video_path): - """获取视频时长(秒)""" - video = VideoFileClip(video_path) - duration = video.duration - video.close() - return duration - - -def adjust_subtitle_timing(subtitle_path, time_offset): - """调整字幕时间戳""" - subs = pysrt.open(subtitle_path) - - # 为每个字幕项添加时间偏移 - for sub in subs: - sub.start.hours += int(time_offset / 3600) - sub.start.minutes += int((time_offset % 3600) / 60) - sub.start.seconds += int(time_offset % 60) - sub.start.milliseconds += int((time_offset * 1000) % 1000) - - sub.end.hours += int(time_offset / 3600) - sub.end.minutes += int((time_offset % 3600) / 60) - sub.end.seconds += int(time_offset % 60) - sub.end.milliseconds += int((time_offset * 1000) % 1000) - - return subs - - -def merge_videos_and_subtitles(video_paths, subtitle_paths, output_video_path, output_subtitle_path): - """合并视频和字幕文件""" - if len(video_paths) != len(subtitle_paths): - raise ValueError("视频文件数量与字幕文件数量不匹配") - - # 1. 合并视频 - video_clips = [] - accumulated_duration = 0 - merged_subs = pysrt.SubRipFile() - - try: - # 处理所有视频和字幕 - for i, (video_path, subtitle_path) in enumerate(zip(video_paths, subtitle_paths)): - # 添加视频 - print(f"处理视频 {i + 1}/{len(video_paths)}: {video_path}") - video_clip = VideoFileClip(video_path) - video_clips.append(video_clip) - - # 处理字幕 - print(f"处理字幕 {i + 1}/{len(subtitle_paths)}: {subtitle_path}") - if i == 0: - # 第一个字幕文件直接读取 - current_subs = pysrt.open(subtitle_path) - else: - # 后续字幕文件需要调整时间戳 - current_subs = adjust_subtitle_timing(subtitle_path, accumulated_duration) - - # 合并字幕 - merged_subs.extend(current_subs) - - # 更新累计时长 - accumulated_duration += video_clip.duration - - # 判断视频是否存在,若已经存在不重复合并 - if not os.path.exists(output_video_path): - print("合并视频中...") - final_video = concatenate_videoclips(video_clips) - - # 保存合并后的视频 - print("保存合并后的视频...") - final_video.write_videofile(output_video_path, audio_codec='aac') - - # 保存合并后的字幕 - print("保存合并后的字幕...") - merged_subs.save(output_subtitle_path, encoding='utf-8') - - print("合并完成") - - finally: - # 清理资源 - for clip in video_clips: - clip.close() - - -def main(): - # 示例用法 - video_paths = [ - "temp/1.mp4", - "temp/2.mp4", - "temp/3.mp4", - "temp/4.mp4", - "temp/5.mp4", - ] - - subtitle_paths = [ - "temp/1.srt", - "temp/2.srt", - "temp/3.srt", - "temp/4.srt", - "temp/5.srt", - ] - - output_video_path = "temp/merged_video.mp4" - output_subtitle_path = "temp/merged_subtitle.srt" - - merge_videos_and_subtitles(video_paths, subtitle_paths, output_video_path, output_subtitle_path) - - -if __name__ == "__main__": - main() From 062d317261bbfa9fdcba42a864c8f5872ddb6358 Mon Sep 17 00:00:00 2001 From: linyq Date: Sun, 3 Aug 2025 18:45:33 +0800 Subject: [PATCH 05/10] =?UTF-8?q?feat(tts):=20=E6=B7=BB=E5=8A=A0=E5=A4=9A?= =?UTF-8?q?=E5=BC=95=E6=93=8ETTS=E6=94=AF=E6=8C=81=E5=B9=B6=E9=87=8D?= =?UTF-8?q?=E6=9E=84=E8=AF=AD=E9=9F=B3=E8=AE=BE=E7=BD=AE=E7=95=8C=E9=9D=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 新增Azure Speech Services和Edge TTS引擎支持 - 重构语音设置界面,支持不同引擎的独立配置 - 添加引擎选择器和详细说明 - 更新requirements.txt添加azure-cognitiveservices-speech依赖 - 改进音色名称验证逻辑 --- app/services/voice.py | 51 ++- config.example.toml | 16 + requirements.txt | 2 +- webui/components/audio_settings.py | 516 +++++++++++++++++++++++++---- 4 files changed, 507 insertions(+), 78 deletions(-) diff --git a/app/services/voice.py b/app/services/voice.py index d45db75..76a7f88 100644 --- a/app/services/voice.py +++ b/app/services/voice.py @@ -1058,6 +1058,27 @@ def is_azure_v2_voice(voice_name: str): return "" +def should_use_azure_speech_services(voice_name: str) -> bool: + """判断音色是否应该使用Azure Speech Services""" + if not voice_name or is_soulvoice_voice(voice_name): + return False + + voice_name = voice_name.strip() + + # 如果是带-V2后缀的,肯定是Azure Speech Services + if voice_name.endswith("-V2"): + return True + + # 检查是否为Azure官方音色格式 (如: zh-CN-YunzeNeural) + # Azure音色通常格式为: [语言]-[地区]-[名称]Neural + import re + pattern = r'^[a-z]{2}-[A-Z]{2}-\w+Neural$' + if re.match(pattern, voice_name): + return True + + return False + + def tts( text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str ) -> Union[SubMaker, None]: @@ -1065,11 +1086,11 @@ def tts( if is_soulvoice_voice(voice_name): return soulvoice_tts(text, voice_name, voice_file, speed=voice_rate) - # 检查是否为 Azure V2 引擎 - if is_azure_v2_voice(voice_name): + # 检查是否应该使用 Azure Speech Services + if should_use_azure_speech_services(voice_name): return azure_tts_v2(text, voice_name, voice_file) - # 默认使用 Azure V1 引擎 + # 默认使用 Edge TTS (Azure V1) return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file) @@ -1140,12 +1161,22 @@ def azure_tts_v1( def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker, None]: - voice_name = is_azure_v2_voice(voice_name) - if not voice_name: - logger.error(f"invalid voice name: {voice_name}") - raise ValueError(f"invalid voice name: {voice_name}") + # 直接使用官方音色名称,不需要V2后缀验证 + # Azure Speech Services 的音色名称如: zh-CN-YunzeNeural, en-US-AvaMultilingualNeural + processed_voice_name = voice_name.strip() + if not processed_voice_name: + logger.error(f"invalid voice name: {voice_name} (empty)") + raise ValueError(f"invalid voice name: {voice_name} (empty)") text = text.strip() + # 检查Azure Speech SDK是否可用 + try: + import azure.cognitiveservices.speech as speechsdk + except ImportError as e: + logger.error("Azure Speech SDK 未安装。请运行: pip install azure-cognitiveservices-speech") + logger.error("或者使用 Edge TTS 引擎作为替代方案") + return None + def _format_duration_to_offset(duration) -> int: if isinstance(duration, str): time_obj = datetime.strptime(duration, "%H:%M:%S.%f") @@ -1164,9 +1195,7 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker, for i in range(3): try: - logger.info(f"start, voice name: {voice_name}, try: {i + 1}") - - import azure.cognitiveservices.speech as speechsdk + logger.info(f"start, voice name: {processed_voice_name}, try: {i + 1}") sub_maker = SubMaker() @@ -1185,7 +1214,7 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker, speech_config = speechsdk.SpeechConfig( subscription=speech_key, region=service_region ) - speech_config.speech_synthesis_voice_name = voice_name + speech_config.speech_synthesis_voice_name = processed_voice_name # speech_config.set_property(property_id=speechsdk.PropertyId.SpeechServiceResponse_RequestSentenceBoundary, # value='true') speech_config.set_property( diff --git a/config.example.toml b/config.example.toml index ddf529a..877b71b 100644 --- a/config.example.toml +++ b/config.example.toml @@ -92,6 +92,22 @@ # 默认模型(可选) model = "FunAudioLLM/CosyVoice2-0.5B" +[ui] + # TTS引擎选择 (edge_tts, azure_speech, soulvoice) + tts_engine = "edge_tts" + + # Edge TTS 配置 + edge_voice_name = "zh-CN-XiaoyiNeural-Female" + edge_volume = 80 + edge_rate = 1.0 + edge_pitch = 0 + + # Azure Speech Services 配置 + azure_voice_name = "zh-CN-XiaoyiNeural-Female" + azure_volume = 80 + azure_rate = 1.0 + azure_pitch = 0 + [proxy] # clash 默认地址:http://127.0.0.1:7890 http = "" diff --git a/requirements.txt b/requirements.txt index cddc9b1..5efa517 100644 --- a/requirements.txt +++ b/requirements.txt @@ -29,7 +29,7 @@ google-generativeai>=0.8.5 # python-multipart~=0.0.9 # redis==5.0.3 # opencv-python~=4.10.0.84 -# azure-cognitiveservices-speech~=1.37.0 +azure-cognitiveservices-speech~=1.37.0 # git-changelog~=2.5.2 # watchdog==5.0.2 # pydub==0.25.1 diff --git a/webui/components/audio_settings.py b/webui/components/audio_settings.py index b194e81..100cc44 100644 --- a/webui/components/audio_settings.py +++ b/webui/components/audio_settings.py @@ -19,6 +19,53 @@ def get_soulvoice_voices(): return ["soulvoice:custom"] +def get_tts_engine_options(): + """获取TTS引擎选项""" + return { + "edge_tts": "Edge TTS", + "azure_speech": "Azure Speech Services", + "soulvoice": "SoulVoice" + } + + +def get_tts_engine_descriptions(): + """获取TTS引擎详细描述""" + return { + "edge_tts": { + "title": "Edge TTS", + "features": "完全免费,但服务稳定性一般,不支持语音克隆功能", + "use_case": "测试和轻量级使用", + "registration": None + }, + "azure_speech": { + "title": "Azure Speech Services", + "features": "提供一定免费额度,超出后按量付费,需要绑定海外信用卡", + "use_case": "企业级应用,需要稳定服务", + "registration": "https://portal.azure.com/#view/Microsoft_Azure_ProjectOxford/CognitiveServicesHub/~/SpeechServices" + }, + "soulvoice": { + "title": "SoulVoice", + "features": "提供免费额度,支持语音克隆,支持微信购买额度,无需信用卡,性价比极高", + "use_case": "个人用户和中小企业,需要语音克隆功能", + "registration": "https://soulvoice.scsmtech.cn/" + } + } + + +def is_valid_azure_voice_name(voice_name: str) -> bool: + """检查是否为有效的Azure音色名称格式""" + if not voice_name or not isinstance(voice_name, str): + return False + + voice_name = voice_name.strip() + + # Azure音色名称通常格式为: [语言]-[地区]-[名称]Neural + # 例如: zh-CN-YunzeNeural, en-US-AvaMultilingualNeural + import re + pattern = r'^[a-z]{2}-[A-Z]{2}-\w+Neural$' + return bool(re.match(pattern, voice_name)) + + def render_audio_panel(tr): """渲染音频设置面板""" with st.container(border=True): @@ -33,46 +80,91 @@ def render_audio_panel(tr): def render_tts_settings(tr): """渲染TTS(文本转语音)设置""" + + # 1. TTS引擎选择器 + # st.subheader("🎤 TTS引擎选择") + + engine_options = get_tts_engine_options() + engine_descriptions = get_tts_engine_descriptions() + + # 获取保存的TTS引擎设置 + saved_tts_engine = config.ui.get("tts_engine", "edge_tts") + + # 确保保存的引擎在可用选项中 + if saved_tts_engine not in engine_options: + saved_tts_engine = "edge_tts" + + # TTS引擎选择下拉框 + selected_engine = st.selectbox( + "选择TTS引擎", + options=list(engine_options.keys()), + format_func=lambda x: engine_options[x], + index=list(engine_options.keys()).index(saved_tts_engine), + help="选择您要使用的文本转语音引擎" + ) + + # 保存TTS引擎选择 + config.ui["tts_engine"] = selected_engine + + # 2. 显示引擎详细说明 + if selected_engine in engine_descriptions: + desc = engine_descriptions[selected_engine] + + with st.expander(f"📋 {desc['title']} 详细说明", expanded=True): + st.markdown(f"**特点:** {desc['features']}") + st.markdown(f"**适用场景:** {desc['use_case']}") + + if desc['registration']: + st.markdown(f"**注册地址:** [{desc['registration']}]({desc['registration']})") + + # 3. 根据选择的引擎渲染对应的配置界面 + # st.subheader("⚙️ 引擎配置") + + if selected_engine == "edge_tts": + render_edge_tts_settings(tr) + elif selected_engine == "azure_speech": + render_azure_speech_settings(tr) + elif selected_engine == "soulvoice": + render_soulvoice_engine_settings(tr) + + # 4. 试听功能 + render_voice_preview_new(tr, selected_engine) + + +def render_edge_tts_settings(tr): + """渲染 Edge TTS 引擎设置""" # 获取支持的语音列表 support_locales = ["zh-CN", "en-US"] - azure_voices = voice.get_all_azure_voices(filter_locals=support_locales) + all_voices = voice.get_all_azure_voices(filter_locals=support_locales) - # 添加 SoulVoice 语音选项 - soulvoice_voices = get_soulvoice_voices() - - # 合并所有语音选项 - all_voices = azure_voices + soulvoice_voices + # 只保留标准版本的语音(Edge TTS专用,不包含V2) + edge_voices = [v for v in all_voices if "-V2" not in v] # 创建友好的显示名称 friendly_names = {} - - # Azure 语音的友好名称 - for v in azure_voices: + for v in edge_voices: friendly_names[v] = v.replace("Female", tr("Female")).replace("Male", tr("Male")).replace("Neural", "") - # SoulVoice 语音的友好名称 - for v in soulvoice_voices: - friendly_names[v] = "SoulVoice (自定义音色)" - # 获取保存的语音设置 - saved_voice_name = config.ui.get("voice_name", "") - saved_voice_name_index = 0 + saved_voice_name = config.ui.get("edge_voice_name", "zh-CN-XiaoxiaoNeural-Female") - if saved_voice_name in friendly_names: - saved_voice_name_index = list(friendly_names.keys()).index(saved_voice_name) - else: - # 如果没有保存的设置,选择与UI语言匹配的第一个语音 - for i, v in enumerate(all_voices): - if (v.lower().startswith(st.session_state["ui_language"].lower()) - and "V2" not in v and not v.startswith("soulvoice:")): - saved_voice_name_index = i + # 确保保存的音色在可用列表中 + if saved_voice_name not in friendly_names: + # 选择与UI语言匹配的第一个语音 + for v in edge_voices: + if v.lower().startswith(st.session_state.get("ui_language", "zh-CN").lower()): + saved_voice_name = v break + else: + # 如果没找到匹配的,使用第一个 + saved_voice_name = edge_voices[0] if edge_voices else "" - # 语音选择下拉框 + # 音色选择下拉框(Edge TTS音色相对较少,保留下拉框) selected_friendly_name = st.selectbox( - tr("Speech Synthesis"), + "音色选择", options=list(friendly_names.values()), - index=saved_voice_name_index, + index=list(friendly_names.keys()).index(saved_voice_name) if saved_voice_name in friendly_names else 0, + help="选择Edge TTS音色" ) # 获取实际的语音名称 @@ -80,34 +172,342 @@ def render_tts_settings(tr): list(friendly_names.values()).index(selected_friendly_name) ] - # 如果选择的是 SoulVoice 自定义选项,使用配置的音色 URI - if voice_name == "soulvoice:custom": - custom_voice_uri = config.soulvoice.get("voice_uri", "") - if custom_voice_uri: - # 确保音色 URI 有正确的前缀 - if not custom_voice_uri.startswith("soulvoice:") and not custom_voice_uri.startswith("speech:"): - voice_name = f"soulvoice:{custom_voice_uri}" + # 显示音色信息 + with st.expander("💡 Edge TTS 音色说明", expanded=False): + st.write("**中文音色:**") + zh_voices = [v for v in edge_voices if v.startswith("zh-CN")] + for v in zh_voices: + gender = "女声" if "Female" in v else "男声" + name = v.replace("-Female", "").replace("-Male", "").replace("zh-CN-", "").replace("Neural", "") + st.write(f"• {name} ({gender})") + + st.write("") + st.write("**英文音色:**") + en_voices = [v for v in edge_voices if v.startswith("en-US")][:5] # 只显示前5个 + for v in en_voices: + gender = "女声" if "Female" in v else "男声" + name = v.replace("-Female", "").replace("-Male", "").replace("en-US-", "").replace("Neural", "") + st.write(f"• {name} ({gender})") + + if len([v for v in edge_voices if v.startswith("en-US")]) > 5: + st.write("• ... 更多英文音色") + + config.ui["edge_voice_name"] = voice_name + config.ui["voice_name"] = voice_name # 兼容性 + + # 音量调节 + voice_volume = st.slider( + "音量调节", + min_value=0, + max_value=100, + value=int(config.ui.get("edge_volume", 80)), + step=1, + help="调节语音音量 (0-100)" + ) + config.ui["edge_volume"] = voice_volume + st.session_state['voice_volume'] = voice_volume / 100.0 + + # 语速调节 + voice_rate = st.slider( + "语速调节", + min_value=0.5, + max_value=2.0, + value=config.ui.get("edge_rate", 1.0), + step=0.1, + help="调节语音速度 (0.5-2.0倍速)" + ) + config.ui["edge_rate"] = voice_rate + st.session_state['voice_rate'] = voice_rate + + # 语调调节 + voice_pitch = st.slider( + "语调调节", + min_value=-50, + max_value=50, + value=int(config.ui.get("edge_pitch", 0)), + step=5, + help="调节语音音调 (-50%到+50%)" + ) + config.ui["edge_pitch"] = voice_pitch + # 转换为比例值 + st.session_state['voice_pitch'] = 1.0 + (voice_pitch / 100.0) + + +def render_azure_speech_settings(tr): + """渲染 Azure Speech Services 引擎设置""" + # 服务区域配置 + azure_speech_region = st.text_input( + "服务区域", + value=config.azure.get("speech_region", ""), + placeholder="例如:eastus", + help="Azure Speech Services 服务区域,如:eastus, westus2, eastasia 等" + ) + + # API Key配置 + azure_speech_key = st.text_input( + "API Key", + value=config.azure.get("speech_key", ""), + type="password", + help="Azure Speech Services API 密钥" + ) + + # 保存Azure配置 + config.azure["speech_region"] = azure_speech_region + config.azure["speech_key"] = azure_speech_key + + # 音色名称输入框 + saved_voice_name = config.ui.get("azure_voice_name", "zh-CN-XiaoxiaoMultilingualNeural") + + # 音色名称输入 + voice_name = st.text_input( + "音色名称", + value=saved_voice_name, + help="输入Azure Speech Services音色名称,直接使用官方音色名称即可。例如:zh-CN-YunzeNeural", + placeholder="zh-CN-YunzeNeural" + ) + + # 显示常用音色示例 + with st.expander("💡 常用音色参考", expanded=False): + st.write("**中文音色:**") + st.write("• zh-CN-XiaoxiaoMultilingualNeural (女声,多语言)") + st.write("• zh-CN-YunzeNeural (男声)") + st.write("• zh-CN-YunxiNeural (男声)") + st.write("• zh-CN-XiaochenNeural (女声)") + st.write("") + st.write("**英文音色:**") + st.write("• en-US-AndrewMultilingualNeural (男声,多语言)") + st.write("• en-US-AvaMultilingualNeural (女声,多语言)") + st.write("• en-US-BrianMultilingualNeural (男声,多语言)") + st.write("• en-US-EmmaMultilingualNeural (女声,多语言)") + st.write("") + st.info("💡 更多音色请参考 [Azure Speech Services 官方文档](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support)") + + # 快速选择按钮 + st.write("**快速选择:**") + cols = st.columns(3) + with cols[0]: + if st.button("中文女声", help="zh-CN-XiaoxiaoMultilingualNeural"): + voice_name = "zh-CN-XiaoxiaoMultilingualNeural" + st.rerun() + with cols[1]: + if st.button("中文男声", help="zh-CN-YunzeNeural"): + voice_name = "zh-CN-YunzeNeural" + st.rerun() + with cols[2]: + if st.button("英文女声", help="en-US-AvaMultilingualNeural"): + voice_name = "en-US-AvaMultilingualNeural" + st.rerun() + + # 验证音色名称并显示状态 + if voice_name.strip(): + # 检查是否为有效的Azure音色格式 + if is_valid_azure_voice_name(voice_name): + st.success(f"✅ 音色名称有效: {voice_name}") + else: + st.warning(f"⚠️ 音色名称格式可能不正确: {voice_name}") + st.info("💡 Azure音色名称通常格式为: [语言]-[地区]-[名称]Neural") + + # 保存配置 + config.ui["azure_voice_name"] = voice_name + config.ui["voice_name"] = voice_name # 兼容性 + + # 音量调节 + voice_volume = st.slider( + "音量调节", + min_value=0, + max_value=100, + value=int(config.ui.get("azure_volume", 80)), + step=1, + help="调节语音音量 (0-100)" + ) + config.ui["azure_volume"] = voice_volume + st.session_state['voice_volume'] = voice_volume / 100.0 + + # 语速调节 + voice_rate = st.slider( + "语速调节", + min_value=0.5, + max_value=2.0, + value=config.ui.get("azure_rate", 1.0), + step=0.1, + help="调节语音速度 (0.5-2.0倍速)" + ) + config.ui["azure_rate"] = voice_rate + st.session_state['voice_rate'] = voice_rate + + # 语调调节 + voice_pitch = st.slider( + "语调调节", + min_value=-50, + max_value=50, + value=int(config.ui.get("azure_pitch", 0)), + step=5, + help="调节语音音调 (-50%到+50%)" + ) + config.ui["azure_pitch"] = voice_pitch + # 转换为比例值 + st.session_state['voice_pitch'] = 1.0 + (voice_pitch / 100.0) + + # 显示配置状态 + if azure_speech_region and azure_speech_key: + st.success("✅ Azure Speech Services 配置已设置") + elif not azure_speech_region: + st.warning("⚠️ 请配置服务区域") + elif not azure_speech_key: + st.warning("⚠️ 请配置 API Key") + + +def render_soulvoice_engine_settings(tr): + """渲染 SoulVoice 引擎设置""" + # API Key 输入 + api_key = st.text_input( + "API Key", + value=config.soulvoice.get("api_key", ""), + type="password", + help="请输入您的 SoulVoice API 密钥" + ) + + # 音色 URI 输入 + voice_uri = st.text_input( + "音色URI", + value=config.soulvoice.get("voice_uri", "speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr"), + help="请输入 SoulVoice 音色标识符", + placeholder="speech:mcg3fdnx:clzkyf4vy00e5qr6hywum4u84:bzznlkuhcjzpbosexitr" + ) + + # 模型名称选择 + model_options = [ + "FunAudioLLM/CosyVoice2-0.5B", + "FunAudioLLM/CosyVoice-300M", + "FunAudioLLM/CosyVoice-300M-SFT", + "FunAudioLLM/CosyVoice-300M-Instruct" + ] + + saved_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B") + if saved_model not in model_options: + model_options.append(saved_model) + + model = st.selectbox( + "模型名称", + options=model_options, + index=model_options.index(saved_model), + help="选择使用的 TTS 模型" + ) + + # 高级设置 + with st.expander("高级设置", expanded=False): + api_url = st.text_input( + "API 地址", + value=config.soulvoice.get("api_url", "https://tts.scsmtech.cn/tts"), + help="SoulVoice API 接口地址" + ) + + # 保存配置 + config.soulvoice["api_key"] = api_key + config.soulvoice["voice_uri"] = voice_uri + config.soulvoice["model"] = model + config.soulvoice["api_url"] = api_url + + # 设置兼容性配置 + if voice_uri: + # 确保音色 URI 有正确的前缀 + if not voice_uri.startswith("soulvoice:") and not voice_uri.startswith("speech:"): + voice_name = f"soulvoice:{voice_uri}" + else: + voice_name = voice_uri if voice_uri.startswith("soulvoice:") else f"soulvoice:{voice_uri}" + config.ui["voice_name"] = voice_name + + # 显示配置状态 + if api_key and voice_uri: + st.success("✅ SoulVoice 配置已设置") + elif not api_key: + st.warning("⚠️ 请配置 SoulVoice API Key") + elif not voice_uri: + st.warning("⚠️ 请配置音色 URI") + + +def render_voice_preview_new(tr, selected_engine): + """渲染新的语音试听功能""" + if st.button("🎵 试听语音合成", use_container_width=True): + play_content = "感谢关注 NarratoAI,有任何问题或建议,可以关注微信公众号,求助或讨论" + + # 根据选择的引擎获取对应的语音配置 + voice_name = "" + voice_rate = 1.0 + voice_pitch = 1.0 + + if selected_engine == "edge_tts": + voice_name = config.ui.get("edge_voice_name", "zh-CN-XiaoyiNeural-Female") + voice_rate = config.ui.get("edge_rate", 1.0) + voice_pitch = 1.0 + (config.ui.get("edge_pitch", 0) / 100.0) + elif selected_engine == "azure_speech": + voice_name = config.ui.get("azure_voice_name", "zh-CN-XiaoxiaoMultilingualNeural") + voice_rate = config.ui.get("azure_rate", 1.0) + voice_pitch = 1.0 + (config.ui.get("azure_pitch", 0) / 100.0) + elif selected_engine == "soulvoice": + voice_uri = config.soulvoice.get("voice_uri", "") + if voice_uri: + if not voice_uri.startswith("soulvoice:") and not voice_uri.startswith("speech:"): + voice_name = f"soulvoice:{voice_uri}" + else: + voice_name = voice_uri if voice_uri.startswith("soulvoice:") else f"soulvoice:{voice_uri}" + voice_rate = 1.0 # SoulVoice 使用默认语速 + voice_pitch = 1.0 # SoulVoice 不支持音调调节 + + if not voice_name: + st.error("请先配置语音设置") + return + + with st.spinner("正在合成语音..."): + temp_dir = utils.storage_dir("temp", create=True) + audio_file = os.path.join(temp_dir, f"tmp-voice-{str(uuid4())}.mp3") + + sub_maker = voice.tts( + text=play_content, + voice_name=voice_name, + voice_rate=voice_rate, + voice_pitch=voice_pitch, + voice_file=audio_file, + ) + + if sub_maker and os.path.exists(audio_file): + st.success("✅ 语音合成成功!") + + # 播放音频 + with open(audio_file, 'rb') as audio_file_obj: + audio_bytes = audio_file_obj.read() + st.audio(audio_bytes, format='audio/mp3') + + # 清理临时文件 + try: + os.remove(audio_file) + except: + pass else: - voice_name = custom_voice_uri if custom_voice_uri.startswith("soulvoice:") else f"soulvoice:{custom_voice_uri}" + st.error("❌ 语音合成失败,请检查配置") - # 保存设置 - config.ui["voice_name"] = voice_name - # 根据语音类型渲染不同的设置 - if voice.is_soulvoice_voice(voice_name): - render_soulvoice_settings(tr) - elif voice.is_azure_v2_voice(voice_name): - render_azure_v2_settings(tr) +def render_azure_v2_settings(tr): + """渲染Azure V2语音设置(保留兼容性)""" + saved_azure_speech_region = config.azure.get("speech_region", "") + saved_azure_speech_key = config.azure.get("speech_key", "") - # 语音参数设置 - render_voice_parameters(tr, voice_name) + azure_speech_region = st.text_input( + tr("Speech Region"), + value=saved_azure_speech_region + ) + azure_speech_key = st.text_input( + tr("Speech Key"), + value=saved_azure_speech_key, + type="password" + ) - # 试听按钮 - render_voice_preview(tr, voice_name) + config.azure["speech_region"] = azure_speech_region + config.azure["speech_key"] = azure_speech_key def render_soulvoice_settings(tr): - """渲染 SoulVoice 语音设置""" + """渲染 SoulVoice 语音设置(保留兼容性)""" saved_api_key = config.soulvoice.get("api_key", "") saved_api_url = config.soulvoice.get("api_url", "https://tts.scsmtech.cn/tts") saved_model = config.soulvoice.get("model", "FunAudioLLM/CosyVoice2-0.5B") @@ -158,27 +558,8 @@ def render_soulvoice_settings(tr): st.warning("⚠️ 请配置音色 URI") -def render_azure_v2_settings(tr): - """渲染Azure V2语音设置""" - saved_azure_speech_region = config.azure.get("speech_region", "") - saved_azure_speech_key = config.azure.get("speech_key", "") - - azure_speech_region = st.text_input( - tr("Speech Region"), - value=saved_azure_speech_region - ) - azure_speech_key = st.text_input( - tr("Speech Key"), - value=saved_azure_speech_key, - type="password" - ) - - config.azure["speech_region"] = azure_speech_region - config.azure["speech_key"] = azure_speech_key - - def render_voice_parameters(tr, voice_name): - """渲染语音参数设置""" + """渲染语音参数设置(保留兼容性)""" # 音量 - 使用统一的默认值 voice_volume = st.slider( tr("Speech Volume"), @@ -260,9 +641,12 @@ def render_voice_preview(tr, voice_name): ) if sub_maker and os.path.exists(audio_file): + st.success(tr("Voice synthesis successful")) st.audio(audio_file, format="audio/mp3") if os.path.exists(audio_file): os.remove(audio_file) + else: + st.error(tr("Voice synthesis failed")) def render_bgm_settings(tr): From 864ebea1bed81a5ede15ad16c79bd26800af745c Mon Sep 17 00:00:00 2001 From: linyq Date: Sun, 3 Aug 2025 20:06:14 +0800 Subject: [PATCH 06/10] =?UTF-8?q?feat(llm):=20=E6=B7=BB=E5=8A=A0gemini-2.5?= =?UTF-8?q?-flash=E6=94=AF=E6=8C=81=E5=B9=B6=E5=A2=9E=E5=BC=BAAPI=E8=B0=83?= =?UTF-8?q?=E7=94=A8=E5=8F=AF=E9=9D=A0=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 添加对gemini-2.5-flash模型的支持并更新示例配置 实现模型验证的严格/宽松模式配置 为API调用添加重试机制和超时配置 增加对更多HTTP错误状态码的处理 --- app/services/llm/base.py | 35 +++- app/services/llm/config_validator.py | 7 +- .../llm/providers/gemini_openai_provider.py | 2 + app/services/llm/providers/gemini_provider.py | 168 ++++++++++++++---- config.example.toml | 14 ++ 5 files changed, 181 insertions(+), 45 deletions(-) diff --git a/app/services/llm/base.py b/app/services/llm/base.py index 91f6c33..6bebef1 100644 --- a/app/services/llm/base.py +++ b/app/services/llm/base.py @@ -57,14 +57,33 @@ class BaseLLMProvider(ABC): """验证配置参数""" if not self.api_key: raise ConfigurationError("API密钥不能为空", "api_key") - + if not self.model_name: raise ConfigurationError("模型名称不能为空", "model_name") - - if self.model_name not in self.supported_models: - from .exceptions import ModelNotSupportedError - raise ModelNotSupportedError(self.model_name, self.provider_name) + + # 检查模型支持情况 + self._validate_model_support() + def _validate_model_support(self): + """验证模型支持情况""" + from app.config import config + from .exceptions import ModelNotSupportedError + from loguru import logger + + # 获取模型验证模式配置 + strict_model_validation = config.app.get('strict_model_validation', True) + + if self.model_name not in self.supported_models: + if strict_model_validation: + # 严格模式:抛出异常 + raise ModelNotSupportedError(self.model_name, self.provider_name) + else: + # 宽松模式:仅记录警告 + logger.warning( + f"模型 {self.model_name} 未在供应商 {self.provider_name} 的预定义支持列表中," + f"但已启用宽松验证模式。支持的模型列表: {self.supported_models}" + ) + def _initialize(self): """初始化提供商特定设置,子类可重写""" pass @@ -77,11 +96,15 @@ class BaseLLMProvider(ABC): def _handle_api_error(self, status_code: int, response_text: str) -> LLMServiceError: """处理API错误,返回适当的异常""" from .exceptions import APICallError, RateLimitError, AuthenticationError - + if status_code == 401: return AuthenticationError() elif status_code == 429: return RateLimitError() + elif status_code in [502, 503, 504]: + return APICallError(f"服务器错误 HTTP {status_code}", status_code, response_text) + elif status_code == 524: + return APICallError(f"服务器处理超时 HTTP {status_code}", status_code, response_text) else: return APICallError(f"HTTP {status_code}", status_code, response_text) diff --git a/app/services/llm/config_validator.py b/app/services/llm/config_validator.py index 0bfe287..31b902a 100644 --- a/app/services/llm/config_validator.py +++ b/app/services/llm/config_validator.py @@ -213,7 +213,8 @@ class LLMConfigValidator: "确保所有API密钥都已正确配置", "建议为每个提供商配置base_url以提高稳定性", "定期检查模型名称是否为最新版本", - "建议配置多个提供商作为备用方案" + "建议配置多个提供商作为备用方案", + "如果使用新发布的模型遇到MODEL_NOT_SUPPORTED错误,可以设置 strict_model_validation = false 启用宽松验证模式" ] } @@ -252,8 +253,8 @@ class LLMConfigValidator: """获取示例模型名称""" examples = { "gemini": { - "vision": ["gemini-2.0-flash-lite", "gemini-2.0-flash"], - "text": ["gemini-2.0-flash", "gemini-1.5-pro"] + "vision": ["gemini-2.5-flash", "gemini-2.0-flash-lite", "gemini-2.0-flash"], + "text": ["gemini-2.5-flash", "gemini-2.0-flash", "gemini-1.5-pro"] }, "openai": { "vision": [], diff --git a/app/services/llm/providers/gemini_openai_provider.py b/app/services/llm/providers/gemini_openai_provider.py index 45c30cb..e9c33ff 100644 --- a/app/services/llm/providers/gemini_openai_provider.py +++ b/app/services/llm/providers/gemini_openai_provider.py @@ -27,6 +27,7 @@ class GeminiOpenAIVisionProvider(VisionModelProvider): @property def supported_models(self) -> List[str]: return [ + "gemini-2.5-flash", "gemini-2.0-flash-lite", "gemini-2.0-flash", "gemini-1.5-pro", @@ -137,6 +138,7 @@ class GeminiOpenAITextProvider(TextModelProvider): @property def supported_models(self) -> List[str]: return [ + "gemini-2.5-flash", "gemini-2.0-flash-lite", "gemini-2.0-flash", "gemini-1.5-pro", diff --git a/app/services/llm/providers/gemini_provider.py b/app/services/llm/providers/gemini_provider.py index 9b571e6..949df21 100644 --- a/app/services/llm/providers/gemini_provider.py +++ b/app/services/llm/providers/gemini_provider.py @@ -27,6 +27,7 @@ class GeminiVisionProvider(VisionModelProvider): @property def supported_models(self) -> List[str]: return [ + "gemini-2.5-flash", "gemini-2.0-flash-lite", "gemini-2.0-flash", "gemini-1.5-pro", @@ -136,25 +137,72 @@ class GeminiVisionProvider(VisionModelProvider): return base64.b64encode(img_bytes).decode('utf-8') async def _make_api_call(self, payload: Dict[str, Any]) -> Dict[str, Any]: - """执行原生Gemini API调用""" + """执行原生Gemini API调用,包含重试机制""" + from app.config import config + url = f"{self.base_url}/models/{self.model_name}:generateContent?key={self.api_key}" - - response = await asyncio.to_thread( - requests.post, - url, - json=payload, - headers={ - "Content-Type": "application/json", - "User-Agent": "NarratoAI/1.0" - }, - timeout=120 - ) - - if response.status_code != 200: - error = self._handle_api_error(response.status_code, response.text) - raise error - - return response.json() + + max_retries = config.app.get('llm_max_retries', 3) + base_timeout = config.app.get('llm_vision_timeout', 120) + + for attempt in range(max_retries): + try: + # 根据尝试次数调整超时时间 + timeout = base_timeout * (attempt + 1) + logger.debug(f"Gemini API调用尝试 {attempt + 1}/{max_retries},超时设置: {timeout}秒") + + response = await asyncio.to_thread( + requests.post, + url, + json=payload, + headers={ + "Content-Type": "application/json", + "User-Agent": "NarratoAI/1.0" + }, + timeout=timeout + ) + + if response.status_code == 200: + return response.json() + + # 处理特定的错误状态码 + if response.status_code == 429: + # 速率限制,等待后重试 + wait_time = 30 * (attempt + 1) + logger.warning(f"Gemini API速率限制,等待 {wait_time} 秒后重试") + await asyncio.sleep(wait_time) + continue + elif response.status_code in [502, 503, 504, 524]: + # 服务器错误或超时,可以重试 + if attempt < max_retries - 1: + wait_time = 10 * (attempt + 1) + logger.warning(f"Gemini API服务器错误 {response.status_code},等待 {wait_time} 秒后重试") + await asyncio.sleep(wait_time) + continue + + # 其他错误,直接抛出 + error = self._handle_api_error(response.status_code, response.text) + raise error + + except requests.exceptions.Timeout: + if attempt < max_retries - 1: + wait_time = 15 * (attempt + 1) + logger.warning(f"Gemini API请求超时,等待 {wait_time} 秒后重试") + await asyncio.sleep(wait_time) + continue + else: + raise APICallError("Gemini API请求超时,已达到最大重试次数") + except requests.exceptions.RequestException as e: + if attempt < max_retries - 1: + wait_time = 10 * (attempt + 1) + logger.warning(f"Gemini API网络错误: {str(e)},等待 {wait_time} 秒后重试") + await asyncio.sleep(wait_time) + continue + else: + raise APICallError(f"Gemini API网络错误: {str(e)}") + + # 如果所有重试都失败了 + raise APICallError("Gemini API调用失败,已达到最大重试次数") def _parse_vision_response(self, response_data: Dict[str, Any]) -> str: """解析视觉分析响应""" @@ -192,6 +240,7 @@ class GeminiTextProvider(TextModelProvider): @property def supported_models(self) -> List[str]: return [ + "gemini-2.5-flash", "gemini-2.0-flash-lite", "gemini-2.0-flash", "gemini-1.5-pro", @@ -278,25 +327,72 @@ class GeminiTextProvider(TextModelProvider): return self._parse_text_response(response_data) async def _make_api_call(self, payload: Dict[str, Any]) -> Dict[str, Any]: - """执行原生Gemini API调用""" + """执行原生Gemini API调用,包含重试机制""" + from app.config import config + url = f"{self.base_url}/models/{self.model_name}:generateContent?key={self.api_key}" - - response = await asyncio.to_thread( - requests.post, - url, - json=payload, - headers={ - "Content-Type": "application/json", - "User-Agent": "NarratoAI/1.0" - }, - timeout=120 - ) - - if response.status_code != 200: - error = self._handle_api_error(response.status_code, response.text) - raise error - - return response.json() + + max_retries = config.app.get('llm_max_retries', 3) + base_timeout = config.app.get('llm_text_timeout', 180) # 文本生成任务使用更长的基础超时时间 + + for attempt in range(max_retries): + try: + # 根据尝试次数调整超时时间 + timeout = base_timeout * (attempt + 1) + logger.debug(f"Gemini文本API调用尝试 {attempt + 1}/{max_retries},超时设置: {timeout}秒") + + response = await asyncio.to_thread( + requests.post, + url, + json=payload, + headers={ + "Content-Type": "application/json", + "User-Agent": "NarratoAI/1.0" + }, + timeout=timeout + ) + + if response.status_code == 200: + return response.json() + + # 处理特定的错误状态码 + if response.status_code == 429: + # 速率限制,等待后重试 + wait_time = 30 * (attempt + 1) + logger.warning(f"Gemini API速率限制,等待 {wait_time} 秒后重试") + await asyncio.sleep(wait_time) + continue + elif response.status_code in [502, 503, 504, 524]: + # 服务器错误或超时,可以重试 + if attempt < max_retries - 1: + wait_time = 15 * (attempt + 1) + logger.warning(f"Gemini API服务器错误 {response.status_code},等待 {wait_time} 秒后重试") + await asyncio.sleep(wait_time) + continue + + # 其他错误,直接抛出 + error = self._handle_api_error(response.status_code, response.text) + raise error + + except requests.exceptions.Timeout: + if attempt < max_retries - 1: + wait_time = 20 * (attempt + 1) + logger.warning(f"Gemini文本API请求超时,等待 {wait_time} 秒后重试") + await asyncio.sleep(wait_time) + continue + else: + raise APICallError("Gemini文本API请求超时,已达到最大重试次数") + except requests.exceptions.RequestException as e: + if attempt < max_retries - 1: + wait_time = 15 * (attempt + 1) + logger.warning(f"Gemini文本API网络错误: {str(e)},等待 {wait_time} 秒后重试") + await asyncio.sleep(wait_time) + continue + else: + raise APICallError(f"Gemini文本API网络错误: {str(e)}") + + # 如果所有重试都失败了 + raise APICallError("Gemini文本API调用失败,已达到最大重试次数") def _parse_text_response(self, response_data: Dict[str, Any]) -> str: """解析文本生成响应""" diff --git a/config.example.toml b/config.example.toml index 877b71b..7bb37be 100644 --- a/config.example.toml +++ b/config.example.toml @@ -1,5 +1,19 @@ [app] project_version="0.6.8" + + # 模型验证模式配置 + # true: 严格模式,只允许使用预定义支持列表中的模型(默认) + # false: 宽松模式,允许使用任何模型名称,仅记录警告 + strict_model_validation = true + + # LLM API 超时配置(秒) + # 视觉模型基础超时时间 + llm_vision_timeout = 120 + # 文本模型基础超时时间(解说文案生成等复杂任务需要更长时间) + llm_text_timeout = 180 + # API 重试次数 + llm_max_retries = 3 + # 支持视频理解的大模型提供商 # gemini (谷歌, 需要 VPN) # siliconflow (硅基流动) From 60e01bf6f25f1fc83e43e4fd811e53f2da424014 Mon Sep 17 00:00:00 2001 From: linyq Date: Sun, 3 Aug 2025 20:15:26 +0800 Subject: [PATCH 07/10] =?UTF-8?q?refactor:=20=E7=A7=BB=E9=99=A4=E8=A7=86?= =?UTF-8?q?=E9=A2=91=E5=AE=A1=E6=9F=A5=E5=8A=9F=E8=83=BD=E5=8F=8A=E7=9B=B8?= =?UTF-8?q?=E5=85=B3=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 删除不再使用的视频审查功能,包括移除相关面板组件、i18n翻译条目和主程序中的调用 --- webui.py | 7 +-- webui/components/__init__.py | 6 +- webui/components/review_settings.py | 88 ----------------------------- webui/components/script_settings.py | 2 +- webui/i18n/zh.json | 1 - 5 files changed, 5 insertions(+), 99 deletions(-) delete mode 100644 webui/components/review_settings.py diff --git a/webui.py b/webui.py index b649e9d..0701054 100644 --- a/webui.py +++ b/webui.py @@ -4,7 +4,7 @@ import sys from loguru import logger from app.config import config from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, \ - review_settings, system_settings + system_settings # from webui.utils import cache, file_utils from app.utils import utils from app.utils import ffmpeg_utils @@ -227,14 +227,11 @@ def main(): with panel[0]: script_settings.render_script_panel(tr) with panel[1]: - video_settings.render_video_panel(tr) audio_settings.render_audio_panel(tr) with panel[2]: + video_settings.render_video_panel(tr) subtitle_settings.render_subtitle_panel(tr) - # 渲染视频审查面板 - review_settings.render_review_panel(tr) - # 放到最后渲染可能使用PyTorch的部分 # 渲染系统设置面板 with panel[2]: diff --git a/webui/components/__init__.py b/webui/components/__init__.py index 6aafcd7..31d3afb 100644 --- a/webui/components/__init__.py +++ b/webui/components/__init__.py @@ -3,13 +3,11 @@ from .script_settings import render_script_panel from .video_settings import render_video_panel from .audio_settings import render_audio_panel from .subtitle_settings import render_subtitle_panel -from .review_settings import render_review_panel __all__ = [ 'render_basic_settings', 'render_script_panel', 'render_video_panel', 'render_audio_panel', - 'render_subtitle_panel', - 'render_review_panel' -] \ No newline at end of file + 'render_subtitle_panel' +] \ No newline at end of file diff --git a/webui/components/review_settings.py b/webui/components/review_settings.py deleted file mode 100644 index c4f3bce..0000000 --- a/webui/components/review_settings.py +++ /dev/null @@ -1,88 +0,0 @@ -import streamlit as st -import os -from loguru import logger - - -def render_review_panel(tr): - """渲染视频审查面板""" - with st.expander(tr("Video Check"), expanded=False): - try: - video_list = st.session_state.get('video_clip_json', []) - subclip_videos = st.session_state.get('subclip_videos', {}) - except KeyError: - video_list = [] - subclip_videos = {} - - # 计算列数和行数 - num_videos = len(video_list) - cols_per_row = 3 - rows = (num_videos + cols_per_row - 1) // cols_per_row # 向上取整计算行数 - - # 使用容器展示视频 - for row in range(rows): - cols = st.columns(cols_per_row) - for col in range(cols_per_row): - index = row * cols_per_row + col - if index < num_videos: - with cols[col]: - render_video_item(tr, video_list, subclip_videos, index) - - -def render_video_item(tr, video_list, subclip_videos, index): - """渲染单个视频项""" - video_script = video_list[index] - - # 显示时间戳 - timestamp = video_script.get('_id', '') - st.text_area( - tr("Timestamp"), - value=timestamp, - height=70, - disabled=True, - key=f"timestamp_{index}" - ) - - # 显示视频播放器 - video_path = subclip_videos.get(timestamp) - if video_path and os.path.exists(video_path): - try: - st.video(video_path) - except Exception as e: - logger.error(f"加载视频失败 {video_path}: {e}") - st.error(f"无法加载视频: {os.path.basename(video_path)}") - else: - st.warning(tr("视频文件未找到")) - - # 显示画面描述 - st.text_area( - tr("Picture Description"), - value=video_script.get('picture', ''), - height=150, - disabled=True, - key=f"picture_{index}" - ) - - # 显示旁白文本 - narration = st.text_area( - tr("Narration"), - value=video_script.get('narration', ''), - height=150, - key=f"narration_{index}" - ) - # 保存修改后的旁白文本 - if narration != video_script.get('narration', ''): - video_script['narration'] = narration - st.session_state['video_clip_json'] = video_list - - # 显示剪辑模式 - ost = st.selectbox( - tr("Clip Mode"), - options=range(0, 3), - index=video_script.get('OST', 0), - key=f"ost_{index}", - help=tr("0: Keep the audio only, 1: Keep the original sound only, 2: Keep the original sound and audio") - ) - # 保存修改后的剪辑模式 - if ost != video_script.get('OST', 0): - video_script['OST'] = ost - st.session_state['video_clip_json'] = video_list diff --git a/webui/components/script_settings.py b/webui/components/script_settings.py index 42ff794..15956e5 100644 --- a/webui/components/script_settings.py +++ b/webui/components/script_settings.py @@ -333,7 +333,7 @@ def render_script_buttons(tr, params): video_clip_json_details = st.text_area( tr("Video Script"), value=json.dumps(st.session_state.get('video_clip_json', []), indent=2, ensure_ascii=False), - height=180 + height=500 ) # 操作按钮行 - 合并格式检查和保存功能 diff --git a/webui/i18n/zh.json b/webui/i18n/zh.json index d137ccf..f955be9 100644 --- a/webui/i18n/zh.json +++ b/webui/i18n/zh.json @@ -81,7 +81,6 @@ "TTS Provider": "语音合成提供商", "Hide Log": "隐藏日志", "Upload Local Files": "上传本地文件", - "Video Check": "视频审查", "File Uploaded Successfully": "文件上传成功", "timestamp": "时间戳", "Picture description": "图片描述", From 0d49812ad2c114b8eb2f953f5675b74c082f2e60 Mon Sep 17 00:00:00 2001 From: linyq Date: Sun, 3 Aug 2025 20:27:34 +0800 Subject: [PATCH 08/10] =?UTF-8?q?refactor:=20=E7=A7=BB=E9=99=A4=E5=BA=9F?= =?UTF-8?q?=E5=BC=83=E8=84=9A=E6=9C=AC=E6=96=87=E4=BB=B6=E5=B9=B6=E6=9B=B4?= =?UTF-8?q?=E6=96=B0=E9=A1=B9=E7=9B=AE=E7=89=88=E6=9C=AC=E8=87=B30.7.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 删除不再使用的脚本文件(check_gpu_cuda_cudnn.bat, changelog.py, main.py, release-notes.md, video_pipeline.py) 将项目版本从0.6.8更新至0.7.0,并同步更新config.example.toml中的版本号 --- changelog.py | 17 ---- check_gpu_cuda_cudnn.bat | Bin 5384 -> 0 bytes config.example.toml | 2 +- main.py | 19 ----- project_version | 2 +- release-notes.md | 17 ---- video_pipeline.py | 178 --------------------------------------- 7 files changed, 2 insertions(+), 233 deletions(-) delete mode 100644 changelog.py delete mode 100644 check_gpu_cuda_cudnn.bat delete mode 100644 main.py delete mode 100644 release-notes.md delete mode 100644 video_pipeline.py diff --git a/changelog.py b/changelog.py deleted file mode 100644 index 31a1337..0000000 --- a/changelog.py +++ /dev/null @@ -1,17 +0,0 @@ -from git_changelog.cli import build_and_render - -# 运行这段脚本自动生成CHANGELOG.md文件 - -build_and_render( - repository=".", - output="CHANGELOG.md", - convention="angular", - provider="github", - template="keepachangelog", - parse_trailers=True, - parse_refs=False, - sections=["build", "deps", "feat", "fix", "refactor"], - versioning="pep440", - bump="1.1.2", # 指定bump版本 - in_place=True, -) diff --git a/check_gpu_cuda_cudnn.bat b/check_gpu_cuda_cudnn.bat deleted file mode 100644 index 2cb5f5d2395788c513d88ac0729c70b3ad0f5f4d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5384 zcmd5=O-x)>6h4}Cq0*=ul6LhVL0eMjSlh;6XfznL{!Gh&WibQ8{DUw<83qJXp-r0D zW@>6w^5)%l-@WI2=bZ1H zd+%)i{hAacCu1@t9_}KpgYvZL$;n0Zot9G)!}Su{XQU#tGHFK4U|tRFDU8kF>EXSC zRu=7otnUgWvPGGtz|^l+kk0^yCC0`vM|N+2det{~HR#&91oGt{qYi6==pw7gm$#)!@mvBeT$bgkLx+jkP3rQAei{(ADqYfr|g zTP2mCw}zWB8H2u@Oi0?qCl3oa@~HV`t7tHKa@^oNGPe-%28 zVtn3M67-eL9uLT@{Pec`-fg5roW%%duiwMZWxq+#!`8oK2U*j$LtfjqryQ@8_itB^ zl_L&1$Q&v{C*x5ld`b z`h9Wrv#V*#KE{eHXKcdUZp^Y%i2dmXEQ|KTh+KlMqG4qonrrY9Sy44e$Rwg^Z@?tY z8OJErG3H6yWUceMWtK8Z=h;lsH-ChK3{&5cGnQ#ldMx8yhC1H43QN|hu4rz#@qT87 zEv+DT(k3Hk(aIwqi?H6t(Pq!?Gq@tvkgvU4Ic77#*4~xU>cC%@ug*CHa7 zrh1-LY^RnXFz+Pl+=-@Y=|fFOh3xTs7B$D7sOo6fpn}Z91I!6)cZV#(Q}jZxmMV8O zGRMIITkrYw$Hz)YNhhR4=~ij_jK-3Ohq6X~YHMvG6Faf$<`KQg21r%5K6mFQ?NLUv zE^tkBT?4R^mi{4A#L{i_TP2+ZECdEsfYXs+?vV={C5m3 zo2;vc&;6NvaOLNl?_KLDja?`vN8@cD)`uPz*82}F2X}(XtLT+E8l~e&Yu2^Hdrrq$ z%ZOJ;?=R9*DmT13J<2nHdOqsOz25!Bug#uK@p&eXnu&_mTh8zI6ril}uYKI9a_>(b z?2N_U;(WNB`@67ITLr@V>F{pETE_FK-F5M-s+qKN&(x?USrocD?|omiHyeM&*^T=H zdqV9j@s!k>joP^~W-e~ctGm`lmpy+B69+XZN3B13^5HriT(ciBsDIDJ2ysO?7{8IM z6i2HlE!dj(pm?|7ycCX%N)2bwQ@M@!vzhK?`Fy?Ko*v0B?$l<@ncDpa!Tx^$ DN=ARF diff --git a/config.example.toml b/config.example.toml index 7bb37be..94a98b7 100644 --- a/config.example.toml +++ b/config.example.toml @@ -1,5 +1,5 @@ [app] - project_version="0.6.8" + project_version="0.7.0" # 模型验证模式配置 # true: 严格模式,只允许使用预定义支持列表中的模型(默认) diff --git a/main.py b/main.py deleted file mode 100644 index bfec175..0000000 --- a/main.py +++ /dev/null @@ -1,19 +0,0 @@ -import os -import uvicorn -from loguru import logger - -from app.config import config - -if __name__ == "__main__": - logger.info( - "start server, docs: http://127.0.0.1:" + str(config.listen_port) + "/docs" - ) - os.environ["HTTP_PROXY"] = config.proxy.get("http") - os.environ["HTTPS_PROXY"] = config.proxy.get("https") - uvicorn.run( - app="app.asgi:app", - host=config.listen_host, - port=config.listen_port, - reload=config.reload_debug, - log_level="warning", - ) diff --git a/project_version b/project_version index bc8443e..bcaffe1 100644 --- a/project_version +++ b/project_version @@ -1 +1 @@ -0.6.8 \ No newline at end of file +0.7.0 \ No newline at end of file diff --git a/release-notes.md b/release-notes.md deleted file mode 100644 index d290698..0000000 --- a/release-notes.md +++ /dev/null @@ -1,17 +0,0 @@ -# Release Notes - -## Latest Changes - -* docs(README): 更新README. PR [#138](https://github.com/linyqh/NarratoAI/pull/138) by [@linyqh](https://github.com/linyqh). -* Dev 0.6.0. PR [#137](https://github.com/linyqh/NarratoAI/pull/137) by [@linyqh](https://github.com/linyqh). -* Dev 0.6.0 . PR [#134](https://github.com/linyqh/NarratoAI/pull/134) by [@linyqh](https://github.com/linyqh). -* Dev-0.3.9. PR [#73](https://github.com/linyqh/NarratoAI/pull/73) by [@linyqh](https://github.com/linyqh). -* 0.3.9 版本发布. PR [#71](https://github.com/linyqh/NarratoAI/pull/71) by [@linyqh](https://github.com/linyqh). -* docs: add Japanese README. PR [#66](https://github.com/linyqh/NarratoAI/pull/66) by [@eltociear](https://github.com/eltociear). -* docs: 测试 release 2. PR [#62](https://github.com/linyqh/NarratoAI/pull/62) by [@linyqh](https://github.com/linyqh). -* docs: 测试 release. PR [#61](https://github.com/linyqh/NarratoAI/pull/61) by [@linyqh](https://github.com/linyqh). -* docs: 测试commit. PR [#60](https://github.com/linyqh/NarratoAI/pull/60) by [@linyqh](https://github.com/linyqh). -* Dev. PR [#59](https://github.com/linyqh/NarratoAI/pull/59) by [@linyqh](https://github.com/linyqh). -* 0.2.0新版预发布. PR [#37](https://github.com/linyqh/NarratoAI/pull/37) by [@linyqh](https://github.com/linyqh). -* v0.3.6. PR [#58](https://github.com/linyqh/NarratoAI/pull/58) by [@linyqh](https://github.com/linyqh). -* 0.3.4 修改各种bug. PR [#49](https://github.com/linyqh/NarratoAI/pull/49) by [@linyqh](https://github.com/linyqh). diff --git a/video_pipeline.py b/video_pipeline.py deleted file mode 100644 index dc7fa26..0000000 --- a/video_pipeline.py +++ /dev/null @@ -1,178 +0,0 @@ -import requests -import json -import os -import time -from typing import Dict, Any - -class VideoPipeline: - def __init__(self, base_url: str = "http://127.0.0.1:8080"): - self.base_url = base_url - - def download_video(self, url: str, resolution: str = "1080p", - output_format: str = "mp4", rename: str = None) -> Dict[str, Any]: - """下载视频的第一步""" - endpoint = f"{self.base_url}/api/v2/youtube/download" - payload = { - "url": url, - "resolution": resolution, - "output_format": output_format, - "rename": rename or time.strftime("%Y-%m-%d") - } - - response = requests.post(endpoint, json=payload) - response.raise_for_status() - return response.json() - - def generate_script(self, video_path: str, skip_seconds: int = 0, - threshold: int = 30, vision_batch_size: int = 10, - vision_llm_provider: str = "gemini") -> Dict[str, Any]: - """生成脚本的第二步""" - endpoint = f"{self.base_url}/api/v2/scripts/generate" - payload = { - "video_path": video_path, - "skip_seconds": skip_seconds, - "threshold": threshold, - "vision_batch_size": vision_batch_size, - "vision_llm_provider": vision_llm_provider - } - - response = requests.post(endpoint, json=payload) - response.raise_for_status() - return response.json() - - def crop_video(self, video_path: str, script: list) -> Dict[str, Any]: - """剪辑视频的第三步""" - endpoint = f"{self.base_url}/api/v2/scripts/crop" - payload = { - "video_origin_path": video_path, - "video_script": script - } - - response = requests.post(endpoint, json=payload) - response.raise_for_status() - return response.json() - - def generate_final_video(self, task_id: str, video_path: str, - script_path: str, script: list, subclip_videos: Dict[str, str], voice_name: str) -> Dict[str, Any]: - """生成最终视频的第四步""" - endpoint = f"{self.base_url}/api/v2/scripts/start-subclip" - - request_data = { - "video_clip_json": script, - "video_clip_json_path": script_path, - "video_origin_path": video_path, - "video_aspect": "16:9", - "video_language": "zh-CN", - "voice_name": voice_name, - "voice_volume": 1, - "voice_rate": 1.2, - "voice_pitch": 1, - "bgm_name": "random", - "bgm_type": "random", - "bgm_file": "", - "bgm_volume": 0.3, - "subtitle_enabled": True, - "subtitle_position": "bottom", - "font_name": "STHeitiMedium.ttc", - "text_fore_color": "#FFFFFF", - "text_background_color": "transparent", - "font_size": 75, - "stroke_color": "#000000", - "stroke_width": 1.5, - "custom_position": 70, - "n_threads": 8 - } - - payload = { - "request": request_data, - "subclip_videos": subclip_videos - } - - params = {"task_id": task_id} - response = requests.post(endpoint, params=params, json=payload) - response.raise_for_status() - return response.json() - - def save_script_to_json(self, script: list, script_path: str) -> str: - """保存脚本到json文件""" - try: - with open(script_path, 'w', encoding='utf-8') as f: - json.dump(script, f, ensure_ascii=False, indent=2) - print(f"脚本已保存到: {script_path}") - return script_path - except Exception as e: - print(f"保存脚本失败: {str(e)}") - raise - - def run_pipeline(self, task_id: str, script_name: str, youtube_url: str, video_name: str="null", skip_seconds: int = 0, threshold: int = 30, vision_batch_size: int = 10, vision_llm_provider: str = "gemini", voice_name: str = "zh-CN-YunjianNeural") -> Dict[str, Any]: - """运行完整的pipeline""" - try: - current_path = os.path.dirname(os.path.abspath(__file__)) - video_path = os.path.join(current_path, "resource", "videos", f"{video_name}.mp4") - # 判断视频是否存在 - if not os.path.exists(video_path): - # 1. 下载视频 - print(f"视频不存在, 开始下载视频: {video_path}") - download_result = self.download_video(url=youtube_url, resolution="1080p", output_format="mp4", rename=video_name) - video_path = download_result["output_path"] - else: - print(f"视频已存在: {video_path}") - - # 2. 判断script_name是否存在 - # 2.1.1 拼接脚本路径 NarratoAI/resource/scripts - script_path = os.path.join(current_path, "resource", "scripts", script_name) - if os.path.exists(script_path): - script = json.load(open(script_path, "r", encoding="utf-8")) - else: - # 2.1.2 生成脚本 - print("开始生成脚本...") - script_result = self.generate_script(video_path=video_path, skip_seconds=skip_seconds, threshold=threshold, vision_batch_size=vision_batch_size, vision_llm_provider=vision_llm_provider) - script = script_result["script"] - - # 2.2 保存脚本到json文件 - print("保存脚本到json文件...") - self.save_script_to_json(script=script, script_path=script_path) - - # 3. 剪辑视频 - print("开始剪辑视频...") - crop_result = self.crop_video(video_path=video_path, script=script) - subclip_videos = crop_result["subclip_videos"] - - # 4. 生成最终视频 - print("开始生成最终视频...") - self.generate_final_video( - task_id=task_id, - video_path=video_path, - script_path=script_path, - script=script, - subclip_videos=subclip_videos, - voice_name=voice_name - ) - - return { - "status": "等待异步生成视频", - "path": os.path.join(current_path, "storage", "tasks", task_id) - } - - except Exception as e: - return { - "status": "error", - "error": str(e) - } - - -# 使用示例 -if __name__ == "__main__": - pipeline = VideoPipeline() - result = pipeline.run_pipeline( - task_id="test_111901", - script_name="test.json", - youtube_url="https://www.youtube.com/watch?v=vLJ7Yed6FQ4", - video_name="2024-11-19-01", - skip_seconds=50, - threshold=35, - vision_batch_size=10, - vision_llm_provider="gemini", - voice_name="zh-CN-YunjianNeural", - ) - print(result) From b1f2cd37edc9be1678dd1a2337d1d492b3b48e50 Mon Sep 17 00:00:00 2001 From: linyq Date: Sun, 3 Aug 2025 20:29:07 +0800 Subject: [PATCH 09/10] =?UTF-8?q?=E6=96=B0=E5=A2=9E=20azure=20=E4=BE=9D?= =?UTF-8?q?=E8=B5=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.example.toml | 2 +- project_version | 2 +- requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/config.example.toml b/config.example.toml index 94a98b7..7bb37be 100644 --- a/config.example.toml +++ b/config.example.toml @@ -1,5 +1,5 @@ [app] - project_version="0.7.0" + project_version="0.6.8" # 模型验证模式配置 # true: 严格模式,只允许使用预定义支持列表中的模型(默认) diff --git a/project_version b/project_version index bcaffe1..bc8443e 100644 --- a/project_version +++ b/project_version @@ -1 +1 @@ -0.7.0 \ No newline at end of file +0.6.8 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 5efa517..c9b0c3e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,6 +11,7 @@ pysrt==1.1.2 openai~=1.77.0 google-generativeai>=0.8.5 +azure-cognitiveservices-speech~=1.37.0 # 待优化项 # opencv-python==4.11.0.86 @@ -29,7 +30,6 @@ google-generativeai>=0.8.5 # python-multipart~=0.0.9 # redis==5.0.3 # opencv-python~=4.10.0.84 -azure-cognitiveservices-speech~=1.37.0 # git-changelog~=2.5.2 # watchdog==5.0.2 # pydub==0.25.1 From 8e933ff6291c79875fe4241f4ef9626e8c3658a8 Mon Sep 17 00:00:00 2001 From: linyqh Date: Sun, 3 Aug 2025 21:29:16 +0800 Subject: [PATCH 10/10] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=95=B4=E5=90=88?= =?UTF-8?q?=E5=8C=85bat=E5=90=AF=E5=8A=A8=E8=84=9A=E6=9C=AC=E5=92=8C?= =?UTF-8?q?=E7=8E=AF=E5=A2=83=E6=A3=80=E6=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- config.example.toml | 2 +- project_version | 2 +- start.bat | 88 ---------------------------------- update.bat | 112 -------------------------------------------- 4 files changed, 2 insertions(+), 202 deletions(-) delete mode 100644 start.bat delete mode 100644 update.bat diff --git a/config.example.toml b/config.example.toml index 7bb37be..94a98b7 100644 --- a/config.example.toml +++ b/config.example.toml @@ -1,5 +1,5 @@ [app] - project_version="0.6.8" + project_version="0.7.0" # 模型验证模式配置 # true: 严格模式,只允许使用预定义支持列表中的模型(默认) diff --git a/project_version b/project_version index bc8443e..bcaffe1 100644 --- a/project_version +++ b/project_version @@ -1 +1 @@ -0.6.8 \ No newline at end of file +0.7.0 \ No newline at end of file diff --git a/start.bat b/start.bat deleted file mode 100644 index 37a788b..0000000 --- a/start.bat +++ /dev/null @@ -1,88 +0,0 @@ -@echo off -:: 设置控制台代码页为UTF-8,解决中文显示问题 -chcp 65001 >nul -:: 关闭命令回显,使脚本运行时更整洁 - -:: 获取当前脚本所在目录路径并存储在变量中 -set "CURRENT_DIR=%~dp0" -echo ***** 当前工作目录: %CURRENT_DIR% ***** - -:: ==================== FFmpeg 配置 ==================== -:: 设置 FFmpeg 可执行文件的完整路径 -set "FFMPEG_BINARY=%CURRENT_DIR%lib\ffmpeg\ffmpeg-7.0-essentials_build\ffmpeg.exe" -set "FFMPEG_PATH=%CURRENT_DIR%lib\ffmpeg\ffmpeg-7.0-essentials_build" -echo ***** FFmpeg 执行文件路径: %FFMPEG_BINARY% ***** - -:: 将 FFmpeg 目录添加到系统 PATH 环境变量,使其可以在命令行中直接调用 -set "PATH=%FFMPEG_PATH%;%PATH%" - -:: ==================== ImageMagick 配置 ==================== -:: 设置 ImageMagick 可执行文件的完整路径(用于图像处理) -set "IMAGEMAGICK_BINARY=%CURRENT_DIR%lib\imagemagic\ImageMagick-7.1.1-29-portable-Q16-x64\magick.exe" -set "IMAGEMAGICK_PATH=%CURRENT_DIR%lib\imagemagic\ImageMagick-7.1.1-29-portable-Q16-x64" -echo ***** ImageMagick 执行文件路径: %IMAGEMAGICK_BINARY% ***** - -:: 将 ImageMagick 目录添加到系统 PATH 环境变量 -set "PATH=%IMAGEMAGICK_PATH%;%PATH%" - -:: ==================== Python 环境配置 ==================== -:: 设置 Python 模块搜索路径,确保能够正确导入项目模块 -set "PYTHONPATH=%CURRENT_DIR%NarratoAI;%PYTHONPATH%" -echo ***** Python模块搜索路径: %PYTHONPATH% ***** - -:: ==================== 项目特定环境变量配置 ==================== -:: 设置项目根目录和依赖工具的路径,供应用程序内部使用 -set "NARRATO_ROOT=%CURRENT_DIR%NarratoAI" -set "NARRATO_FFMPEG=%FFMPEG_BINARY%" -set "NARRATO_IMAGEMAGICK=%IMAGEMAGICK_BINARY%" - -:: ==================== Streamlit 配置 ==================== -:: 设置 Streamlit(Python Web应用框架)的配置文件路径 -set "USER_HOME=%USERPROFILE%" -set "STREAMLIT_DIR=%USER_HOME%\.streamlit" -set "CREDENTIAL_FILE=%STREAMLIT_DIR%\credentials.toml" -echo ***** Streamlit 凭证文件路径: %CREDENTIAL_FILE% ***** - -:: 检查并创建 Streamlit 配置目录和凭证文件(如果不存在) -if not exist "%STREAMLIT_DIR%" ( - echo 创建 Streamlit 配置目录... - mkdir "%STREAMLIT_DIR%" - ( - echo [general] - echo email="" - ) > "%CREDENTIAL_FILE%" - echo Streamlit 配置文件已创建! -) - -:: ==================== 依赖检查 ==================== -:: 验证必要的外部工具是否存在,确保应用可以正常运行 -if not exist "%FFMPEG_BINARY%" ( - echo 错误: 未找到 FFmpeg 执行文件,路径: %FFMPEG_BINARY% - echo 请确保已正确安装 FFmpeg 或检查路径配置 - pause - exit /b 1 -) - -if not exist "%IMAGEMAGICK_BINARY%" ( - echo 错误: 未找到 ImageMagick 执行文件,路径: %IMAGEMAGICK_BINARY% - echo 请确保已正确安装 ImageMagick 或检查路径配置 - pause - exit /b 1 -) - -:: ==================== 启动应用 ==================== -:: 切换到项目目录并启动应用 -echo ***** 切换工作目录到: %CURRENT_DIR%NarratoAI ***** -cd /d "%CURRENT_DIR%NarratoAI" - -echo ***** 正在启动 NarratoAI 应用... ***** -:: 使用项目自带的Python解释器启动Streamlit应用 -"%CURRENT_DIR%lib\python\python.exe" -m streamlit run webui.py --browser.serverAddress="127.0.0.1" --server.enableCORS=True --server.maxUploadSize=2048 --browser.gatherUsageStats=False -:: 参数说明: -:: --browser.serverAddress="127.0.0.1" - 将服务器绑定到本地地址 -:: --server.enableCORS=True - 启用跨域资源共享 -:: --server.maxUploadSize=2048 - 设置最大上传文件大小为2048MB -:: --browser.gatherUsageStats=False - 禁用使用统计收集 - -:: 应用关闭后暂停,让用户看到最终输出 -pause diff --git a/update.bat b/update.bat deleted file mode 100644 index 963f44c..0000000 --- a/update.bat +++ /dev/null @@ -1,112 +0,0 @@ -@echo off -chcp 65001 >nul -setlocal EnableDelayedExpansion -set "CURRENT_DIR=%~dp0" -echo ***** 当前目录: %CURRENT_DIR% ***** - -REM 清除可能影响的环境变量 -set PYTHONPATH= -set PYTHONHOME= - -REM 初始化代理设置为空 -set "HTTP_PROXY=" -set "HTTPS_PROXY=" - -:git_pull -echo 正在更新代码,请稍候... -REM 使用git更新代码并检查是否成功 -"%CURRENT_DIR%lib\git\bin\git.exe" -C "%CURRENT_DIR%NarratoAI" pull > "%TEMP%\git_output.txt" 2>&1 -set GIT_EXIT_CODE=%ERRORLEVEL% - -if %GIT_EXIT_CODE% NEQ 0 ( - echo [错误] 代码更新失败!错误代码: %GIT_EXIT_CODE% - type "%TEMP%\git_output.txt" - - findstr /C:"error: 403" /C:"fatal: unable to access" /C:"The requested URL returned error: 403" "%TEMP%\git_output.txt" >nul - if !ERRORLEVEL! EQU 0 ( - echo. - echo [提示] 检测到 GitHub 403 错误,可能是由于网络问题导致。 - - if not defined HTTP_PROXY ( - echo. - echo 请输入代理地址(例如 http://127.0.0.1:7890),或直接按回车跳过: - set /p PROXY_INPUT="> " - - if not "!PROXY_INPUT!"=="" ( - set "HTTP_PROXY=!PROXY_INPUT!" - set "HTTPS_PROXY=!PROXY_INPUT!" - echo. - echo [信息] 已设置代理: !PROXY_INPUT! - echo 正在使用代理重试... - goto git_pull - ) else ( - echo. - echo [警告] 未设置代理,建议: - echo - 手动设置系统代理 - echo - 使用VPN或其他网络工具 - echo - 重新运行此脚本并输入代理地址 - ) - ) else ( - echo. - echo [警告] 使用代理 !HTTP_PROXY! 仍然失败。 - echo 您可以: - echo 1. 输入新的代理地址(或直接按回车使用当前代理: !HTTP_PROXY!) - echo 2. 输入 "clear" 清除代理设置 - set /p PROXY_INPUT="> " - - if "!PROXY_INPUT!"=="clear" ( - set "HTTP_PROXY=" - set "HTTPS_PROXY=" - echo [信息] 已清除代理设置 - goto end - ) else if not "!PROXY_INPUT!"=="" ( - set "HTTP_PROXY=!PROXY_INPUT!" - set "HTTPS_PROXY=!PROXY_INPUT!" - echo [信息] 已更新代理为: !PROXY_INPUT! - echo 正在使用新代理重试... - goto git_pull - ) else ( - echo [信息] 保持当前代理: !HTTP_PROXY! - echo 您可以稍后再次尝试或手动解决网络问题 - ) - ) - ) else ( - echo. - echo [警告] 遇到其他错误,请检查输出信息以获取更多详情。 - ) - goto end -) else ( - echo [成功] 代码已成功更新! -) - -echo 正在更新pip,请稍候... -"%CURRENT_DIR%lib\python\python.exe" -m pip install --upgrade pip >nul 2>&1 -if %ERRORLEVEL% NEQ 0 ( - echo [警告] pip更新失败,将继续使用当前版本。 -) else ( - echo [成功] pip已更新至最新版本! -) - -echo 正在安装依赖,请稍候... -REM 确保使用正确的Python和pip -"%CURRENT_DIR%lib\python\python.exe" -m pip install -q -r "%CURRENT_DIR%NarratoAI\requirements.txt" -if %ERRORLEVEL% NEQ 0 ( - echo [错误] 依赖安装失败!请检查requirements.txt文件是否存在。 - goto end -) else ( - echo [成功] 依赖安装完成! -) - -echo =================================== -echo ✓ 程序更新已完成 -echo =================================== - -:end -if exist "%TEMP%\git_output.txt" del "%TEMP%\git_output.txt" -REM 清除设置的代理环境变量 -if defined HTTP_PROXY ( - echo [信息] 本次会话的代理设置已清除 - set "HTTP_PROXY=" - set "HTTPS_PROXY=" -) -pause \ No newline at end of file