From 852f5ae34cbdf138569e152553d0d6a77095692 Mon Sep 17 00:00:00 2001
From: aw123456dew <1444236498@qq.com>
Date: Tue, 7 Apr 2026 17:08:37 +0800
Subject: [PATCH] fix: jianying draft export failure due to floating-point
 precision in audio duration

---
 app/services/jianying_task.py | 44 +++++++++++++++++++--
 webui.py                      | 73 ++++++++++++++++++++++-------------
 2 files changed, 87 insertions(+), 30 deletions(-)

diff --git a/app/services/jianying_task.py b/app/services/jianying_task.py
index 52a32a6..25b4a74 100644
--- a/app/services/jianying_task.py
+++ b/app/services/jianying_task.py
@@ -1,5 +1,6 @@
 import json
 import os
+import subprocess
 import time
 from os import path
 from loguru import logger
@@ -12,6 +13,36 @@
 from app.services import state as sm
 from app.utils import utils
 
+def get_audio_duration_ffprobe(audio_file: str) -> float:
+    """
+    使用ffprobe获取音频文件的精确时长(秒)
+
+    Args:
+        audio_file: 音频文件路径
+
+    Returns:
+        float: 音频时长(秒),精确到微秒
+    """
+    try:
+        cmd = [
+            'ffprobe',
+            '-v', 'error',
+            '-show_entries', 'format=duration',
+            '-of', 'csv=p=0',
+            audio_file
+        ]
+        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+        duration = float(result.stdout.strip())
+        logger.debug(f"使用ffprobe获取音频时长: {duration:.6f}秒")
+        return duration
+    except subprocess.CalledProcessError as e:
+        logger.error(f"ffprobe执行失败: {e.stderr}")
+        raise
+    except Exception as e:
+        logger.error(f"获取音频时长失败: {str(e)}")
+        raise
+
+
 def start_export_jianying_draft(task_id: str, params: VideoClipParams):
     """
     导出到剪映草稿的后台任务
@@ -166,11 +197,18 @@
             # 处理音频
             if item['OST'] in [0, 2]:  # 需要TTS的片段
                 if os.path.exists(audio_file):
-                    # 添加TTS音频片段
-                    # 对于音频片段,target_timerange的第二个参数是持续时间
+                    # 使用ffprobe获取精确的音频时长,避免因TTS引擎差异导致时长不匹配
+                    actual_audio_duration = get_audio_duration_ffprobe(audio_file)
+                    logger.info(f"音频文件实际时长: {actual_audio_duration:.6f}秒, 脚本时长(视频): {duration:.3f}秒")
+
+                    # 使用音频实际时长和视频时长中的较小值,确保不超过素材时长
+                    # 当TTS语速调整时,音频可能比视频长或短,取较小值可以避免超出素材
+                    safe_duration = min(actual_audio_duration, duration)
+                    logger.info(f"使用时长: {safe_duration:.6f}秒 (取音频和视频时长的较小值)")
+
                     audio_segment = AudioSegment(
                         audio_file,
-                        trange(f"{current_time}s", f"{duration}s")
+                        trange(f"{current_time}s", f"{safe_duration}s")
                     )
                     script.add_segment(audio_segment, '音频轨道')
                 else:
diff --git a/webui.py b/webui.py
index 6a58e7c..d2ab42b 100644
--- a/webui.py
+++ b/webui.py
@@ -1,6 +1,7 @@
 import streamlit as st
 import os
 import sys
+import time
 from loguru import logger
 from app.config import config
 from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, \
@@ -221,6 +222,45 @@ def render_generate_button():
             time.sleep(0.5)
 
 
+def get_voice_name_for_tts_engine(tts_engine: str) -> str:
+    """根据TTS引擎获取用户选择的音色"""
+    if tts_engine == 'doubaotts':
+        return st.session_state.get('voice_name', config.ui.get('doubaotts_voice_type', 'BV700_streaming'))
+    elif tts_engine == 'azure_speech':
+        return st.session_state.get('voice_name', config.ui.get('azure_voice_name', 'zh-CN-XiaoxiaoMultilingualNeural'))
+    else:
+        return st.session_state.get('voice_name', config.ui.get('edge_voice_name', 'zh-CN-XiaoxiaoNeural-Female'))
+
+
+def get_jianying_export_params() -> VideoClipParams:
+    """获取导出到剪映草稿的参数"""
+    tts_engine = st.session_state.get('tts_engine', 'azure')
+    voice_name = get_voice_name_for_tts_engine(tts_engine)
+    voice_rate = st.session_state.get('voice_rate', 1.0)
+    voice_pitch = st.session_state.get('voice_pitch', 1.0)
+
+    return VideoClipParams(
+        video_clip_json_path=st.session_state['video_clip_json_path'],
+        video_origin_path=st.session_state['video_origin_path'],
+        tts_engine=tts_engine,
+        voice_name=voice_name,
+        voice_rate=voice_rate,
+        voice_pitch=voice_pitch,
+        n_threads=config.app.get('n_threads', 4),
+        video_aspect=VideoAspect.landscape,
+        subtitle_enabled=st.session_state.get('subtitle_enabled', False),
+        font_name=st.session_state.get('font_name', 'Microsoft YaHei'),
+        font_size=st.session_state.get('font_size', 24),
+        text_fore_color=st.session_state.get('text_fore_color', '#FFFFFF'),
+        subtitle_position=st.session_state.get('subtitle_position', 'bottom'),
+        custom_position=st.session_state.get('custom_position', 70.0),
+        tts_volume=st.session_state.get('tts_volume', 1.0),
+        original_volume=st.session_state.get('original_volume', 0.7),
+        bgm_volume=st.session_state.get('bgm_volume', 0.3),
+        draft_name=st.session_state.get('draft_name_input', f"NarratoAI_{int(time.time())}")
+    )
+
+
 def render_export_jianying_button():
     """渲染导出到剪映草稿按钮和处理逻辑"""
     import os
@@ -276,38 +316,17 @@ def render_export_jianying_button():
             st.error("请输入草稿名称")
             return
 
-        # 获取音频设置
-        tts_engine = st.session_state.get('tts_engine', 'azure')
-        voice_name = st.session_state.get('voice_name', 'zh-CN-YunjianNeural')
-        voice_rate = st.session_state.get('voice_rate', 1.0)
-        voice_pitch = st.session_state.get('voice_pitch', 1.0)
-
         # 创建任务ID
        task_id = str(uuid.uuid4())
         st.session_state['task_id'] = task_id
 
         # 构建参数
-        logger.debug(f"准备创建VideoClipParams,草稿名称: '{draft_name}'")
-        params = VideoClipParams(
-            video_clip_json_path=st.session_state['video_clip_json_path'],
-            video_origin_path=st.session_state['video_origin_path'],
-            tts_engine=tts_engine,
-            voice_name=voice_name,
-            voice_rate=voice_rate,
-            voice_pitch=voice_pitch,
-            n_threads=config.app.get('n_threads', 4),
-            video_aspect=VideoAspect.landscape,
-            subtitle_enabled=st.session_state.get('subtitle_enabled', False),
-            font_name=st.session_state.get('font_name', 'Microsoft YaHei'),
-            font_size=st.session_state.get('font_size', 24),
-            text_fore_color=st.session_state.get('text_fore_color', '#FFFFFF'),
-            subtitle_position=st.session_state.get('subtitle_position', 'bottom'),
-            custom_position=st.session_state.get('custom_position', 70.0),
-            tts_volume=st.session_state.get('tts_volume', 1.0),
-            original_volume=st.session_state.get('original_volume', 0.7),
-            bgm_volume=st.session_state.get('bgm_volume', 0.3),
-            draft_name=draft_name
-        )
+        try:
+            params = get_jianying_export_params()
+        except Exception as e:
+            logger.error(f"构建参数失败: {e}")
+            st.error(f"参数构建失败: {e}")
+            return
 
         with st.spinner("正在导出到剪映草稿,请稍候..."):
             try: