fix: jianying draft export failure due to floating-point precision in audio duration

This commit is contained in:
aw123456dew 2026-04-07 17:08:37 +08:00
parent d45c1858c9
commit 852f5ae34c
2 changed files with 87 additions and 30 deletions

View File

@ -1,5 +1,6 @@
import json
import os
import subprocess
import time
from os import path
from loguru import logger
@ -12,6 +13,36 @@ from app.services import state as sm
from app.utils import utils
def get_audio_duration_ffprobe(audio_file: str) -> float:
    """Return the precise duration of an audio file, in seconds.

    Probes the file with ffprobe so the reported length matches the actual
    container metadata, avoiding floating-point drift between TTS engines.

    Args:
        audio_file: Path to the audio file to probe.

    Returns:
        float: Duration in seconds with sub-second (microsecond) precision.

    Raises:
        subprocess.CalledProcessError: If ffprobe exits with a non-zero status.
        ValueError: If ffprobe prints output that is not a valid duration
            (e.g. "N/A" or empty for streams without a known length, or a
            negative value).
        FileNotFoundError: If the ffprobe executable is not on PATH.
    """
    cmd = [
        'ffprobe',
        '-v', 'error',
        '-show_entries', 'format=duration',
        '-of', 'csv=p=0',
        audio_file,
    ]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
        output = result.stdout.strip()
        try:
            duration = float(output)
        except ValueError:
            # ffprobe prints "N/A" (or nothing) when the duration is unknown;
            # surface the raw output instead of a bare float() parse error.
            raise ValueError(
                f"ffprobe returned a non-numeric duration: {output!r}"
            ) from None
        if duration < 0:
            # A negative duration is never valid media metadata.
            raise ValueError(f"ffprobe returned a negative duration: {duration}")
        logger.debug(f"使用ffprobe获取音频时长: {duration:.6f}")
        return duration
    except subprocess.CalledProcessError as e:
        logger.error(f"ffprobe执行失败: {e.stderr}")
        raise
    except Exception as e:
        logger.error(f"获取音频时长失败: {str(e)}")
        raise
def start_export_jianying_draft(task_id: str, params: VideoClipParams):
"""
导出到剪映草稿的后台任务
@ -166,11 +197,18 @@ def start_export_jianying_draft(task_id: str, params: VideoClipParams):
# 处理音频
if item['OST'] in [0, 2]: # 需要TTS的片段
if os.path.exists(audio_file):
# 添加TTS音频片段
# 对于音频片段target_timerange的第二个参数是持续时间
# 使用ffprobe获取精确的音频时长避免因TTS引擎差异导致时长不匹配
actual_audio_duration = get_audio_duration_ffprobe(audio_file)
logger.info(f"音频文件实际时长: {actual_audio_duration:.6f}秒, 脚本时长(视频): {duration:.3f}")
# 使用音频实际时长和视频时长中的较小值,确保不超过素材时长
# 当TTS语速调整时音频可能比视频长或短取较小值可以避免超出素材
safe_duration = min(actual_audio_duration, duration)
logger.info(f"使用时长: {safe_duration:.6f}秒 (取音频和视频时长的较小值)")
audio_segment = AudioSegment(
audio_file,
trange(f"{current_time}s", f"{duration}s")
trange(f"{current_time}s", f"{safe_duration}s")
)
script.add_segment(audio_segment, '音频轨道')
else:

View File

@ -1,6 +1,7 @@
import streamlit as st
import os
import sys
import time
from loguru import logger
from app.config import config
from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, \
@ -221,6 +222,45 @@ def render_generate_button():
time.sleep(0.5)
def get_voice_name_for_tts_engine(tts_engine: str) -> str:
    """Resolve the voice name the user picked for the given TTS engine.

    Falls back to the engine-specific default stored in the config when the
    session state holds no explicit 'voice_name'.
    """
    # Map each known engine to its (config key, hard-coded fallback) pair;
    # any unrecognized engine is treated as Edge TTS.
    engine_defaults = {
        'doubaotts': ('doubaotts_voice_type', 'BV700_streaming'),
        'azure_speech': ('azure_voice_name', 'zh-CN-XiaoxiaoMultilingualNeural'),
    }
    config_key, fallback = engine_defaults.get(
        tts_engine, ('edge_voice_name', 'zh-CN-XiaoxiaoNeural-Female')
    )
    return st.session_state.get('voice_name', config.ui.get(config_key, fallback))
def get_jianying_export_params() -> VideoClipParams:
    """Assemble the parameters for exporting to a Jianying draft.

    Reads every value from the Streamlit session state (with defaults) plus
    the app config, and returns a fully populated VideoClipParams object.
    """
    state = st.session_state
    engine = state.get('tts_engine', 'azure')
    # Voice selection depends on the engine, so resolve it via the helper.
    selected_voice = get_voice_name_for_tts_engine(engine)
    return VideoClipParams(
        video_clip_json_path=state['video_clip_json_path'],
        video_origin_path=state['video_origin_path'],
        tts_engine=engine,
        voice_name=selected_voice,
        voice_rate=state.get('voice_rate', 1.0),
        voice_pitch=state.get('voice_pitch', 1.0),
        n_threads=config.app.get('n_threads', 4),
        video_aspect=VideoAspect.landscape,
        subtitle_enabled=state.get('subtitle_enabled', False),
        font_name=state.get('font_name', 'Microsoft YaHei'),
        font_size=state.get('font_size', 24),
        text_fore_color=state.get('text_fore_color', '#FFFFFF'),
        subtitle_position=state.get('subtitle_position', 'bottom'),
        custom_position=state.get('custom_position', 70.0),
        tts_volume=state.get('tts_volume', 1.0),
        original_volume=state.get('original_volume', 0.7),
        bgm_volume=state.get('bgm_volume', 0.3),
        draft_name=state.get('draft_name_input', f"NarratoAI_{int(time.time())}"),
    )
def render_export_jianying_button():
"""渲染导出到剪映草稿按钮和处理逻辑"""
import os
@ -276,38 +316,17 @@ def render_export_jianying_button():
st.error("请输入草稿名称")
return
# 获取音频设置
tts_engine = st.session_state.get('tts_engine', 'azure')
voice_name = st.session_state.get('voice_name', 'zh-CN-YunjianNeural')
voice_rate = st.session_state.get('voice_rate', 1.0)
voice_pitch = st.session_state.get('voice_pitch', 1.0)
# 创建任务ID
task_id = str(uuid.uuid4())
st.session_state['task_id'] = task_id
# 构建参数
logger.debug(f"准备创建VideoClipParams草稿名称: '{draft_name}'")
params = VideoClipParams(
video_clip_json_path=st.session_state['video_clip_json_path'],
video_origin_path=st.session_state['video_origin_path'],
tts_engine=tts_engine,
voice_name=voice_name,
voice_rate=voice_rate,
voice_pitch=voice_pitch,
n_threads=config.app.get('n_threads', 4),
video_aspect=VideoAspect.landscape,
subtitle_enabled=st.session_state.get('subtitle_enabled', False),
font_name=st.session_state.get('font_name', 'Microsoft YaHei'),
font_size=st.session_state.get('font_size', 24),
text_fore_color=st.session_state.get('text_fore_color', '#FFFFFF'),
subtitle_position=st.session_state.get('subtitle_position', 'bottom'),
custom_position=st.session_state.get('custom_position', 70.0),
tts_volume=st.session_state.get('tts_volume', 1.0),
original_volume=st.session_state.get('original_volume', 0.7),
bgm_volume=st.session_state.get('bgm_volume', 0.3),
draft_name=draft_name
)
try:
params = get_jianying_export_params()
except Exception as e:
logger.error(f"构建参数失败: {e}")
st.error(f"参数构建失败: {e}")
return
with st.spinner("正在导出到剪映草稿,请稍候..."):
try: