From 852f5ae34cbdf138569e152553d0d6a77095692 Mon Sep 17 00:00:00 2001
From: aw123456dew <1444236498@qq.com>
Date: Tue, 7 Apr 2026 17:08:37 +0800
Subject: [PATCH] fix: jianying draft export failure due to floating-point
 precision in audio duration

---
 app/services/jianying_task.py | 44 +++++++++++++++++++--
 webui.py                      | 73 ++++++++++++++++++++++-------------
 2 files changed, 87 insertions(+), 30 deletions(-)

diff --git a/app/services/jianying_task.py b/app/services/jianying_task.py
index 52a32a6..25b4a74 100644
--- a/app/services/jianying_task.py
+++ b/app/services/jianying_task.py
@@ -1,5 +1,6 @@
 import json
 import os
+import subprocess
 import time
 from os import path
 from loguru import logger
@@ -12,6 +13,36 @@
 from app.services import state as sm
 from app.utils import utils
 
+def get_audio_duration_ffprobe(audio_file: str) -> float:
+    """
+    使用ffprobe获取音频文件的精确时长(秒)
+
+    Args:
+        audio_file: 音频文件路径
+
+    Returns:
+        float: 音频时长(秒),精确到微秒
+    """
+    try:
+        cmd = [
+            'ffprobe',
+            '-v', 'error',
+            '-show_entries', 'format=duration',
+            '-of', 'csv=p=0',
+            audio_file
+        ]
+        result = subprocess.run(cmd, capture_output=True, text=True, check=True)
+        duration = float(result.stdout.strip())
+        logger.debug(f"使用ffprobe获取音频时长: {duration:.6f}秒")
+        return duration
+    except subprocess.CalledProcessError as e:
+        logger.error(f"ffprobe执行失败: {e.stderr}")
+        raise
+    except Exception as e:
+        logger.error(f"获取音频时长失败: {str(e)}")
+        raise
+
+
 def start_export_jianying_draft(task_id: str, params: VideoClipParams):
     """
     导出到剪映草稿的后台任务
@@ -166,11 +197,18 @@
             # 处理音频
             if item['OST'] in [0, 2]:  # 需要TTS的片段
                 if os.path.exists(audio_file):
-                    # 添加TTS音频片段
-                    # 对于音频片段,target_timerange的第二个参数是持续时间
+                    # 使用ffprobe获取精确的音频时长,避免因TTS引擎差异导致时长不匹配
+                    actual_audio_duration = get_audio_duration_ffprobe(audio_file)
+                    logger.info(f"音频文件实际时长: {actual_audio_duration:.6f}秒, 脚本时长(视频): {duration:.3f}秒")
+
+                    # 使用音频实际时长和视频时长中的较小值,确保不超过素材时长
+                    # 当TTS语速调整时,音频可能比视频长或短,取较小值可以避免超出素材
+                    safe_duration = min(actual_audio_duration, duration)
+                    logger.info(f"使用时长: {safe_duration:.6f}秒 (取音频和视频时长的较小值)")
+
                     audio_segment = AudioSegment(
                         audio_file,
-                        trange(f"{current_time}s", f"{duration}s")
+                        trange(f"{current_time}s", f"{safe_duration}s")
                     )
                     script.add_segment(audio_segment, '音频轨道')
                 else:
diff --git a/webui.py b/webui.py
index 6a58e7c..d2ab42b 100644
--- a/webui.py
+++ b/webui.py
@@ -1,6 +1,7 @@
 import streamlit as st
 import os
 import sys
+import time
 from loguru import logger
 from app.config import config
 from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, \
@@ -221,6 +222,45 @@ def render_generate_button():
             time.sleep(0.5)
 
 
+def get_voice_name_for_tts_engine(tts_engine: str) -> str:
+    """根据TTS引擎获取用户选择的音色"""
+    if tts_engine == 'doubaotts':
+        return st.session_state.get('voice_name', config.ui.get('doubaotts_voice_type', 'BV700_streaming'))
+    elif tts_engine == 'azure_speech':
+        return st.session_state.get('voice_name', config.ui.get('azure_voice_name', 'zh-CN-XiaoxiaoMultilingualNeural'))
+    else:
+        return st.session_state.get('voice_name', config.ui.get('edge_voice_name', 'zh-CN-XiaoxiaoNeural-Female'))
+
+
+def get_jianying_export_params() -> VideoClipParams:
+    """获取导出到剪映草稿的参数"""
+    tts_engine = st.session_state.get('tts_engine', 'azure')
+    voice_name = get_voice_name_for_tts_engine(tts_engine)
+    voice_rate = st.session_state.get('voice_rate', 1.0)
+    voice_pitch = st.session_state.get('voice_pitch', 1.0)
+
+    return VideoClipParams(
+        video_clip_json_path=st.session_state['video_clip_json_path'],
+        video_origin_path=st.session_state['video_origin_path'],
+        tts_engine=tts_engine,
+        voice_name=voice_name,
+        voice_rate=voice_rate,
+        voice_pitch=voice_pitch,
+        n_threads=config.app.get('n_threads', 4),
+        video_aspect=VideoAspect.landscape,
+        subtitle_enabled=st.session_state.get('subtitle_enabled', False),
+        font_name=st.session_state.get('font_name', 'Microsoft YaHei'),
+        font_size=st.session_state.get('font_size', 24),
+        text_fore_color=st.session_state.get('text_fore_color', '#FFFFFF'),
+        subtitle_position=st.session_state.get('subtitle_position', 'bottom'),
+        custom_position=st.session_state.get('custom_position', 70.0),
+        tts_volume=st.session_state.get('tts_volume', 1.0),
+        original_volume=st.session_state.get('original_volume', 0.7),
+        bgm_volume=st.session_state.get('bgm_volume', 0.3),
+        draft_name=st.session_state.get('draft_name_input', f"NarratoAI_{int(time.time())}")
+    )
+
+
 def render_export_jianying_button():
     """渲染导出到剪映草稿按钮和处理逻辑"""
     import os
@@ -276,38 +316,17 @@ def render_export_jianying_button():
             st.error("请输入草稿名称")
             return
 
-        # 获取音频设置
-        tts_engine = st.session_state.get('tts_engine', 'azure')
-        voice_name = st.session_state.get('voice_name', 'zh-CN-YunjianNeural')
-        voice_rate = st.session_state.get('voice_rate', 1.0)
-        voice_pitch = st.session_state.get('voice_pitch', 1.0)
-
         # 创建任务ID
        task_id = str(uuid.uuid4())
         st.session_state['task_id'] = task_id
 
         # 构建参数
-        logger.debug(f"准备创建VideoClipParams,草稿名称: '{draft_name}'")
-        params = VideoClipParams(
-            video_clip_json_path=st.session_state['video_clip_json_path'],
-            video_origin_path=st.session_state['video_origin_path'],
-            tts_engine=tts_engine,
-            voice_name=voice_name,
-            voice_rate=voice_rate,
-            voice_pitch=voice_pitch,
-            n_threads=config.app.get('n_threads', 4),
-            video_aspect=VideoAspect.landscape,
-            subtitle_enabled=st.session_state.get('subtitle_enabled', False),
-            font_name=st.session_state.get('font_name', 'Microsoft YaHei'),
-            font_size=st.session_state.get('font_size', 24),
-            text_fore_color=st.session_state.get('text_fore_color', '#FFFFFF'),
-            subtitle_position=st.session_state.get('subtitle_position', 'bottom'),
-            custom_position=st.session_state.get('custom_position', 70.0),
-            tts_volume=st.session_state.get('tts_volume', 1.0),
-            original_volume=st.session_state.get('original_volume', 0.7),
-            bgm_volume=st.session_state.get('bgm_volume', 0.3),
-            draft_name=draft_name
-        )
+        try:
+            params = get_jianying_export_params()
+        except Exception as e:
+            logger.error(f"构建参数失败: {e}")
+            st.error(f"参数构建失败: {e}")
+            return
 
         with st.spinner("正在导出到剪映草稿,请稍候..."):
             try: