feat(audio): 改进音频合并功能，支持 OST 设置，提升时间戳精度

- 重构了 merge_audio_files 函数，增加了对 OST 设置的支持
- 新增 time_to_seconds 函数，支持多种时间格式的转换
- 修改了 audio_merger 模块的逻辑，根据 OST 设置处理音频
- 更新了 task 模块中的 start_subclip 函数，传入 OST 信息
- 优化了 subtitle 和 video 模块的逻辑，适应新的音频处理方式
2025-12-12 03:02:48 +00:00 · 2024-11-20 18:12:45 +08:00 · 2024-11-20 18:12:45 +08:00 · 401eb92fa3
commit 401eb92fa3
parent c03a13db13
10 changed files with 566 additions and 412 deletions
--- a/app/models/schema.py
+++ b/app/models/schema.py
@ -366,6 +366,8 @@ class VideoClipParams(BaseModel):
    custom_position: float = Field(default=70.0, description="自定义位置")
    n_threads: Optional[int] = 8    # 线程数，有助于提升视频处理速度
    tts_volume: float = 1.0  # TTS音频音量
    video_volume: float = 0.1  # 视频原声音量
 class VideoTranscriptionRequest(BaseModel):
    video_name: str
--- a/app/services/audio_merger.py
+++ b/app/services/audio_merger.py
@ -18,95 +18,119 @@ def check_ffmpeg():
        return False
def merge_audio_files(task_id: str, audio_files: list, total_duration: float, list_script: list):
    """
    Merge the TTS clips of a task into one audio track, honouring the OST
    setting of every script segment.

    OST values handled per segment:
        0 - the TTS clip is overlaid at the segment's start time
        1 - original sound only; nothing is added to the merged track
        2 - the TTS clip is first mixed onto a silent clip of the segment's
            length and the result is overlaid at the segment's start time

    A segment whose clip cannot be processed is logged and skipped.

    Args:
        task_id: Task ID; the result is written to utils.task_dir(task_id).
        audio_files: TTS audio file paths in script order. Either one file per
            entry of list_script, or one file per segment whose OST is 0 or 2.
        total_duration: Length of the merged track, in seconds.
        list_script: Full script; each segment carries 'new_timestamp'
            ("start-end") and 'OST'.

    Returns:
        str: Path of the merged "final_audio.mp3", or None when FFmpeg is
        not available.
    """
    if not check_ffmpeg():
        logger.error("FFmpeg未安装，无法合并音频文件")
        return None

    # Silent base track covering the whole video (pydub works in milliseconds).
    final_audio = AudioSegment.silent(duration=total_duration * 1000)

    audio_files = audio_files or []
    if len(audio_files) == len(list_script):
        # One clip per script segment: pair them positionally.
        pairs = list(zip(list_script, audio_files))
    else:
        # Clips exist only for the segments that need TTS (OST 0 or 2), so
        # pairing them with the full script would shift every clip that
        # follows an OST=1 segment onto the wrong timestamp.
        tts_segments = [segment for segment in list_script if segment.get('OST') in (0, 2)]
        if len(tts_segments) != len(audio_files):
            logger.warning(
                f"音频文件数量({len(audio_files)})与需要TTS的片段数量({len(tts_segments)})不一致"
            )
        pairs = list(zip(tts_segments, audio_files))

    for segment, audio_file in pairs:
        try:
            ost = segment['OST']
            if ost == 1:
                # Original sound only (assumed to be present in the video itself).
                continue

            tts_audio = AudioSegment.from_file(audio_file)

            start_time, end_time = segment['new_timestamp'].split('-')
            start_seconds = utils.time_to_seconds(start_time)
            end_seconds = utils.time_to_seconds(end_time)

            if ost == 0:
                final_audio = final_audio.overlay(tts_audio, position=start_seconds * 1000)
            elif ost == 2:
                segment_base = AudioSegment.silent(duration=(end_seconds - start_seconds) * 1000)
                mixed_audio = segment_base.overlay(tts_audio)
                final_audio = final_audio.overlay(mixed_audio, position=start_seconds * 1000)
        except Exception as e:
            logger.error(f"处理音频文件 {audio_file} 时出错: {str(e)}")
            continue

    output_audio_path = os.path.join(utils.task_dir(task_id), "final_audio.mp3")
    final_audio.export(output_audio_path, format="mp3")
    logger.info(f"合并后的音频文件已保存: {output_audio_path}")

    return output_audio_path
 def parse_timestamp(timestamp: str):
    """Convert a timestamp string into seconds.

    Underscore separators are normalised to colons before the value is
    handed to ``time_to_seconds``.
    """
    normalized = timestamp.replace('_', ':')
    return time_to_seconds(normalized)
 def extract_timestamp(filename):
    """Return the (start, end) times, in seconds, encoded in a file name.

    Example name: "audio_00_06-00_24.mp3" (time part "00_06-00_24").
    """
    # Keep what follows the first underscore, up to the first dot.
    stamp = filename.split('_', 1)[1].partition('.')[0]
    raw_start, raw_end = stamp.split('-')
    # File names use "_" where the timestamp has ":"; restore it before converting.
    return tuple(
        time_to_seconds(raw.replace('_', ':'))
        for raw in (raw_start, raw_end)
    )
 def time_to_seconds(time_str):
    """
    Convert a time string to seconds.

    Supported formats (the ",mmm" millisecond part is optional, and "_" is
    accepted in place of ":" so file-name style values such as "00_06" work):
        1. 'HH:MM:SS,mmm'
        2. 'MM:SS,mmm'
        3. 'SS,mmm'

    Args:
        time_str: The time string to convert.

    Returns:
        float: The time in seconds. 0.0 is returned (and the error is logged)
        when the string cannot be parsed.
    """
    try:
        normalized = time_str.replace('_', ':')

        # Split off the millisecond part, if any.
        if ',' in normalized:
            time_part, ms_part = normalized.split(',')
            ms = float(ms_part) / 1000
        else:
            time_part = normalized
            ms = 0.0

        fields = [int(field) for field in time_part.split(':')]
        if len(fields) > 3:
            # Without this check the first field alone would be read as seconds.
            raise ValueError("too many ':' separated fields")

        # Fold H, M, S (or fewer fields) into seconds, most significant first.
        seconds = 0
        for field in fields:
            seconds = seconds * 60 + field

        return float(seconds + ms)

    except (ValueError, IndexError, AttributeError, TypeError) as e:
        logger.error(f"Error parsing time {time_str}: {str(e)}")
        return 0.0
 def extract_timestamp(filename):
    """
    Extract the start and end timestamps encoded in a file name.

    Example: "audio_00_06,500-00_24,800.mp3" -> (6.5, 24.8)

    Any failure is logged and reported as (0.0, 0.0).
    """
    try:
        # Keep what follows the first underscore, up to the first dot,
        # e.g. "00_06,500-00_24,800".
        stamp = filename.split('_', 1)[1].partition('.')[0]
        raw_start, raw_end = stamp.split('-')
        # File names use "_" where the timestamp has ":"; restore it before converting.
        bounds = tuple(
            time_to_seconds(raw.replace('_', ':'))
            for raw in (raw_start, raw_end)
        )
    except Exception as e:
        logger.error(f"Error extracting timestamp from {filename}: {str(e)}")
        return 0.0, 0.0
    return bounds
 if __name__ == "__main__":
--- a/app/services/material.py
+++ b/app/services/material.py
@ -3,6 +3,7 @@ import subprocess
 import random
 import traceback
 from urllib.parse import urlencode
 from datetime import datetime
 import requests
 from typing import List
@ -253,34 +254,58 @@ def download_videos(
 def time_to_seconds(time_str: str) -> float:
    """
    Convert a time string to seconds.

    Supported formats:
        1. 'HH:MM:SS,mmm' (hours:minutes:seconds,milliseconds)
        2. 'MM:SS' (minutes:seconds)
        3. 'SS' (seconds)

    The ",mmm" part is optional in every format, the seconds field may carry
    a decimal fraction, and the leading fields are not range-limited, so
    values such as "75:30" are accepted.

    Args:
        time_str: The time string to convert.

    Returns:
        float: The time in seconds.

    Raises:
        ValueError: If the string does not match a supported format.
    """
    try:
        # Split off the millisecond part, if any.
        if ',' in time_str:
            time_part, ms_part = time_str.split(',')
            ms = int(ms_part) / 1000
        else:
            time_part = time_str
            ms = 0.0

        *leading, last = time_part.split(':')
        if len(leading) > 2:
            raise ValueError("too many ':' separated fields")

        # Parsed arithmetically rather than with strptime, which rejects
        # minutes >= 60, hours >= 24 and fractional seconds.
        whole = 0
        for field in leading:
            whole = whole * 60 + int(field)

        return float(whole * 60 + float(last) + ms)

    except ValueError as e:
        logger.error(f"时间格式错误: {time_str}")
        raise ValueError("时间格式错误，支持的格式：HH:MM:SS,mmm 或 MM:SS 或 SS") from e
 def format_timestamp(seconds: float) -> str:
    """
    Format a number of seconds as a readable timestamp (HH:MM:SS,mmm).

    The value is rounded to the nearest millisecond before it is split into
    fields, so binary floating-point error cannot drop a millisecond
    (1.001 formats as "00:00:01,001"). Negative input is treated as 0.

    Args:
        seconds: Time in seconds.

    Returns:
        str: The timestamp in "HH:MM:SS,mmm" form.
    """
    total_ms = max(0, int(round(seconds * 1000)))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    whole_seconds, milliseconds = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"
 def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> dict:
    """
    保存剪辑后的视频
    Args:
-        timestamp: 需要裁剪的单个时间戳，支持两种格式：
+        timestamp: 需要裁剪的单个时间戳，支持格式：
-                  1. '00:36-00:40' (分:秒-分:秒)
+                  1. 'HH:MM:SS,mmm-HH:MM:SS,mmm' (时:分:秒,毫秒)
-                  2. 'SS-SS' (秒-秒)
+                  2. 'MM:SS-MM:SS' (分:秒-分:秒)
                  3. 'SS-SS' (秒-秒)
        origin_video: 原视频路径
        save_dir: 存储目录
@ -293,7 +318,7 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> di
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
-    video_id = f"vid-{timestamp.replace(':', '_')}"
+    video_id = f"vid-{timestamp.replace(':', '_').replace(',', '-')}"
    video_path = f"{save_dir}/{video_id}.mp4"
    if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
@ -312,12 +337,12 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> di
        # 验证时间段是否有效
        if start >= total_duration:
-            logger.warning(f"起始时间 {format_timestamp(start)} ({start:.2f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.2f}秒)")
+            logger.warning(f"起始时间 {format_timestamp(start)} ({start:.3f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.3f}秒)")
            video.close()
            return {}
        if end > total_duration:
-            logger.warning(f"结束时间 {format_timestamp(end)} ({end:.2f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.2f}秒)，将自动调整为视频结尾")
+            logger.warning(f"结束时间 {format_timestamp(end)} ({end:.3f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.3f}秒)，将自动调整为视频结尾")
            end = total_duration
        if end <= start:
@ -332,7 +357,15 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> di
        try:
            # 检查视频是否有音频轨道并写入文件
-            subclip.write_videofile(video_path, audio=(subclip.audio is not None), logger=None)
+            subclip.write_videofile(
                video_path,
                codec='libx264',
                audio_codec='aac',
                temp_audiofile='temp-audio.m4a',
                remove_temp=True,
                audio=(subclip.audio is not None),
                logger=None
            )
            # 验证生成的视频文件
            if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
--- a/app/services/task.py
+++ b/app/services/task.py
@ -206,134 +206,14 @@ def generate_final_videos(
    return final_video_paths, combined_video_paths
 def start(task_id, params: VideoParams, stop_at: str = "video"):
    """
    Run the video generation pipeline for a task, optionally stopping early.

    The stages run in order: script, terms, audio, subtitle, materials and
    final videos. Task state and progress are reported through
    sm.state.update_task after each stage.

    Args:
        task_id: ID of the task being processed.
        params: Generation parameters. A string video_concat_mode is
            converted to VideoConcatMode in place.
        stop_at: Stage after which to stop. "script", "terms", "audio",
            "subtitle" and "materials" end the run once that stage is done;
            any other value (default "video") runs the whole pipeline.

    Returns:
        dict: The results of the stage the run stopped at (or all results for
        a full run). None when the script, terms, audio, materials or final
        video stage produces nothing; the task is then marked as failed.
    """
    logger.info(f"start task: {task_id}, stop_at: {stop_at}")
    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=5)
    # Accept the concat mode as a plain string and convert it to the enum.
    if type(params.video_concat_mode) is str:
        params.video_concat_mode = VideoConcatMode(params.video_concat_mode)
    # 1. Generate script
    video_script = generate_script(task_id, params)
    if not video_script:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        return
    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=10)
    if stop_at == "script":
        sm.state.update_task(
            task_id, state=const.TASK_STATE_COMPLETE, progress=100, script=video_script
        )
        return {"script": video_script}
    # 2. Generate terms
    # Terms are only generated when the video source is not "local".
    video_terms = ""
    if params.video_source != "local":
        video_terms = generate_terms(task_id, params, video_script)
        if not video_terms:
            sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
            return
    save_script_data(task_id, video_script, video_terms, params)
    if stop_at == "terms":
        sm.state.update_task(
            task_id, state=const.TASK_STATE_COMPLETE, progress=100, terms=video_terms
        )
        return {"script": video_script, "terms": video_terms}
    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)
    # 3. Generate audio
    audio_file, audio_duration, sub_maker = generate_audio(task_id, params, video_script)
    if not audio_file:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        return
    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30)
    if stop_at == "audio":
        sm.state.update_task(
            task_id,
            state=const.TASK_STATE_COMPLETE,
            progress=100,
            audio_file=audio_file,
        )
        return {"audio_file": audio_file, "audio_duration": audio_duration}
    # 4. Generate subtitle
    # NOTE: unlike the other stages, the subtitle result is not checked for failure.
    subtitle_path = generate_subtitle(task_id, params, video_script, sub_maker, audio_file)
    if stop_at == "subtitle":
        sm.state.update_task(
            task_id,
            state=const.TASK_STATE_COMPLETE,
            progress=100,
            subtitle_path=subtitle_path,
        )
        return {"subtitle_path": subtitle_path}
    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)
    # 5. Get video materials
    downloaded_videos = get_video_materials(
        task_id, params, video_terms, audio_duration
    )
    if not downloaded_videos:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        return
    if stop_at == "materials":
        sm.state.update_task(
            task_id,
            state=const.TASK_STATE_COMPLETE,
            progress=100,
            materials=downloaded_videos,
        )
        return {"materials": downloaded_videos}
    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=50)
    # 6. Generate final videos
    final_video_paths, combined_video_paths = generate_final_videos(
        task_id, params, downloaded_videos, audio_file, subtitle_path
    )
    if not final_video_paths:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        return
    logger.success(
        f"task {task_id} finished, generated {len(final_video_paths)} videos."
    )
    # Everything produced by the run; stored on the task and returned to the caller.
    kwargs = {
        "videos": final_video_paths,
        "combined_videos": combined_video_paths,
        "script": video_script,
        "terms": video_terms,
        "audio_file": audio_file,
        "audio_duration": audio_duration,
        "subtitle_path": subtitle_path,
        "materials": downloaded_videos,
    }
    sm.state.update_task(
        task_id, state=const.TASK_STATE_COMPLETE, progress=100, **kwargs
    )
    return kwargs
 def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict):
-    """
+    """后台任务（自动剪辑视频进行剪辑）"""
    后台任务（自动剪辑视频进行剪辑）
        task_id: 任务ID
        params: 剪辑参数
        subclip_path_videos: 视频文件路径
    """
    logger.info(f"\n\n## 开始任务: {task_id}")
    # 初始化 ImageMagick
    if not utils.init_imagemagick():
        logger.warning("ImageMagick 初始化失败，字幕可能无法正常显示")
    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=5)
    # tts 角色名称
@ -341,8 +221,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
    logger.info("\n\n## 1. 加载视频脚本")
    video_script_path = path.join(params.video_clip_json_path)
-    # video_script_path = video_clip_json_path
+    
    # 判断json文件是否存在
    if path.exists(video_script_path):
        try:
            with open(video_script_path, "r", encoding="utf-8") as f:
@ -355,10 +234,12 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
                logger.debug(f"解说完整脚本: \n{video_script}")
                logger.debug(f"解说 OST 列表: \n{video_ost}")
                logger.debug(f"解说时间戳列表: \n{time_list}")
                # 获取视频总时长(单位 s)
-                total_duration = list_script[-1]['new_timestamp']
+                last_timestamp = list_script[-1]['new_timestamp']
-                total_duration = int(total_duration.split("-")[1].split(":")[0]) * 60 + int(
+                end_time = last_timestamp.split("-")[1]
-                    total_duration.split("-")[1].split(":")[1])
+                total_duration = utils.time_to_seconds(end_time)
        except Exception as e:
            logger.error(f"无法读取视频json脚本，请检查配置是否正确。{e}")
            raise ValueError("无法读取视频json脚本，请检查配置是否正确")
@ -366,32 +247,51 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
        logger.error(f"video_script_path: {video_script_path} \n\n", traceback.format_exc())
        raise ValueError("解说脚本不存在！请检查配置是否正确。")
-    logger.info("\n\n## 2. 生成音频列表")
+    logger.info("\n\n## 2. 根据OST设置生成音频列表")
-    audio_files, sub_maker_list = voice.tts_multiple(
+    # 只为OST=0或2的片段生成TTS音频
-        task_id=task_id,
+    tts_segments = [
-        list_script=list_script,
+        segment for segment in list_script 
-        voice_name=voice_name,
+        if segment['OST'] in [0, 2]
-        voice_rate=params.voice_rate,
+    ]
-        voice_pitch=params.voice_pitch,
+    logger.debug(f"tts_segments: {tts_segments}")
-        force_regenerate=True
+    if tts_segments:
        audio_files, sub_maker_list = voice.tts_multiple(
            task_id=task_id,
            list_script=tts_segments,  # 只传入需要TTS的片段
            voice_name=voice_name,
            voice_rate=params.voice_rate,
            voice_pitch=params.voice_pitch,
            force_regenerate=True
        )
        if audio_files is None:
            sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
            logger.error("TTS转换音频失败, 可能是网络不可用! 如果您在中国, 请使用VPN.")
            return
    else:
        audio_files = []
    logger.info(f"合并音频文件:\n{audio_files}")
    # 传入OST信息以便正确处理音频
    final_audio = audio_merger.merge_audio_files(
        task_id=task_id, 
        audio_files=audio_files, 
        total_duration=total_duration, 
        list_script=list_script  # 传入完整脚本以便处理OST
    )
    if audio_files is None:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        logger.error(
            "TTS转换音频失败, 可能是网络不可用! 如果您在中国, 请使用VPN.")
        return
    logger.info(f"合并音频:\n\n {audio_files}")
    audio_file = audio_merger.merge_audio_files(task_id, audio_files, total_duration, list_script)
    sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30)
    # 只为OST=0或2的片段生成字幕
    subtitle_path = ""
    if params.subtitle_enabled:
        subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
        subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
        logger.info(f"\n\n## 3. 生成字幕、提供程序是: {subtitle_provider}")
-        # 使用 faster-whisper-large-v2 模型生成字幕
+         
-        subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
+        subtitle.create(
            audio_file=final_audio,
            subtitle_file=subtitle_path,
        )
        subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
        if not subtitle_lines:
@ -434,14 +334,15 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
    final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4")
-    logger.info(f"\n\n## 6. 最后一步: {index} => {final_video_path}")
+    logger.info(f"\n\n## 6. 最后合成: {index} => {final_video_path}")
-    # 把所有东西合到在一起
+    # 传入OST信息以便正确处理音频和视频
    video.generate_video_v2(
        video_path=combined_video_path,
-        audio_path=audio_file,
+        audio_path=final_audio,
        subtitle_path=subtitle_path,
        output_file=final_video_path,
        params=params,
        list_script=list_script  # 传入完整脚本以便处理OST
    )
    _progress += 50 / 2
--- a/app/services/video.py
+++ b/app/services/video.py
@ -173,7 +173,7 @@ def wrap_text(text, max_width, font, fontsize=60):
    if width <= max_width:
        return text, height
-    logger.debug(f"换行文本, 最大宽度: {max_width}, 文本宽度: {width}, 文本: {text}")
+    logger.debug(f"换行文本, 最大宽度: {max_width}, 文本宽度: {width}, 本: {text}")
    processed = True
@ -228,131 +228,93 @@ def manage_clip(clip):
 def generate_video_v2(
    video_path: str,
    audio_path: str,
    subtitle_path: str,
    output_file: str,
    params: VideoClipParams,
    list_script: list = None
 ):
    """
    Render the final video: attach the audio track, add subtitles and write
    the result to output_file.

    Audio handling (only when audio_path exists on disk):
        - With list_script: the audio file, scaled by params.tts_volume
          (default 1.0), is mixed with the video's own audio scaled by
          params.video_volume (default 0.1). When the video has no audio,
          only the scaled audio file is used.
        - Without list_script: the audio file replaces the video's audio.

    Args:
        video_path: Path of the input video file.
        audio_path: Path of the audio file to attach.
        subtitle_path: Path of the subtitle file; skipped when it does not exist.
        output_file: Path of the output file.
        params: Video parameters.
        list_script: Script segment list carrying the OST settings.

    Raises:
        Exception: Any error raised while generating the video is logged and
            re-raised.
    """
    # Pre-declared so the cleanup in ``finally`` can tell what was opened.
    video_clip = None
    audio_clip = None
    final_audio = None

    try:
        video_clip = VideoFileClip(video_path)

        # Audio
        if audio_path and os.path.exists(audio_path):
            audio_clip = AudioFileClip(audio_path)

            if list_script:
                original_audio = video_clip.audio

                tts_volume = getattr(params, 'tts_volume', 1.0)
                video_volume = getattr(params, 'video_volume', 0.1)

                if original_audio:
                    final_audio = CompositeAudioClip([
                        audio_clip.volumex(tts_volume),  # TTS audio
                        original_audio.volumex(video_volume)  # original sound
                    ])
                else:
                    final_audio = audio_clip.volumex(tts_volume)
            else:
                # No OST information: use the audio file as it is.
                final_audio = audio_clip

            video_clip = video_clip.set_audio(final_audio)

        # Subtitles
        if subtitle_path and os.path.exists(subtitle_path):
            video_clip = add_subtitles(
                video_clip,
                subtitle_path,
                params.font_size,
                params.font_name,
                params.text_fore_color,
                params.subtitle_position,
                params.stroke_color,
                params.stroke_width
            )

        # Keep the temporary audio file next to the output so that tasks
        # writing to different directories do not share one file in the
        # current working directory.
        temp_audio_path = os.path.join(os.path.dirname(output_file), "temp-audio.m4a")

        video_clip.write_videofile(
            output_file,
            codec="libx264",
            audio_codec="aac",
            temp_audiofile=temp_audio_path,
            remove_temp=True,
            threads=params.n_threads
        )

    except Exception as e:
        logger.error(f"生成视频时发生错误: {str(e)}")
        raise

    finally:
        # Release resources; final_audio may be the same object as audio_clip,
        # so each object is closed once.
        closed_ids = set()
        for clip in (video_clip, audio_clip, final_audio):
            if clip is not None and id(clip) not in closed_ids:
                closed_ids.add(id(clip))
                clip.close()
 def process_audio_tracks(original_audio, new_audio, params, video_duration):
@ -389,7 +351,7 @@ def process_subtitles(subtitle_path, video_clip, video_duration, create_text_cli
    for item in sub.subtitles:
        clip = create_text_clip(subtitle_item=item)
-        # 时间范围调整
+        # 时间范围调整
        start_time = max(clip.start, 0)
        if start_time >= video_duration:
            continue
@ -450,12 +412,12 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
 def combine_clip_videos(combined_video_path: str,
-                        video_paths: List[str],
+                       video_paths: List[str],
-                        video_ost_list: List[int],
+                       video_ost_list: List[int],
-                        list_script: list,
+                       list_script: list,
-                        video_aspect: VideoAspect = VideoAspect.portrait,
+                       video_aspect: VideoAspect = VideoAspect.portrait,
-                        threads: int = 2,
+                       threads: int = 2,
-                        ) -> str:
+                       ) -> str:
    """
    合并子视频
    Args:
@ -469,9 +431,18 @@ def combine_clip_videos(combined_video_path: str,
    Returns:
        str: 合并后的视频路径
    """
-    from app.utils.utils import calculate_total_duration
+    # 计算总时长时需要考虑毫秒精度
-    audio_duration = calculate_total_duration(list_script)
+    total_duration = 0.0
-    logger.info(f"音频的最大持续时间: {audio_duration} s")
+    for item in list_script:
        timestamp = item.get('new_timestamp', '')
        if timestamp:
            start_str, end_str = timestamp.split('-')
            start_time = utils.time_to_seconds(start_str)
            end_time = utils.time_to_seconds(end_str)
            duration = end_time - start_time
            total_duration += duration
    logger.info(f"音频的最大持续时间: {total_duration:.3f} s")
    output_dir = os.path.dirname(combined_video_path)
    aspect = VideoAspect(video_aspect)
@ -480,11 +451,17 @@ def combine_clip_videos(combined_video_path: str,
    clips = []
    for video_path, video_ost in zip(video_paths, video_ost_list):
        try:
            # 加载视频片段
            clip = VideoFileClip(video_path)
            # 根据OST设置处理音频
            if video_ost == 0:  # 不保留原声
                clip = clip.without_audio()
-            # video_ost 为 1 或 2 时都保留原声，不需要特殊处理
+            elif video_ost == 1:  # 只保留原声
                # 保持原声，但可能需要调整音量
                if clip.audio:
                    clip = clip.set_audio(clip.audio.volumex(1.0))  # 可以调整音量系数
            # OST == 2 的情况会在后续处理中混合音频
            clip = clip.set_fps(30)
@ -498,6 +475,16 @@ def combine_clip_videos(combined_video_path: str,
                )
                logger.info(f"视频 {video_path} 已调整尺寸为 {video_width} x {video_height}")
            # 精确控制视频时长
            filename = os.path.basename(video_path)
            timestamp = extract_timestamp_from_filename(filename)
            if timestamp:
                start_time, end_time = timestamp
                clip_duration = end_time - start_time
                if abs(clip.duration - clip_duration) > 0.1:  # 允许0.1秒的误差
                    logger.warning(f"视频 {video_path} 时长与时间戳不匹配，进行调整")
                    clip = clip.set_duration(clip_duration)
            clips.append(clip)
        except Exception as e:
@ -508,6 +495,7 @@ def combine_clip_videos(combined_video_path: str,
        raise ValueError("没有有效的视频片段可以合并")
    try:
        # 合并所有视频片段
        video_clip = concatenate_videoclips(clips)
        video_clip = video_clip.set_fps(30)
@ -521,7 +509,7 @@ def combine_clip_videos(combined_video_path: str,
            temp_audiofile=os.path.join(output_dir, "temp-audio.m4a")
        )
    finally:
-        # 确保资源被正确<EFBFBD><EFBFBD><EFBFBD>放
+        # 确保资源被正确释放
        video_clip.close()
        for clip in clips:
            clip.close()
@ -530,6 +518,59 @@ def combine_clip_videos(combined_video_path: str,
    return combined_video_path
 def extract_timestamp_from_filename(filename: str) -> tuple:
    """
    Parse the start/end offsets (in seconds) encoded in a clip file name.

    Recognised name layouts:
    - "vid-00_06,500-00_24,800.mp4" -> (6.5, 24.8)
    - "vid-00_00_00-020-00_00_10-400.mp4" -> (0.02, 10.4)

    Args:
        filename: Base name of the clip file; it has to match "vid-<stamp>.mp4".

    Returns:
        A (start_seconds, end_seconds) tuple, or None when the name does not
        match the pattern or any conversion step raises (the error is logged).
    """
    def _stamp_to_seconds(piece: str) -> float:
        # "HH_MM_SS-mmm" carries its milliseconds after the dash; every other
        # shape is handed to utils.time_to_seconds as-is.
        colon_form = piece.replace('_', ':')
        if colon_form.count(':') != 2:
            return utils.time_to_seconds(colon_form)
        fields = colon_form.split('-')
        base = utils.time_to_seconds(fields[0])
        return base + int(fields[1]) / 1000
    try:
        found = re.search(r'vid-(.+?)\.mp4$', filename)
        if found is None:
            logger.warning(f"文件名格式不正确: {filename}")
            return None
        stamp = found.group(1)
        if stamp.count('-') != 3:
            # Plain layout: a single dash separates the start from the end.
            start_str, end_str = stamp.split('-')
            start_seconds = utils.time_to_seconds(start_str.replace('_', ':'))
            end_seconds = utils.time_to_seconds(end_str.replace('_', ':'))
        else:
            # Millisecond layout: <start>-<ms>-<end>-<ms>, re-paired before parsing.
            first, first_ms, second, second_ms = stamp.split('-')
            start_seconds = _stamp_to_seconds(f"{first}-{first_ms}")
            end_seconds = _stamp_to_seconds(f"{second}-{second_ms}")
        logger.debug(f"从文件名 {filename} 提取时间戳: {start_seconds:.3f} - {end_seconds:.3f}")
        return start_seconds, end_seconds
    except Exception as e:
        logger.error(f"从文件名提取时间戳失败 {filename}: {str(e)}\n{traceback.format_exc()}")
        return None
 def resize_video_with_padding(clip, target_width: int, target_height: int):
    """辅助函数：调整视频尺寸并添加黑边"""
    clip_ratio = clip.w / clip.h
@ -574,6 +615,71 @@ def validate_params(video_path, audio_path, output_file, params):
        raise ValueError("params 缺少必要参数 video_aspect")
 def add_subtitles(video_clip, subtitle_path, font_size, font_name, font_color, position, shadow_color, shadow_offset):
    """
    Overlay the subtitles of a subtitle file onto a video clip.

    Args:
        video_clip: Clip to draw on; its ``w`` and ``h`` drive the layout.
        subtitle_path: Path of the subtitle file handed to SubtitlesClip.
        font_size: Font size passed to TextClip.
        font_name: Font file name, looked up inside ``utils.font_dir()``.
        font_color: Text colour.
        position: 'top' or 'center'; any other value is treated as bottom.
        shadow_color: Used as the text stroke colour.
        shadow_offset: Used as the text stroke width.

    Returns:
        A CompositeVideoClip with the subtitles on top, or the untouched
        ``video_clip`` when anything fails (the error is logged).
    """
    edge_margin = 50  # pixels kept between the subtitle and the frame edge
    try:
        # Prefer the bundled font; fall back to a system font when it is missing.
        font_path = os.path.join(utils.font_dir(), font_name)
        if not os.path.exists(font_path):
            logger.error(f"字体文件不存在: {font_path}")
            # NOTE(review): the fallback only covers Windows and a macOS font
            # path; other platforms get a path that may not exist.
            font_path = "Arial" if os.name == 'nt' else "/System/Library/Fonts/STHeiti Light.ttc"
            logger.info(f"使用默认字体: {font_path}")
        def subtitle_generator(txt):
            return TextClip(
                txt,
                fontsize=font_size,
                font=font_path,
                color=font_color,
                stroke_color=shadow_color,
                stroke_width=shadow_offset,
                method='caption',  # wraps text inside the given width
                size=(video_clip.w * 0.9, None)  # cap the subtitle width at 90% of the frame
            )
        subtitles = SubtitlesClip(
            subtitle_path,
            subtitle_generator
        )
        def bottom_anchor(t):
            # Numeric coordinates are measured from the top-left corner, so the
            # y offset is derived from the height of the text rendered at ``t``.
            text_height = subtitles.get_frame(t).shape[0]
            return ("center", max(0, video_clip.h - text_height - edge_margin))
        if position == "top":
            pos = ("center", edge_margin)
        elif position == "center":
            pos = "center"
        else:  # bottom
            # A negative y (the previous ("center", -50)) places the text above
            # the frame top instead of near the bottom edge.
            pos = bottom_anchor
        video_with_subtitles = CompositeVideoClip([
            video_clip,
            subtitles.set_position(pos)
        ])
        return video_with_subtitles
    except Exception as e:
        logger.error(f"添加字幕时出错: {str(e)}\n{traceback.format_exc()}")
        # Best effort: hand back the original clip when subtitles cannot be added.
        return video_clip
 if __name__ == "__main__":
    # combined_video_path = "../../storage/tasks/12312312/com123.mp4"
    #
@ -586,7 +692,7 @@ if __name__ == "__main__":
    #     {
    #         "picture": "夜晚，一个小孩在树林里奔跑，后面有人拿着火把在追赶",
    #         "timestamp": "00:00-00:03",
-    #         "narration": "夜黑风高的树林，一个小孩在拼命奔跑，后面的人穷追不舍！",
+    #         "narration": "夜黑风高的树林，一个小孩在拼命奔跑，后面的人穷追不舍！",
    #         "OST": False,
    #         "new_timestamp": "00:00-00:03"
    #     },
--- a/app/test/test_moviepy.py
+++ b/app/test/test_moviepy.py
@ -1,5 +1,5 @@
 """
-使用 moviepy 库剪辑指定时间戳视频
+使用 moviepy 库剪辑指定时间戳视频，支持时分秒毫秒精度
 """
 from moviepy.editor import VideoFileClip
@ -11,12 +11,22 @@ def time_str_to_seconds(time_str: str) -> float:
    """
    将时间字符串转换为秒数
    参数:
-        time_str: 格式为"MM:SS"的时间字符串
+        time_str: 格式为"HH:MM:SS,mmm"的时间字符串，例如"00:01:23,456"
    返回:
-        转换后的秒数
+        转换后的秒数(float)
    """
-    time_obj = datetime.strptime(time_str, "%M:%S")
+    try:
-    return time_obj.minute * 60 + time_obj.second
+        # 分离时间和毫秒
        time_part, ms_part = time_str.split(',')
        # 转换时分秒
        time_obj = datetime.strptime(time_part, "%H:%M:%S")
        # 计算总秒数
        total_seconds = time_obj.hour * 3600 + time_obj.minute * 60 + time_obj.second
        # 添加毫秒部分
        total_seconds += int(ms_part) / 1000
        return total_seconds
    except ValueError as e:
        raise ValueError("时间格式错误，请使用 HH:MM:SS,mmm 格式，例如 00:01:23,456") from e
 def format_duration(seconds: float) -> str:
@ -25,11 +35,15 @@ def format_duration(seconds: float) -> str:
    参数:
        seconds: 秒数
    返回:
-        格式化的时间字符串 (MM:SS)
+        格式化的时间字符串 (HH:MM:SS,mmm)
    """
-    minutes = int(seconds // 60)
+    hours = int(seconds // 3600)
-    remaining_seconds = int(seconds % 60)
+    minutes = int((seconds % 3600) // 60)
-    return f"{minutes:02d}:{remaining_seconds:02d}"
+    seconds_remain = seconds % 60
    whole_seconds = int(seconds_remain)
    milliseconds = int((seconds_remain - whole_seconds) * 1000)
    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"
 def cut_video(video_path: str, start_time: str, end_time: str, output_path: str) -> None:
@ -37,8 +51,8 @@ def cut_video(video_path: str, start_time: str, end_time: str, output_path: str)
    剪辑视频
    参数:
        video_path: 视频文件路径
-        start_time: 开始时间 (格式: "MM:SS")
+        start_time: 开始时间 (格式: "HH:MM:SS,mmm")
-        end_time: 结束时间 (格式: "MM:SS")
+        end_time: 结束时间 (格式: "HH:MM:SS,mmm")
        output_path: 输出文件路径
    """
    try:
@ -62,10 +76,18 @@ def cut_video(video_path: str, start_time: str, end_time: str, output_path: str)
        # 加载视频文件
        video = VideoFileClip(video_path)
        # 验证时间范围
        if start_seconds >= video.duration or end_seconds > video.duration:
            raise ValueError(f"剪辑时间超出视频长度！视频总长度为: {format_duration(video.duration)}")
        if start_seconds >= end_seconds:
            raise ValueError("结束时间必须大于开始时间！")
        # 计算剪辑时长
        clip_duration = end_seconds - start_seconds
        print(f"原视频总长度: {format_duration(video.duration)}")
        print(f"剪辑时长: {format_duration(clip_duration)}")
        print(f"剪辑区间: {start_time} -> {end_time}")
        # 剪辑视频
        video = video.subclip(start_seconds, end_seconds)
@ -92,6 +114,9 @@ def cut_video(video_path: str, start_time: str, end_time: str, output_path: str)
 if __name__ == "__main__":
-    # cut_video("E:\\NarratoAI_v0.3.5_cuda\\NarratoAI\storage\\tasks\ca4fee22-350b-47f9-bb2f-802ad96774f7\\final-2.mp4", "00:00", "07:00", "E:\\NarratoAI_v0.3.5_cuda\\NarratoAI\storage\\tasks\\yyjx2-1")
+    cut_video(
-    # cut_video("E:\\NarratoAI_v0.3.5_cuda\\NarratoAI\storage\\tasks\ca4fee22-350b-47f9-bb2f-802ad96774f7\\final-2.mp4", "07:00", "14:00", "E:\\NarratoAI_v0.3.5_cuda\\NarratoAI\storage\\tasks\\yyjx2-2")
+        video_path="/Users/apple/Desktop/NarratoAI/resource/videos/duanju_yuansp.mp4",
-    cut_video("E:\\NarratoAI_v0.3.5_cuda\\NarratoAI\storage\\tasks\ca4fee22-350b-47f9-bb2f-802ad96774f7\\final-2.mp4", "14:00", "22:00", "E:\\NarratoAI_v0.3.5_cuda\\NarratoAI\storage\\tasks\\yyjx2-3")
+        start_time="00:00:00,789",
        end_time="00:02:00,123",
        output_path="/Users/apple/Desktop/NarratoAI/resource/videos/duanju_yuansp_cut3.mp4"
    )
--- a/app/test/test_qwen.py
+++ b/app/test/test_qwen.py
@ -2,11 +2,23 @@ import os
 import traceback
 import json
 from openai import OpenAI
-from test_moviepy import cut_video
+from pydantic import BaseModel
 from typing import List
 from app.utils import utils
 from app.services.subtitle import extract_audio_and_create_subtitle
 class Step(BaseModel):
    """One entry of the editing script; field names match the JSON keys of the
    example given in this file's system prompt."""
    # Time range in the source video. The prompt example in this file uses
    # "HH:MM:SS,mmm-HH:MM:SS,mmm" -- presumably the same format here (TODO confirm).
    timestamp: str
    # Description of what is on screen (the prompt example uses "画面1").
    picture: str
    # Narration text for the clip.
    narration: str
    # Audio mode flag. NOTE(review): the project's workflow notes describe
    # 0 = narration only, 1 = original sound only, 2 = both -- confirm.
    OST: int
    # Time range of the clip in the edited output; same format as `timestamp`
    # per the prompt example (TODO confirm).
    new_timestamp: str
 class MathReasoning(BaseModel):
    """Wrapper model exposing a list of Step entries under ``result``.

    NOTE(review): the class name does not describe its content (a list of
    script steps); confirm before renaming.
    """
    # Script steps, in the order they were produced.
    result: List[Step]
 def chat_with_qwen(prompt: str, system_message: str, subtitle_path: str) -> str:
    """
    与通义千问AI模型进行对话
@ -23,7 +35,7 @@ def chat_with_qwen(prompt: str, system_message: str, subtitle_path: str) -> str:
    """
    try:
        client = OpenAI(
-            api_key="sk-",
+            api_key="sk-a1acd853d88d41d3ae92777d7bfa2612",
            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
        )
@ -50,25 +62,25 @@ def chat_with_qwen(prompt: str, system_message: str, subtitle_path: str) -> str:
 # 使用示例
 if __name__ == "__main__":
    try:
-        # video_path = utils.video_dir("duanju_yuansp.mp4")
+        video_path = utils.video_dir("duanju_yuansp.mp4")
        # # 判断视频是否存在
        # if not os.path.exists(video_path):
        #     print(f"视频文件不存在：{video_path}")
        #     exit(1)
        # 提取字幕
        subtitle_path = os.path.join(utils.video_dir(""), f"duanju_yuan.srt")
-        # extract_audio_and_create_subtitle(video_file=video_path, subtitle_file=subtitle_path)
+        extract_audio_and_create_subtitle(video_file=video_path, subtitle_file=subtitle_path)
        # 分析字幕
        system_message = """
        你是一个视频srt字幕分析剪辑器, 输入视频的srt字幕, 分析其中的精彩且尽可能连续的片段并裁剪出来, 注意确保文字与时间戳的正确匹配。
-        输出需严格按照如下 json 格式: 
+        输出需严格按照如下 json 格式:
        [
            {
-                "timestamp": "00:50-01:44",
+                "timestamp": "00:00:50,020-00:01:44,000",
                "picture": "画面1",
                "narration": "播放原声",
                "OST": 0,
-                "new_timestamp": "00:00-00:54"
+                "new_timestamp": "00:00:00,000-00:00:54,020"
            },
            {
                "timestamp": "01:49-02:30",
--- a/app/utils/utils.py
+++ b/app/utils/utils.py
@ -40,7 +40,7 @@ def to_json(obj):
            # 如果对象是二进制数据，转换为base64编码的字符串
            elif isinstance(o, bytes):
                return "*** binary data ***"
-            # 如果对象是字典，递归处理每个键值对
+            # 如果对象是字典，递归处理每个键值对
            elif isinstance(o, dict):
                return {k: serialize(v) for k, v in o.items()}
            # 如果对象是列表或元组，递归处理每个元素
@ -302,15 +302,49 @@ def get_current_country():
 def time_to_seconds(time_str: str) -> float:
    """
    Convert a time string to seconds.

    Accepted layouts (the millisecond suffix is optional):
    - "HH:MM:SS,mmm"
    - "MM:SS,mmm"
    - "SS,mmm"
    - "SS-mmm" (dash instead of comma before the milliseconds)

    Args:
        time_str: Time string in one of the layouts above.

    Returns:
        float: Total seconds including the millisecond fraction. Malformed
        input (wrong type, more than three ':' fields, non-numeric parts) is
        logged and yields 0.0 instead of raising.
    """
    try:
        text = time_str.strip()
        ms = 0.0
        # Milliseconds may follow either '-' or ','; '-' takes precedence.
        for separator in ('-', ','):
            if separator in text:
                text, ms_part = text.split(separator)
                ms = float(ms_part) / 1000
                break
        parts = text.split(':')
        if len(parts) > 3:
            # Previously this fell into the "SS" branch and silently returned
            # only the first field.
            raise ValueError("too many ':' separated fields")
        # Align the fields with hour/minute/second weights from the right.
        weights = (3600, 60, 1)[-len(parts):]
        seconds = sum(float(value) * weight for value, weight in zip(parts, weights))
        return seconds + ms
    except (ValueError, IndexError, AttributeError, TypeError) as e:
        logger.error(f"时间格式转换错误 {time_str}: {str(e)}")
        return 0.0
 def seconds_to_time(seconds: float) -> str:
@ -520,3 +554,21 @@ def download_font(url: str, font_path: str):
    except Exception as e:
        logger.error(f"下载字体文件失败: {e}")
        raise
 def init_imagemagick():
    """
    Check that the ImageMagick CLI is callable and register it in the environment.

    Runs ``magick -version``; on a zero exit code the ``IMAGEMAGICK_BINARY``
    environment variable is set to ``'magick'``.

    Returns:
        bool: True when the probe succeeded and the variable was set, False
        when the command exits non-zero or anything raises (both are logged).
    """
    try:
        import subprocess
        # A non-zero exit code is treated the same as a missing installation.
        probe = subprocess.run(['magick', '-version'], capture_output=True, text=True)
        if probe.returncode == 0:
            os.environ['IMAGEMAGICK_BINARY'] = 'magick'
            return True
        logger.error("ImageMagick 未安装或配置不正确")
        return False
    except Exception as e:
        logger.error(f"初始化 ImageMagick 失败: {str(e)}")
        return False
--- a/video_pipeline.py
+++ b/video_pipeline.py
@ -93,10 +93,8 @@ class VideoPipeline:
        response.raise_for_status()
        return response.json()
-    def save_script_to_json(self, script: list, script_name: str) -> str:
+    def save_script_to_json(self, script: list, script_path: str) -> str:
-        """保存脚本到json文件"""
+        """保存脚本到json文件"""        
        script_path = f"E:\\projects\\NarratoAI\\resource\\scripts\\{script_name}.json"
        try:
            with open(script_path, 'w', encoding='utf-8') as f:
                json.dump(script, f, ensure_ascii=False, indent=2)
@ -133,8 +131,7 @@ class VideoPipeline:
            # 2.2 保存脚本到json文件
            print("保存脚本到json文件...")
-            script_path = self.save_script_to_json(script, script_name)
+            self.save_script_to_json(script=script, script_path=script_path)
            script_result["script_path"] = script_path
            # 3. 剪辑视频
            print("开始剪辑视频...")
@ -143,7 +140,7 @@ class VideoPipeline:
            # 4. 生成最终视频
            print("开始生成最终视频...")
-            final_result = self.generate_final_video(
+            self.generate_final_video(
                task_id=task_id,
                video_path=video_path,
                script_path=script_path,
--- a/webui.txt
+++ b/webui.txt
@ -369,4 +369,6 @@ output_path和script参数需要传递给请求3
  }
 }
 subclip_videos和 output_path和script参数需要传递给请求4
-最后完成工作流
+最后完成工作流
 OST 取值说明：0 代表只播放文案音频，禁用视频原声；1 代表只播放视频原声，不播放文案音频和字幕；2 代表既播放文案音频也播放视频原声。