diff --git a/app/services/material.py b/app/services/material.py
index fc41fba..2a84f85 100644
--- a/app/services/material.py
+++ b/app/services/material.py
@@ -421,23 +421,10 @@ def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, pro
     Returns:
         剪辑后的视频路径
     """
-    # 创建基于原视频的缓存目录
-    video_cache_dir = os.path.join(utils.temp_dir(), "video")
-    video_hash = utils.md5(origin_video + str(os.path.getmtime(origin_video)))
-    video_clips_dir = os.path.join(video_cache_dir, video_hash)
-    
-    if not os.path.exists(video_clips_dir):
-        os.makedirs(video_clips_dir)
-        
     video_paths = {}
     total_items = len(timestamp_terms)
     for index, item in enumerate(timestamp_terms):
         material_directory = config.app.get("material_directory", "").strip()
-        if material_directory == "task":
-            material_directory = utils.task_dir(task_id)
-        elif material_directory and not os.path.isdir(material_directory):
-            material_directory = video_clips_dir  # 如果没有指定material_directory,使用缓存目录
-
         try:
             saved_video_path = save_clip_video(timestamp=item, origin_video=origin_video, save_dir=material_directory)
             if saved_video_path:
diff --git a/app/services/video.py b/app/services/video.py
index fc6fce9..2dc7a45 100644
--- a/app/services/video.py
+++ b/app/services/video.py
@@ -48,13 +48,13 @@ def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""):
 
 
 def combine_videos(
-    combined_video_path: str,
-    video_paths: List[str],
-    audio_file: str,
-    video_aspect: VideoAspect = VideoAspect.portrait,
-    video_concat_mode: VideoConcatMode = VideoConcatMode.random,
-    max_clip_duration: int = 5,
-    threads: int = 2,
+        combined_video_path: str,
+        video_paths: List[str],
+        audio_file: str,
+        video_aspect: VideoAspect = VideoAspect.portrait,
+        video_concat_mode: VideoConcatMode = VideoConcatMode.random,
+        max_clip_duration: int = 5,
+        threads: int = 2,
 ) -> str:
     audio_clip = AudioFileClip(audio_file)
     audio_duration = audio_clip.duration
@@ -173,7 +173,7 @@ def wrap_text(text, max_width, font, fontsize=60):
     if width <= max_width:
         return text, height
 
-    logger.debug(f"换行文本, 最大宽度: {max_width}, 文本宽度: {width}, 本: {text}")
+    logger.debug(f"换行文本, 最大宽度: {max_width}, 文本宽度: {width}, 文本: {text}")
 
     processed = True
 
@@ -228,105 +228,143 @@ def manage_clip(clip):
 
 
 def generate_video_v2(
-    video_path: str,
-    audio_path: str,
-    subtitle_path: str,
-    output_file: str,
-    params: VideoClipParams,
-    list_script: list = None
+        video_path: str,
+        audio_path: str,
+        subtitle_path: str,
+        output_file: str,
+        params: Union[VideoParams, VideoClipParams],
+        progress_callback=None,
 ):
     """
-    生成最终视频，处理音频和字幕
-
+    合并所有素材
     Args:
-        video_path: 视频文件路径
-        audio_path: 音频文件路径
+        video_path: 视频路径
+        audio_path: 单个音频文件路径
         subtitle_path: 字幕文件路径
         output_file: 输出文件路径
         params: 视频参数
-        list_script: 视频脚本列表，包含OST设置
-    """
-    try:
-        video_clip = VideoFileClip(video_path)
-        
-        # 处理音频
-        if audio_path and os.path.exists(audio_path):
-            audio_clip = AudioFileClip(audio_path)
-            
-            if list_script:
-                # 根据OST设置处理音频
-                # OST=0: 只使用TTS音频
-                # OST=1: 只使用视频原声
-                # OST=2: 混合TTS音频和视频原声
-                original_audio = video_clip.audio
-                
-                # 设置音频音量
-                tts_volume = params.tts_volume if hasattr(params, 'tts_volume') else 1.0
-                video_volume = params.video_volume if hasattr(params, 'video_volume') else 0.1
-                
-                # 创建最终音频
-                if original_audio:
-                    # 有些片段需要原声，有些需要TTS
-                    final_audio = CompositeAudioClip([
-                        audio_clip.volumex(tts_volume),  # TTS音频
-                        original_audio.volumex(video_volume)  # 原声音频
-                    ])
-                else:
-                    final_audio = audio_clip.volumex(tts_volume)
-            else:
-                # 如果没有OST设置，使用默认行为
-                final_audio = audio_clip
-                
-            video_clip = video_clip.set_audio(final_audio)
+        progress_callback: 进度回调函数，接收 0-100 的进度值
 
-        # 处理字幕
-        if subtitle_path and os.path.exists(subtitle_path):
-            # 添加字幕
-            video_clip = add_subtitles(
-                video_clip,
-                subtitle_path,
-                params.font_size,
-                params.font_name,
-                params.text_fore_color,
-                params.subtitle_position,
-                params.stroke_color,
-                params.stroke_width
+    Returns:
+
+    """
+    total_steps = 4
+    current_step = 0
+
+    def update_progress(step_name):
+        nonlocal current_step
+        current_step += 1
+        if progress_callback:
+            progress_callback(int(current_step * 100 / total_steps))
+        logger.info(f"完成步骤: {step_name}")
+
+    try:
+        validate_params(video_path, audio_path, output_file, params)
+
+        with manage_clip(VideoFileClip(video_path)) as video_clip:
+            aspect = VideoAspect(params.video_aspect)
+            video_width, video_height = aspect.to_resolution()
+
+            logger.info(f"开始，视频尺寸: {video_width} x {video_height}")
+            logger.info(f"  ① 视频: {video_path}")
+            logger.info(f"  ② 音频: {audio_path}")
+            logger.info(f"  ③ 字幕: {subtitle_path}")
+            logger.info(f"  ④ 输出: {output_file}")
+
+            output_dir = os.path.dirname(output_file)
+            update_progress("初始化完成")
+
+            # 字体设置
+            font_path = ""
+            if params.subtitle_enabled:
+                if not params.font_name:
+                    params.font_name = "STHeitiMedium.ttc"
+                font_path = os.path.join(utils.font_dir(), params.font_name)
+                if os.name == "nt":
+                    font_path = font_path.replace("\\", "/")
+                logger.info(f"使用字体: {font_path}")
+
+            def create_text_clip(subtitle_item):
+                phrase = subtitle_item[1]
+                max_width = video_width * 0.9
+                wrapped_txt, txt_height = wrap_text(
+                    phrase, max_width=max_width, font=font_path, fontsize=params.font_size
+                )
+                _clip = TextClip(
+                    wrapped_txt,
+                    font=font_path,
+                    fontsize=params.font_size,
+                    color=params.text_fore_color,
+                    bg_color=params.text_background_color,
+                    stroke_color=params.stroke_color,
+                    stroke_width=params.stroke_width,
+                    print_cmd=False,
+                )
+                duration = subtitle_item[0][1] - subtitle_item[0][0]
+                _clip = _clip.set_start(subtitle_item[0][0])
+                _clip = _clip.set_end(subtitle_item[0][1])
+                _clip = _clip.set_duration(duration)
+
+                if params.subtitle_position == "bottom":
+                    _clip = _clip.set_position(("center", video_height * 0.95 - _clip.h))
+                elif params.subtitle_position == "top":
+                    _clip = _clip.set_position(("center", video_height * 0.05))
+                elif params.subtitle_position == "custom":
+                    margin = 10
+                    max_y = video_height - _clip.h - margin
+                    min_y = margin
+                    custom_y = (video_height - _clip.h) * (params.custom_position / 100)
+                    custom_y = max(min_y, min(custom_y, max_y))
+                    _clip = _clip.set_position(("center", custom_y))
+                else:  # center
+                    _clip = _clip.set_position(("center", "center"))
+                return _clip
+
+            update_progress("字体设置完成")
+
+            # 处理音频
+            original_audio = video_clip.audio
+            video_duration = video_clip.duration
+            new_audio = AudioFileClip(audio_path)
+            final_audio = process_audio_tracks(original_audio, new_audio, params, video_duration)
+            update_progress("音频处理完成")
+
+            # 处理字幕
+            if subtitle_path and os.path.exists(subtitle_path):
+                video_clip = process_subtitles(subtitle_path, video_clip, video_duration, create_text_clip)
+            update_progress("字幕处理完成")
+
+            # 合并音频和导出
+            video_clip = video_clip.set_audio(final_audio)
+            video_clip.write_videofile(
+                output_file,
+                audio_codec="aac",
+                temp_audiofile=os.path.join(output_dir, "temp-audio.m4a"),
+                threads=params.n_threads,
+                logger=None,
+                fps=30,
             )
 
-        # 写入最终视频文件
-        video_clip.write_videofile(
-            output_file,
-            codec="libx264",
-            audio_codec="aac",
-            temp_audiofile="temp-audio.m4a",
-            remove_temp=True,
-            threads=params.n_threads
-        )
-
+    except FileNotFoundError as e:
+        logger.error(f"文件不存在: {str(e)}")
+        raise
     except Exception as e:
-        logger.error(f"生成视频时发生错误: {str(e)}")
-        raise e
-
+        logger.error(f"视频生成失败: {str(e)}")
+        raise
     finally:
-        # 清理资源
-        if 'video_clip' in locals():
-            video_clip.close()
-        if 'audio_clip' in locals():
-            audio_clip.close()
-        if 'final_audio' in locals():
-            final_audio.close()
+        logger.success("完成")
 
 
 def process_audio_tracks(original_audio, new_audio, params, video_duration):
     """处理所有音轨"""
     audio_tracks = []
-    
+
     if original_audio is not None:
         audio_tracks.append(original_audio)
-    
+
     new_audio = new_audio.volumex(params.voice_volume)
     audio_tracks.append(new_audio)
-    
+
     # 处理背景音乐
     bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
     if bgm_file:
@@ -336,7 +374,7 @@ def process_audio_tracks(original_audio, new_audio, params, video_duration):
             audio_tracks.append(bgm_clip)
         except Exception as e:
             logger.error(f"添加背景音乐失败: {str(e)}")
-    
+
     return CompositeAudioClip(audio_tracks) if audio_tracks else new_audio
 
 
@@ -344,22 +382,22 @@ def process_subtitles(subtitle_path, video_clip, video_duration, create_text_cli
     """处理字幕"""
     if not (subtitle_path and os.path.exists(subtitle_path)):
         return video_clip
-        
+
     sub = SubtitlesClip(subtitles=subtitle_path, encoding="utf-8")
     text_clips = []
-    
+
     for item in sub.subtitles:
         clip = create_text_clip(subtitle_item=item)
-        
-        # 时间范围整
+
+        # 时间范围调整
         start_time = max(clip.start, 0)
         if start_time >= video_duration:
             continue
-            
+
         end_time = min(clip.end, video_duration)
         clip = clip.set_start(start_time).set_end(end_time)
         text_clips.append(clip)
-    
+
     logger.info(f"处理了 {len(text_clips)} 段字幕")
     return CompositeVideoClip([video_clip, *text_clips])
 
@@ -412,12 +450,12 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
 
 
 def combine_clip_videos(combined_video_path: str,
-                       video_paths: List[str],
-                       video_ost_list: List[int],
-                       list_script: list,
-                       video_aspect: VideoAspect = VideoAspect.portrait,
-                       threads: int = 2,
-                       ) -> str:
+                        video_paths: List[str],
+                        video_ost_list: List[int],
+                        list_script: list,
+                        video_aspect: VideoAspect = VideoAspect.portrait,
+                        threads: int = 2,
+                        ) -> str:
     """
     合并子视频
     Args:
@@ -431,19 +469,10 @@ def combine_clip_videos(combined_video_path: str,
     Returns:
         str: 合并后的视频路径
     """
-    # 计算总时长时需要考虑毫秒精度
-    total_duration = 0.0
-    for item in list_script:
-        timestamp = item.get('new_timestamp', '')
-        if timestamp:
-            start_str, end_str = timestamp.split('-')
-            start_time = utils.time_to_seconds(start_str)
-            end_time = utils.time_to_seconds(end_str)
-            duration = end_time - start_time
-            total_duration += duration
-            
-    logger.info(f"音频的最大持续时间: {total_duration:.3f} s")
-    
+    from app.utils.utils import calculate_total_duration
+    audio_duration = calculate_total_duration(list_script)
+    logger.info(f"音频的最大持续时间: {audio_duration} s")
+
     output_dir = os.path.dirname(combined_video_path)
     aspect = VideoAspect(video_aspect)
     video_width, video_height = aspect.to_resolution()
@@ -451,42 +480,26 @@ def combine_clip_videos(combined_video_path: str,
     clips = []
     for video_path, video_ost in zip(video_paths, video_ost_list):
         try:
-            # 加载视频片段
             clip = VideoFileClip(video_path)
-            
-            # 根据OST设置处理音频
+
             if video_ost == 0:  # 不保留原声
                 clip = clip.without_audio()
-            elif video_ost == 1:  # 只保留原声
-                # 保持原声，但可能需要调整音量
-                if clip.audio:
-                    clip = clip.set_audio(clip.audio.volumex(1.0))  # 可以调整音量系数
-            # OST == 2 的情况会在后续处理中混合音频
-                
+            # video_ost 为 1 或 2 时都保留原声，不需要特殊处理
+
             clip = clip.set_fps(30)
 
             # 处理视频尺寸
             clip_w, clip_h = clip.size
             if clip_w != video_width or clip_h != video_height:
                 clip = resize_video_with_padding(
-                    clip, 
-                    target_width=video_width, 
+                    clip,
+                    target_width=video_width,
                     target_height=video_height
                 )
                 logger.info(f"视频 {video_path} 已调整尺寸为 {video_width} x {video_height}")
 
-            # 精确控制视频时长
-            filename = os.path.basename(video_path)
-            timestamp = extract_timestamp_from_filename(filename)
-            if timestamp:
-                start_time, end_time = timestamp
-                clip_duration = end_time - start_time
-                if abs(clip.duration - clip_duration) > 0.1:  # 允许0.1秒的误差
-                    logger.warning(f"视频 {video_path} 时长与时间戳不匹配，进行调整")
-                    clip = clip.set_duration(clip_duration)
-
             clips.append(clip)
-            
+
         except Exception as e:
             logger.error(f"处理视频 {video_path} 时出错: {str(e)}")
             continue
@@ -495,10 +508,9 @@ def combine_clip_videos(combined_video_path: str,
         raise ValueError("没有有效的视频片段可以合并")
 
     try:
-        # 合并所有视频片段
         video_clip = concatenate_videoclips(clips)
         video_clip = video_clip.set_fps(30)
-        
+
         logger.info("开始合并视频...")
         video_clip.write_videofile(
             filename=combined_video_path,
@@ -509,7 +521,7 @@ def combine_clip_videos(combined_video_path: str,
             temp_audiofile=os.path.join(output_dir, "temp-audio.m4a")
         )
     finally:
-        # 确保资源被正确释放
+        # 确保资源被正确释放
         video_clip.close()
         for clip in clips:
             clip.close()
@@ -518,61 +530,6 @@ def combine_clip_videos(combined_video_path: str,
     return combined_video_path
 
 
-def extract_timestamp_from_filename(filename: str) -> tuple:
-    """
-    从文件名中提取时间戳，支持格式：
-    - "vid-00-00-10_000-00-00-43_039.mp4" -> (10.0, 43.039) 
-    表示 00时00分10秒000毫秒 到 00时00分43秒039毫秒
-    """
-    try:
-        # 提取时间戳部分
-        match = re.search(r'vid-(.+?)\.mp4$', filename)
-        if not match:
-            logger.warning(f"文件名格式不正确: {filename}")
-            return None
-            
-        timestamp = match.group(1)
-        
-        def parse_timestamp(time_str: str) -> float:
-            """解析单个时间戳字符串为秒数"""
-            try:
-                # 处理 "00-00-10_000" 格式
-                main_time, milliseconds = time_str.rsplit('_', 1)  # 从右边分割，处理可能存在的多个下划线
-                time_components = main_time.split('-')
-                
-                if len(time_components) != 3:
-                    raise ValueError(f"时间格式错误: {main_time}")
-                    
-                hours = int(time_components[0])
-                minutes = int(time_components[1])
-                seconds = int(time_components[2])
-                ms = int(milliseconds)
-                
-                # 转换为秒数
-                total_seconds = hours * 3600 + minutes * 60 + seconds + ms / 1000
-                return total_seconds
-            except Exception as e:
-                raise ValueError(f"解析时间戳失败 {time_str}: {str(e)}")
-            
-        # 分割起始和结束时间戳
-        timestamps = timestamp.split('-', 5)  # 最多分割5次，处理 00-00-10_000-00-00-43_039 格式
-        if len(timestamps) != 6:  # 应该得到 ['00', '00', '10_000', '00', '00', '43_039']
-            raise ValueError(f"时间戳格式错误，无法分割: {timestamp}")
-            
-        start_str = '-'.join(timestamps[0:3])  # 组合开始时间 "00-00-10_000"
-        end_str = '-'.join(timestamps[3:6])    # 组合结束时间 "00-00-43_039"
-        
-        start_seconds = parse_timestamp(start_str)
-        end_seconds = parse_timestamp(end_str)
-        
-        logger.debug(f"从文件名 {filename} 提取时间戳: {start_seconds:.3f} - {end_seconds:.3f}")
-        return start_seconds, end_seconds
-        
-    except Exception as e:
-        logger.error(f"从文件名提取时间戳失败 {filename}: {str(e)}\n{traceback.format_exc()}")
-        return None
-
-
 def resize_video_with_padding(clip, target_width: int, target_height: int):
     """辅助函数：调整视频尺寸并添加黑边"""
     clip_ratio = clip.w / clip.h
@@ -580,7 +537,7 @@ def resize_video_with_padding(clip, target_width: int, target_height: int):
 
     if clip_ratio == target_ratio:
         return clip.resize((target_width, target_height))
-    
+
     if clip_ratio > target_ratio:
         scale_factor = target_width / clip.w
     else:
@@ -591,10 +548,10 @@ def resize_video_with_padding(clip, target_width: int, target_height: int):
     clip_resized = clip.resize(newsize=(new_width, new_height))
 
     background = ColorClip(
-        size=(target_width, target_height), 
+        size=(target_width, target_height),
         color=(0, 0, 0)
     ).set_duration(clip.duration)
-    
+
     return CompositeVideoClip([
         background,
         clip_resized.set_position("center")
@@ -605,170 +562,85 @@ def validate_params(video_path, audio_path, output_file, params):
     """验证输入参数"""
     if not os.path.exists(video_path):
         raise FileNotFoundError(f"视频文件不存在: {video_path}")
-        
+
     if not os.path.exists(audio_path):
         raise FileNotFoundError(f"音频文件不存在: {audio_path}")
-        
+
     output_dir = os.path.dirname(output_file)
     if not os.path.exists(output_dir):
         raise FileNotFoundError(f"输出目录不存在: {output_dir}")
-        
+
     if not hasattr(params, 'video_aspect'):
         raise ValueError("params 缺少必要参数 video_aspect")
 
 
-def add_subtitles(video_clip, subtitle_path, font_size, font_name, font_color, position, shadow_color, shadow_offset):
-    """
-    为视频添加字幕
-
-    Args:
-        video_clip: 视频剪辑对象
-        subtitle_path: 字幕文件路径
-        font_size: 字体大小
-        font_name: 字体名称
-        font_color: 字体颜色
-        position: 字幕位置 ('top', 'center', 'bottom')
-        shadow_color: 阴影颜色
-        shadow_offset: 阴影偏移
-
-    Returns:
-        带有字幕的视频剪辑对象
-    """
-    try:
-        # 确保字体文件存在
-        font_path = os.path.join(utils.font_dir(), font_name)
-        if not os.path.exists(font_path):
-            logger.error(f"字体文件不存在: {font_path}")
-            # 尝试使用系统默认字体
-            font_path = "Arial" if os.name == 'nt' else "/System/Library/Fonts/STHeiti Light.ttc"
-            logger.info(f"使用默认字体: {font_path}")
-
-        # 设置字幕位置
-        if position == "top":
-            pos = ("center", 50)
-        elif position == "center":
-            pos = "center"
-        else:  # bottom
-            pos = ("center", -50)
-
-        def subtitle_generator(txt):
-            return TextClip(
-                txt, 
-                fontsize=font_size,
-                font=font_path,
-                color=font_color,
-                stroke_color=shadow_color,
-                stroke_width=shadow_offset,
-                method='caption',  # 使用 caption 方法可能更稳定
-                size=(video_clip.w * 0.9, None)  # 限制字幕宽度
-            )
-
-        # 使用 SubtitlesClip，但明确指定 UTF-8 编码
-        subtitles = SubtitlesClip(
-            subtitle_path,
-            subtitle_generator,
-            encoding='utf-8'  # 明确指定使用 UTF-8 编码
-        )
-        
-        # 添加字幕到视频
-        video_with_subtitles = CompositeVideoClip([
-            video_clip,
-            subtitles.set_position(pos)
-        ])
-        
-        return video_with_subtitles
-
-    except Exception as e:
-        logger.error(f"添加字幕时出错: {str(e)}\n{traceback.format_exc()}")
-        # 如果添加字幕失败，返回原始视频
-        return video_clip
-
-
 if __name__ == "__main__":
-    # combined_video_path = "../../storage/tasks/12312312/com123.mp4"
-    #
-    # video_paths = ['../../storage/cache_videos/vid-00_00-00_03.mp4',
-    #                '../../storage/cache_videos/vid-00_03-00_07.mp4',
-    #                '../../storage/cache_videos/vid-00_12-00_17.mp4',
-    #                '../../storage/cache_videos/vid-00_26-00_31.mp4']
-    # video_ost_list = [False, True, False, True]
-    # list_script = [
-    #     {
-    #         "picture": "夜晚，一个小孩在树林里奔跑，后面有人拿着火把在追赶",
-    #         "timestamp": "00:00-00:03",
-    #         "narration": "夜风高的树林，一个小孩在拼命奔跑，后面的人穷追不舍！",
-    #         "OST": False,
-    #         "new_timestamp": "00:00-00:03"
-    #     },
-    #     {
-    #         "picture": "追赶的人命令抓住小孩",
-    #         "timestamp": "00:03-00:07",
-    #         "narration": "原声播放1",
-    #         "OST": True,
-    #         "new_timestamp": "00:03-00:07"
-    #     },
-    #     {
-    #         "picture": "小孩躲在草丛里，黑衣人用脚踢了踢他",
-    #         "timestamp": "00:12-00:17",
-    #         "narration": "小孩脱下外套，跑进树林, 一路奔跑，直到第二天清晨",
-    #         "OST": False,
-    #         "new_timestamp": "00:07-00:12"
-    #     },
-    #     {
-    #         "picture": "小孩跑到车前，慌慌张张地对女人说有人要杀他",
-    #         "timestamp": "00:26-00:31",
-    #         "narration": "原声播放2",
-    #         "OST": True,
-    #         "new_timestamp": "00:12-00:17"
-    #     }
-    # ]
+    combined_video_path = "../../storage/tasks/123/combined.mp4"
+
+    video_paths = ['../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-00-10_000-00-00-43_039.mp4',
+                   '../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-00-45_439-00-01-01_600.mp4',
+                   '../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-01-07_920-00-01-25_719.mp4',
+                   '../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-01-36_959-00-01-53_719.mp4']
+    video_ost_list = [2, 2, 2, 2]
+    list_script = [
+        {
+            "timestamp": "00:10-00:43",
+            "picture": "好的，以下是视频画面的客观描述：\n\n视频显示一个男人在一个树木繁茂的地区，靠近一个泥土斜坡他穿着一件深色T恤、卡其色长裤和登山靴。他背着一个军绿色背包，里面似乎装有头和其他工具。\n\n第一个镜头显示该男子从远处走近斜坡，背对着镜头。下一个镜头特写显示了的背包，一个镐头从背包中伸出来。下一个镜头显示该男子用镐头敲打斜坡。下一个镜头是该男子脚上的特写镜头，他穿着登山靴，正站在泥土斜坡上。最后一个镜显示该男子在斜坡上，仔细地拨开树根和泥土。周围的环境是树木繁茂的，阳光透过树叶照射下来。土壤是浅棕色的，斜坡上有许多树根和植被。",
+            "narration": "（接上文）好吧，今天我们的男主角，背着一个看似随时要发射军绿色背包，竟然化身“泥土探险家”，在斜坡上挥舞着镐头！他这是准备挖宝还是给树根做个“美容”？阳光洒下来，简直是自然界的聚光灯，仿佛在说：“快来看看，这位勇士要挑战泥土极限！”我只能默默想，如果树根能说话，它们一定会喊：“别打我，我还有家人！”这就是生活，总有些搞笑的瞬间等着我们去发现！",
+            "OST": 2,
+            "new_timestamp": "00:00:00,000-00:00:33,000"
+        },
+        {
+            "timestamp": "00:45-01:01",
+            "picture": "好的以下是视频画面的客观描述：\n\n视频显示了一个人在森林里挖掘。\n\n第一个镜头是地面特写，显示出松散的泥土、碎石和落叶。光线照在部分区域。\n\n第二个镜头中，一模糊不清的蹲一个树根旁挖掘，一个橄榄绿色的背包放在地上。树根缠绕着常春藤。\n\n第三个镜头显示该人在一个更开阔的区域挖掘，那里有一些树根，以及部分倒的树干。他起来像是在挖掘一个较大的坑。\n\n第四个镜头是特写镜头，显示该人用工具清理土坑的墙壁。\n\n第五个镜头是土坑内部的特写镜头，可以看到土质的纹理，有一些小树根和其它植被的残留物。",
+            "narration": "现在，这位勇敢的挖掘者就像个“现代版的土豆农夫”，在森林里开辟新天地。的目标是什么？挖出一个宝藏还一块“树根披萨”？小心哦，别让树根追着你喊：“不要挖我，我也是有故事的！”",
+            "OST": 2,
+            "new_timestamp": "00:00:33,000-00:00:49,000"
+        },
+        {
+            "timestamp": "01:07-01:25",
+            "picture": "好，以下是视频画面的客观描述：\n\n画面1：特写镜头，显示出一丛带有水珠的深绿色灌木叶片。叶片呈椭圆形，边缘光滑。背景是树根和泥土。\n\n画面2：一个留着胡子的男人正在一个森林中土坑里挖掘。他穿着黑色T恤和卡其色裤子，跪在地上，用具挖掘泥土。周围环绕着树木、树根和灌木。一个倒下的树干横跨土坑上方。\n\n画面3：同一个男人坐在他刚才挖的坑的边缘，看着前方。他的表情似乎略带沉思。背景与画面2相同。\n\n画面4：一个广角镜头显示出他挖出的坑。这是一个不规则形状的土坑，在树木繁茂的斜坡上。土壤呈深棕色，可见树根。\n\n画面5：同一个男人跪在地上，用一把小斧头砍一根木头。他穿着与前几个画面相同的衣服。地面上覆盖着落叶。周围是树木和灌木。",
+            "narration": "“哎呀，这片灌木叶子滴水如雨，感觉像是大自然的洗发水广告！但我这位‘挖宝达人’似乎更适合拍个‘森林里的单身狗’真人秀。等会儿，我要给树根唱首歌，听说它们爱音乐！”",
+            "OST": 2,
+            "new_timestamp": "00:00:49,000-00:01:07,000"
+        },
+        {
+            "timestamp": "01:36-01:53",
+            "picture": "好的，以下是视频画面内容的客观描述：\n\n视频包含三个镜头：\n\n**镜头一：**个小型、浅水池塘，位于树林中。池塘的水看起来浑浊，呈绿褐色。池塘周围遍布泥土和落叶。多根树枝和树干横跨池塘，部分浸没在水中。周围的植被茂密，主要是深色树木和灌木。\n\n**镜头二：**距拍摄树深处，阳光透过树叶洒落在植被上。镜头中可见粗大的树干、树枝和各种绿叶植物。部分树枝似乎被砍断，切口可见。\n\n**镜头三：**近距离特写镜头，聚焦在树枝和绿叶上。叶片呈圆形，颜色为鲜绿色，有些叶片上有缺损。树枝颜色较深，呈现深褐色。背景是模糊的树林。\n",
+            "narration": "“好吧，看来我们的‘挖宝达人’终于找到了一‘宝藏’——一个色泽如同绿豆汤的池塘！我敢打赌，这里不仅是小鱼儿的游乐场更是树枝们的‘水疗中心’！下次来这里，我得带上浮潜装备！”",
+            "OST": 2,
+            "new_timestamp": "00:01:07,000-00:01:24,000"
+        }
+    ]
+    # 合并子视频
     # combine_clip_videos(combined_video_path=combined_video_path, video_paths=video_paths, video_ost_list=video_ost_list, list_script=list_script)
 
-    # cfg = VideoClipParams()
-    # cfg.video_aspect = VideoAspect.portrait
-    # cfg.font_name = "STHeitiMedium.ttc"
-    # cfg.font_size = 60
-    # cfg.stroke_color = "#000000"
-    # cfg.stroke_width = 1.5
-    # cfg.text_fore_color = "#FFFFFF"
-    # cfg.text_background_color = "transparent"
-    # cfg.bgm_type = "random"
-    # cfg.bgm_file = ""
-    # cfg.bgm_volume = 1.0
-    # cfg.subtitle_enabled = True
-    # cfg.subtitle_position = "bottom"
-    # cfg.n_threads = 2
-    # cfg.paragraph_number = 1
-    #
-    # cfg.voice_volume = 1.0
+    cfg = VideoClipParams()
+    cfg.video_aspect = VideoAspect.portrait
+    cfg.font_name = "STHeitiMedium.ttc"
+    cfg.font_size = 60
+    cfg.stroke_color = "#000000"
+    cfg.stroke_width = 1.5
+    cfg.text_fore_color = "#FFFFFF"
+    cfg.text_background_color = "transparent"
+    cfg.bgm_type = "random"
+    cfg.bgm_file = ""
+    cfg.bgm_volume = 1.0
+    cfg.subtitle_enabled = True
+    cfg.subtitle_position = "bottom"
+    cfg.n_threads = 2
+    cfg.video_volume = 1
 
-    # generate_video(video_path=video_file,
-    #                audio_path=audio_file,
-    #                subtitle_path=subtitle_file,
-    #                output_file=output_file,
-    #                params=cfg
-    #                )
-    #
-    # video_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/combined-1.mp4"
-    #
-    # audio_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/audio_00-00-00-07.mp3"
-    #
-    # subtitle_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa\subtitle.srt"
-    #
-    # output_file = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/final-123.mp4"
-    #
-    # generate_video_v2(video_path=video_path,
-    #                    audio_path=audio_path,
-    #                    subtitle_path=subtitle_path,
-    #                    output_file=output_file,
-    #                    params=cfg
-    #                   )
+    cfg.voice_volume = 1.0
 
-    # 合并视频
-    video_list = [
-        './storage/cache_videos/vid-01_03-01_50.mp4',
-        './storage/cache_videos/vid-01_55-02_29.mp4',
-        './storage/cache_videos/vid-03_24-04_04.mp4',
-        './storage/cache_videos/vid-04_50-05_28.mp4'
-    ]
+    video_path = "../../storage/tasks/123/combined.mp4"
+    audio_path = "../../storage/tasks/123/final_audio.mp3"
+    subtitle_path = "../../storage/tasks/123/subtitle.srt"
+    output_file = "../../storage/tasks/123/final-123.mp4"
 
+    generate_video_v2(video_path=video_path,
+                       audio_path=audio_path,
+                       subtitle_path=subtitle_path,
+                       output_file=output_file,
+                       params=cfg
+                      )
diff --git a/requirements.txt b/requirements.txt
index 3024e71..0c864ca 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 requests~=2.31.0
-moviepy~=2.0.0.dev2
+moviepy==2.0.0.dev2
 faster-whisper~=1.0.1
 edge_tts~=6.1.15
 uvicorn~=0.27.1
@@ -26,7 +26,7 @@ psutil>=5.9.0
 opencv-python~=4.10.0.84
 scikit-learn~=1.5.2
 google-generativeai~=0.8.3
-Pillow>=11.0.0
+pillow~=10.3.0
 python-dotenv~=1.0.1
 openai~=1.53.0
 tqdm>=4.66.6