0.3.4 修改各种bug

2025-12-12 11:22:51 +00:00 · 2024-11-10 16:22:04 +08:00 · 2024-11-10 16:22:04 +08:00 · d0462ce91b
commit d0462ce91b
parent 4bafd696a1
7 changed files with 224 additions and 99 deletions
--- a/app/services/material.py
+++ b/app/services/material.py
@ -251,16 +251,41 @@ def download_videos(
    return video_paths
 def time_to_seconds(time_str: str) -> float:
    """
    将时间字符串转换为秒数
    支持格式：
    1. "MM:SS" (分:秒)
    2. "SS" (纯秒数)
    """
    parts = time_str.split(':')
    if len(parts) == 2:
        minutes, seconds = map(float, parts)
        return minutes * 60 + seconds
    return float(time_str)
 def format_timestamp(seconds: float) -> str:
    """
    将秒数转换为 "MM:SS" 格式的时间字符串
    """
    minutes = int(seconds) // 60
    secs = int(seconds) % 60
    return f"{minutes:02d}:{secs:02d}"
 def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> dict:
    """
    保存剪辑后的视频
    Args:
-        timestamp: 需要裁剪的单个时间戳，如：'00:36-00:40'
+        timestamp: 需要裁剪的单个时间戳，支持两种格式：
                  1. '00:36-00:40' (分:秒-分:秒)
                  2. 'SS-SS' (秒-秒)
        origin_video: 原视频路径
        save_dir: 存储目录
    Returns:
-        裁剪后的视频路径
+        裁剪后的视频路径，格式为 {timestamp: video_path}
    """
    if not save_dir:
        save_dir = utils.storage_dir("cache_videos")
@ -276,35 +301,64 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> di
        return {timestamp: video_path}
    try:
        # 先加载视频获取总时长
        video = VideoFileClip(origin_video)
        total_duration = video.duration
        # 获取目标时间段
        start_str, end_str = timestamp.split('-')
        start = time_to_seconds(start_str)
        end = time_to_seconds(end_str)
        # 验证时间段是否有效
        if start >= total_duration:
            logger.warning(f"起始时间 {format_timestamp(start)} ({start:.2f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.2f}秒)")
            video.close()
            return {}
        if end > total_duration:
            logger.warning(f"结束时间 {format_timestamp(end)} ({end:.2f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.2f}秒)，将自动调整为视频结尾")
            end = total_duration
        if end <= start:
            logger.warning(f"结束时间 {format_timestamp(end)} 必须大于起始时间 {format_timestamp(start)}")
            video.close()
            return {}
        # 剪辑视频
-        start, end = utils.split_timestamp(timestamp)
+        duration = end - start
-        video = VideoFileClip(origin_video).subclip(start, end)
+        logger.info(f"开始剪辑视频: {format_timestamp(start)} - {format_timestamp(end)}，时长 {format_timestamp(duration)}")
        subclip = video.subclip(start, end)
-        # 检查视频是否有音频轨道
+        try:
-        if video.audio is not None:
+            # 检查视频是否有音频轨道并写入文件
-            video.write_videofile(video_path, logger=None)  # 有音频时正常处理
+            subclip.write_videofile(video_path, audio=(subclip.audio is not None), logger=None)
-        else:
+            
-            # 没有音频时使用不同的写入方式
+            # 验证生成的视频文件
-            video.write_videofile(video_path, audio=False, logger=None)
+            if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
-        
+                with VideoFileClip(video_path) as clip:
-        video.close()  # 确保关闭视频文件
+                    if clip.duration > 0 and clip.fps > 0:
-
+                        return {timestamp: video_path}
-        if os.path.getsize(video_path) > 0 and os.path.exists(video_path):
+                    
-            try:
+            raise ValueError("视频文件验证失败")
-                clip = VideoFileClip(video_path)
+            
-                duration = clip.duration
+        except Exception as e:
-                fps = clip.fps
+            logger.warning(f"视频文件处理失败: {video_path} => {str(e)}")
-                clip.close()
+            if os.path.exists(video_path):
-                if duration > 0 and fps > 0:
+                os.remove(video_path)
-                    return {timestamp: video_path}
+                
            except Exception as e:
                logger.warning(f"视频文件验证失败: {video_path} => {str(e)}")
                if os.path.exists(video_path):
                    os.remove(video_path)
    except Exception as e:
-        logger.warning(f"视频剪辑失败: {str(e)}")
+        logger.warning(f"视频剪辑失败: \n{str(traceback.format_exc())}")
        if os.path.exists(video_path):
            os.remove(video_path)
    finally:
        # 确保视频对象被正确关闭
        try:
            video.close()
            if 'subclip' in locals():
                subclip.close()
        except:
            pass
    return {}
@ -426,6 +480,4 @@ def merge_videos(video_paths, ost_list):
 if __name__ == "__main__":
-    download_videos(
+    save_clip_video('00:50-01:41', 'E:\\projects\\NarratoAI\\resource\\videos\\WeChat_20241110144511.mp4')
        "test123", ["Money Exchange Medium"], audio_duration=100, source="pixabay"
    )
--- a/app/utils/script_generator.py
+++ b/app/utils/script_generator.py
@ -326,7 +326,7 @@ class ScriptProcessor:
            start_seconds = time_to_seconds(start_str)
            end_seconds = time_to_seconds(end_str)
            duration = end_seconds - start_seconds
-            word_count = int(duration / 0.2)
+            word_count = int(duration / 0.35)
            return word_count
        except Exception as e:
--- a/app/utils/utils.py
+++ b/app/utils/utils.py
@ -395,6 +395,8 @@ def cut_video(params, progress_callback=None):
        time_list = [i['timestamp'] for i in video_script_list]
        total_clips = len(time_list)
        print("time_list", time_list)
        def clip_progress(current, total):
            progress = int((current / total) * 100)
@ -413,12 +415,16 @@ def cut_video(params, progress_callback=None):
        st.session_state['subclip_videos'] = subclip_videos
        print("list:", subclip_videos)
        for i, video_script in enumerate(video_script_list):
            print(i)
            print(video_script)
            try:
                video_script['path'] = subclip_videos[video_script['timestamp']]
            except KeyError as err:
                logger.error(f"裁剪视频失败: {err}")
-                raise ValueError(f"裁剪视频失败: {err}")
+                # raise ValueError(f"裁剪视频失败: {err}")
        return task_id, subclip_videos
--- a/app/utils/video_processor.py
+++ b/app/utils/video_processor.py
@ -65,16 +65,44 @@ class VideoProcessor:
                shot_boundaries.append(i)
        return shot_boundaries
    def filter_keyframes_by_time(self, keyframes: List[np.ndarray], 
                               keyframe_indices: List[int]) -> Tuple[List[np.ndarray], List[int]]:
        """
        过滤关键帧，确保每秒最多只有一个关键帧
        Args:
            keyframes: 关键帧列表
            keyframe_indices: 关键帧索引列表
        Returns:
            Tuple[List[np.ndarray], List[int]]: 过滤后的关键帧列表和对应的帧索引
        """
        if not keyframes or not keyframe_indices:
            return keyframes, keyframe_indices
        filtered_frames = []
        filtered_indices = []
        last_second = -1
        for frame, idx in zip(keyframes, keyframe_indices):
            current_second = idx // self.fps
            if current_second != last_second:
                filtered_frames.append(frame)
                filtered_indices.append(idx)
                last_second = current_second
        return filtered_frames, filtered_indices
    def extract_keyframes(self, frames: List[np.ndarray], shot_boundaries: List[int]) -> Tuple[List[np.ndarray], List[int]]:
        """
-        从每个镜头中提取关键帧
+        从每个镜头中提取关键帧，并确保每秒最多一个关键帧
        Args:
            frames: 视频帧列表
            shot_boundaries: 镜头边界列表
        Returns:
-            Tuple[List[np.ndarray], List[int]]: 关<EFBFBD><EFBFBD><EFBFBD>帧列表和对应的帧索引
+            Tuple[List[np.ndarray], List[int]]: 关键帧列表和对应的帧索引
        """
        keyframes = []
        keyframe_indices = []
@ -92,7 +120,10 @@ class VideoProcessor:
            keyframes.append(shot_frames[center_idx])
            keyframe_indices.append(start + center_idx)
-        return keyframes, keyframe_indices
+        # 过滤每秒多余的关键帧
        filtered_keyframes, filtered_indices = self.filter_keyframes_by_time(keyframes, keyframe_indices)
        return filtered_keyframes, filtered_indices
    def save_keyframes(self, keyframes: List[np.ndarray], keyframe_indices: List[int], 
                      output_dir: str) -> None:
@ -206,4 +237,4 @@ class VideoProcessor:
        # 调整关键帧索引，加上跳过的帧数
        adjusted_indices = [idx + skip_frames for idx in keyframe_indices]
-        self.save_keyframes(keyframes, adjusted_indices, output_dir)
+        self.save_keyframes(keyframes, adjusted_indices, output_dir)
--- a/app/utils/vision_analyzer.py
+++ b/app/utils/vision_analyzer.py
@ -100,7 +100,7 @@ class VisionAnalyzer:
                        except Exception as e:
                            retry_count += 1
-                            error_msg = f"批次 {i // batch_size} 处理出错: {str(e)}\n{traceback.format_exc()}"
+                            error_msg = f"批次 {i // batch_size} 处理出错: {str(e)}"
                            logger.error(error_msg)
                            if retry_count >= 3:
@ -111,7 +111,7 @@ class VisionAnalyzer:
                                    'model_used': self.model_name
                                })
                            else:
-                                logger.info(f"批次 {i // batch_size} 处理失败，等待60秒后重试...")
+                                logger.info(f"批次 {i // batch_size} 处理失败，等待60秒后重试当前批次...")
                                await asyncio.sleep(60)
                    pbar.update(1)
--- a/config.example.toml
+++ b/config.example.toml
@ -1,11 +1,11 @@
 [app]
-    project_version="0.3.2"
+    project_version="0.3.4"
    # 支持视频理解的大模型提供商
    #   gemini
    #   NarratoAPI
    #   qwen2-vl (待增加)
    vision_llm_provider="gemini"
-    vision_batch_size = 5
+    vision_batch_size = 7
    vision_analysis_prompt = "你是资深视频内容分析专家，擅长分析视频画面信息，分析下面视频画面内容，只输出客观的画面描述不要给任何总结或评价"
    ########## Vision Gemini API Key
--- a/webui/components/script_settings.py
+++ b/webui/components/script_settings.py
@ -18,6 +18,69 @@ from app.utils import utils, check_script, vision_analyzer, video_processor
 from webui.utils import file_utils
 def get_batch_timestamps(batch_files, prev_batch_files=None):
    """
    获取一批文件的时间戳范围
    返回: (first_timestamp, last_timestamp, timestamp_range)
    文件名格式: keyframe_001253_000050.jpg
    其中 000050 表示 00:00:50 (50秒)
         000101 表示 00:01:01 (1分1秒)
    Args:
        batch_files: 当前批次的文件列表
        prev_batch_files: 上一个批次的文件列表，用于处理单张图片的情况
    """
    if not batch_files:
        logger.warning("Empty batch files")
        return "00:00", "00:00", "00:00-00:00"
    # 如果当前批次只有一张图片，且有上一个批次的文件，则使用上一批次的最后一张作为首帧
    if len(batch_files) == 1 and prev_batch_files and len(prev_batch_files) > 0:
        first_frame = os.path.basename(prev_batch_files[-1])
        last_frame = os.path.basename(batch_files[0])
        logger.debug(f"单张图片批次，使用上一批次最后一帧作为首帧: {first_frame}")
    else:
        # 提取首帧和尾帧的时间戳
        first_frame = os.path.basename(batch_files[0])
        last_frame = os.path.basename(batch_files[-1])
    # 从文件名中提取时间信息
    first_time = first_frame.split('_')[2].replace('.jpg', '')  # 000050
    last_time = last_frame.split('_')[2].replace('.jpg', '')    # 000101
    # 转换为分:秒格式
    def format_timestamp(time_str):
        # 时间格式为 MMSS，如 0050 表示 00:50, 0101 表示 01:01
        if len(time_str) < 4:
            logger.warning(f"Invalid timestamp format: {time_str}")
            return "00:00"
        minutes = int(time_str[-4:-2])  # 取后4位的前2位作为分钟
        seconds = int(time_str[-2:])    # 取后2位作为秒数
        # 处理进位
        if seconds >= 60:
            minutes += seconds // 60
            seconds = seconds % 60
        return f"{minutes:02d}:{seconds:02d}"
    first_timestamp = format_timestamp(first_time)
    last_timestamp = format_timestamp(last_time)
    timestamp_range = f"{first_timestamp}-{last_timestamp}"
    logger.debug(f"解析时间戳: {first_frame} -> {first_timestamp}, {last_frame} -> {last_timestamp}")
    return first_timestamp, last_timestamp, timestamp_range
 def get_batch_files(keyframe_files, result, batch_size=5):
    """
    获取当前批次的图片文件
    """
    batch_start = result['batch_index'] * batch_size
    batch_end = min(batch_start + batch_size, len(keyframe_files))
    return keyframe_files[batch_start:batch_end]
 def render_script_panel(tr):
    """渲染脚本配置面板"""
    with st.container(border=True):
@ -238,7 +301,7 @@ def generate_script(tr, params):
            # 检查是否已经提取过关键帧
            keyframe_files = []
            if os.path.exists(video_keyframes_dir):
-                # 获取已有的关键帧文件
+                # 取已有的关键帧文件
                for filename in sorted(os.listdir(video_keyframes_dir)):
                    if filename.endswith('.jpg'):
                        keyframe_files.append(os.path.join(video_keyframes_dir, filename))
@ -282,7 +345,7 @@ def generate_script(tr, params):
                    except Exception as cleanup_err:
                        logger.error(f"清理失败的关键帧目录时出错: {cleanup_err}")
-                    raise Exception(f"关键帧提取失败: {str(e)}")
+                    raise Exception(f"关键帧提取<EFBFBD><EFBFBD>败: {str(e)}")
            # 根据不同的 LLM 提供商处理
            vision_llm_provider = st.session_state.get('vision_llm_providers').lower()
@ -327,38 +390,27 @@ def generate_script(tr, params):
                    # 合并所有批次的析结果
                    frame_analysis = ""
                    prev_batch_files = None
                    for result in results:
                        if 'error' in result:
                            logger.warning(f"批次 {result['batch_index']} 处理出现警告: {result['error']}")
                            continue
-                        # 获取当前批次的图片文件
+                        batch_files = get_batch_files(keyframe_files, result, config.app.get("vision_batch_size", 5))
-                        batch_start = result['batch_index'] * config.app.get("vision_batch_size", 5)
+                        logger.debug(f"批次 {result['batch_index']} 处理完成，共 {len(batch_files)} 张图片")
-                        batch_end = min(batch_start + config.app.get("vision_batch_size", 5), len(keyframe_files))
+                        logger.debug(batch_files)
                        batch_files = keyframe_files[batch_start:batch_end]
-                        # 提取首帧和尾帧的时间戳
+                        first_timestamp, last_timestamp, _ = get_batch_timestamps(batch_files, prev_batch_files)
-                        first_frame = os.path.basename(batch_files[0])
+                        logger.debug(f"处理时间戳: {first_timestamp}-{last_timestamp}")
                        last_frame = os.path.basename(batch_files[-1])
                        # 从文件名中提取时间信息
                        first_time = first_frame.split('_')[2].replace('.jpg', '')  # 000002
                        last_time = last_frame.split('_')[2].replace('.jpg', '')    # 000005
                        # 转换为分:秒格式
                        def format_timestamp(time_str):
                            seconds = int(time_str)
                            minutes = seconds // 60
                            seconds = seconds % 60
                            return f"{minutes:02d}:{seconds:02d}"
                        first_timestamp = format_timestamp(first_time)
                        last_timestamp = format_timestamp(last_time)
                        # 添加带时间戳的分析结果
                        frame_analysis += f"\n=== {first_timestamp}-{last_timestamp} ===\n"
                        frame_analysis += result['response']
                        frame_analysis += "\n"
                        # 更新上一个批次的文件
                        prev_batch_files = batch_files
                    if not frame_analysis.strip():
                        raise Exception("未能生成有效的帧分析结果")
@ -378,45 +430,27 @@ def generate_script(tr, params):
                    # 构建帧内容列表
                    frame_content_list = []
                    prev_batch_files = None
                    for i, result in enumerate(results):
                        if 'error' in result:
                            continue
-                        # 获取当前批次的图片文件
+                        batch_files = get_batch_files(keyframe_files, result, config.app.get("vision_batch_size", 5))
-                        batch_start = result['batch_index'] * config.app.get("vision_batch_size", 5)
+                        _, _, timestamp_range = get_batch_timestamps(batch_files, prev_batch_files)
                        batch_end = min(batch_start + config.app.get("vision_batch_size", 5), len(keyframe_files))
                        batch_files = keyframe_files[batch_start:batch_end]
                        # 提取首帧和尾帧的时间戳
                        first_frame = os.path.basename(batch_files[0])
                        last_frame = os.path.basename(batch_files[-1])
                        # 从文件名中提取时间信息
                        first_time = first_frame.split('_')[2].replace('.jpg', '')  # 000002
                        last_time = last_frame.split('_')[2].replace('.jpg', '')    # 000005
                        # 转换为分:秒格式
                        def format_timestamp(time_str):
                            seconds = int(time_str)
                            minutes = seconds // 60
                            seconds = seconds % 60
                            return f"{minutes:02d}:{seconds:02d}"
                        first_timestamp = format_timestamp(first_time)
                        last_timestamp = format_timestamp(last_time)
                        # 构建时间戳范围字符串 (MM:SS-MM:SS 格式)
                        timestamp_range = f"{first_timestamp}-{last_timestamp}"
                        frame_content = {
-                            "timestamp": timestamp_range,  # 使用时间范围格式 "MM:SS-MM:SS"
+                            "timestamp": timestamp_range,
-                            "picture": result['response'],  # 图片分析结果
+                            "picture": result['response'],
-                            "narration": "",  # 将由 ScriptProcessor 生成
+                            "narration": "",
-                            "OST": 2  # 默认值
+                            "OST": 2
                        }
                        frame_content_list.append(frame_content)
                        logger.debug(f"添加帧内容: 时间范围={timestamp_range}, 分析结果长度={len(result['response'])}")
                        # 更新上一个批次的文件
                        prev_batch_files = batch_files
                    if not frame_content_list:
                        raise Exception("没有有效的帧内容可以处理")
@ -442,13 +476,15 @@ def generate_script(tr, params):
                    )
                    adapter = HTTPAdapter(max_retries=retry_strategy)
                    session.mount("https://", adapter)
-
+                    try:
-                    response = session.post(
+                        response = session.post(
-                        f"{config.app.get('narrato_api_url')}/video/config",
+                            f"{config.app.get('narrato_api_url')}/video/config",
-                        params=api_params,
+                            params=api_params,
-                        timeout=30,
+                            timeout=30,
-                        verify=True  # 启用证书验证
+                            verify=True  # 启用证书验证
-                    )
+                        )
                    except:
                        pass
                    custom_prompt = st.session_state.get('custom_prompt', '')
                    processor = ScriptProcessor(
@ -621,7 +657,7 @@ def save_script(tr, video_clip_json_details):
                # 显示成功消息
                st.success(tr("Script saved successfully"))
-                # 强制重新加载页面<EFBFBD><EFBFBD>更新选择框
+                # 强制重新加载页面更新选择框
                time.sleep(0.5)  # 给一点时间让用户看到成功消息
                st.rerun()