From a15ab4c9444fd983aa90ffc98eae0f0224bbc81a Mon Sep 17 00:00:00 2001 From: linyqh Date: Mon, 7 Jul 2025 21:33:25 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E8=A7=86=E9=A2=91=E5=B8=A7?= =?UTF-8?q?=E6=8F=90=E5=8F=96=E5=8A=9F=E8=83=BD=EF=BC=8C=E6=96=B0=E5=A2=9E?= =?UTF-8?q?=E8=B6=85=E7=BA=A7=E5=85=BC=E5=AE=B9=E6=80=A7=E6=96=B9=E6=A1=88?= =?UTF-8?q?=E4=BB=A5=E6=8F=90=E9=AB=98=E6=8F=90=E5=8F=96=E6=88=90=E5=8A=9F?= =?UTF-8?q?=E7=8E=87=EF=BC=8C=E5=A2=9E=E5=BC=BA=E9=94=99=E8=AF=AF=E5=A4=84?= =?UTF-8?q?=E7=90=86=E5=92=8C=E7=94=A8=E6=88=B7=E5=8F=8D=E9=A6=88=E3=80=82?= =?UTF-8?q?=E5=9C=A8generate=5Fscript=5Fdocu.py=E4=B8=AD=E6=9B=B4=E6=96=B0?= =?UTF-8?q?=E8=BF=9B=E5=BA=A6=E6=98=BE=E7=A4=BA=E5=92=8C=E9=94=99=E8=AF=AF?= =?UTF-8?q?=E6=8F=90=E7=A4=BA=EF=BC=8C=E6=8F=90=E5=8D=87=E7=94=A8=E6=88=B7?= =?UTF-8?q?=E4=BD=93=E9=AA=8C=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/utils/video_processor.py | 193 ++++++++++++++++++++++++---- webui/tools/generate_script_docu.py | 73 +++++------ 2 files changed, 197 insertions(+), 69 deletions(-) diff --git a/app/utils/video_processor.py b/app/utils/video_processor.py index ebafabf..6c46737 100644 --- a/app/utils/video_processor.py +++ b/app/utils/video_processor.py @@ -129,7 +129,8 @@ class VideoProcessor: logger.info(f"开始提取 {len(extraction_times)} 个关键帧,使用 {hwaccel_type} 加速") - with tqdm(total=len(extraction_times), desc="提取视频帧", unit="帧") as pbar: + with tqdm(total=len(extraction_times), desc="🎬 提取视频帧", unit="帧", + bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]") as pbar: for i, timestamp in enumerate(extraction_times): frame_number = int(timestamp * self.fps) frame_numbers.append(frame_number) @@ -151,16 +152,16 @@ class VideoProcessor: if success: successful_extractions += 1 pbar.set_postfix({ - "成功": successful_extractions, - "失败": failed_extractions, - "当前": f"{timestamp:.1f}s" + "✅": successful_extractions, + "❌": failed_extractions, + "时间": f"{timestamp:.1f}s" }) else: failed_extractions += 1 pbar.set_postfix({ - "成功": successful_extractions, - "失败": failed_extractions, - "当前": f"{timestamp:.1f}s (失败)" + "✅": successful_extractions, + "❌": failed_extractions, + "时间": f"{timestamp:.1f}s" }) pbar.update(1) @@ -203,19 +204,16 @@ class VideoProcessor: # 对于 NVIDIA 显卡,优先使用纯软件解码 + NVENC 编码 if self._try_extract_with_software_decode(timestamp, output_path): return True - logger.debug(f"纯软件解码方案失败,尝试其他方案") # 策略2: 尝试标准硬件加速 if use_hw_accel and ffmpeg_utils.is_ffmpeg_hwaccel_available(): hw_accel = ffmpeg_utils.get_ffmpeg_hwaccel_args() if self._try_extract_with_hwaccel(timestamp, output_path, hw_accel): return True - logger.debug(f"硬件加速方案失败,回退到软件方案") # 策略3: 软件方案 if self._try_extract_with_software(timestamp, output_path): return True - logger.debug(f"软件方案失败,尝试超级兼容性方案") # 策略4: 超级兼容性方案(Windows 特殊处理) return self._try_extract_with_ultra_compatibility(timestamp, output_path) @@ -434,37 +432,21 @@ class VideoProcessor: if is_windows: process_kwargs["encoding"] = 'utf-8' - logger.debug(f"执行命令: {' '.join(cmd)}") result = subprocess.run(cmd, **process_kwargs) # 验证输出文件 output_path = cmd[-1] if os.path.exists(output_path) and os.path.getsize(output_path) > 0: - logger.debug(f"{description} - 成功") return True else: - logger.debug(f"{description} - 输出文件无效: {output_path}") return False except subprocess.CalledProcessError as e: - error_msg = e.stderr if hasattr(e, 'stderr') and e.stderr else str(e) - - # 分析错误类型,提供更好的调试信息 - if "mjpeg" in error_msg.lower() and "non full-range yuv" in error_msg.lower(): - logger.debug(f"{description} - MJPEG YUV 格式问题: {error_msg[:200]}") - elif "codec avOption" in error_msg.lower(): - logger.debug(f"{description} - 编码器参数问题: {error_msg[:200]}") - elif "filter" in error_msg.lower(): - logger.debug(f"{description} - 滤镜链问题: {error_msg[:200]}") - else: - logger.debug(f"{description} - 命令执行失败: {error_msg[:200]}") - + # 简化错误日志,仅记录关键信息 return False except subprocess.TimeoutExpired: - logger.debug(f"{description} - 命令执行超时") return False except Exception as e: - logger.debug(f"{description} - 未知错误: {str(e)}") return False def _detect_hw_accelerator(self) -> List[str]: @@ -510,6 +492,163 @@ class VideoProcessor: logger.error(f"视频处理失败: \n{traceback.format_exc()}") raise + def extract_frames_by_interval_ultra_compatible(self, output_dir: str, interval_seconds: float = 5.0) -> List[int]: + """ + 使用超级兼容性方案按指定时间间隔提取视频帧 + + 直接使用PNG格式提取,避免MJPEG编码问题,确保最高兼容性 + + Args: + output_dir: 输出目录 + interval_seconds: 帧提取间隔(秒) + + Returns: + List[int]: 提取的帧号列表 + """ + if not os.path.exists(output_dir): + os.makedirs(output_dir) + + # 计算起始时间和帧提取点 + start_time = 0 + end_time = self.duration + extraction_times = [] + + current_time = start_time + while current_time < end_time: + extraction_times.append(current_time) + current_time += interval_seconds + + if not extraction_times: + logger.warning("未找到需要提取的帧") + return [] + + # 提取帧 - 使用美化的进度条 + frame_numbers = [] + successful_extractions = 0 + failed_extractions = 0 + + logger.info(f"开始提取 {len(extraction_times)} 个关键帧,使用超级兼容性方案") + + with tqdm(total=len(extraction_times), desc="🎬 提取关键帧", unit="帧", + bar_format="{l_bar}{bar}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]") as pbar: + for i, timestamp in enumerate(extraction_times): + frame_number = int(timestamp * self.fps) + frame_numbers.append(frame_number) + + # 格式化时间戳字符串 (HHMMSSmmm) + hours = int(timestamp // 3600) + minutes = int((timestamp % 3600) // 60) + seconds = int(timestamp % 60) + milliseconds = int((timestamp % 1) * 1000) + time_str = f"{hours:02d}{minutes:02d}{seconds:02d}{milliseconds:03d}" + + output_path = os.path.join(output_dir, f"keyframe_{frame_number:06d}_{time_str}.jpg") + + # 直接使用超级兼容性方案 + success = self._extract_frame_ultra_compatible(timestamp, output_path) + + if success: + successful_extractions += 1 + pbar.set_postfix({ + "✅": successful_extractions, + "❌": failed_extractions, + "时间": f"{timestamp:.1f}s" + }) + else: + failed_extractions += 1 + pbar.set_postfix({ + "✅": successful_extractions, + "❌": failed_extractions, + "时间": f"{timestamp:.1f}s" + }) + + pbar.update(1) + + # 统计结果 + total_attempts = len(extraction_times) + success_rate = (successful_extractions / total_attempts) * 100 if total_attempts > 0 else 0 + + logger.info(f"关键帧提取完成: 成功 {successful_extractions}/{total_attempts} 帧 ({success_rate:.1f}%)") + + if failed_extractions > 0: + logger.warning(f"有 {failed_extractions} 帧提取失败") + + # 验证实际生成的文件 + actual_files = [f for f in os.listdir(output_dir) if f.endswith('.jpg')] + logger.info(f"实际生成文件数量: {len(actual_files)} 个") + + if len(actual_files) == 0: + logger.error("未生成任何关键帧文件") + raise Exception("关键帧提取完全失败,请检查视频文件") + + return frame_numbers + + def _extract_frame_ultra_compatible(self, timestamp: float, output_path: str) -> bool: + """ + 超级兼容性方案提取单帧 + + Args: + timestamp: 时间戳(秒) + output_path: 输出文件路径 + + Returns: + bool: 是否成功提取 + """ + # 使用 PNG 格式避免 MJPEG 问题 + png_output = output_path.replace('.jpg', '.png') + cmd = [ + "ffmpeg", + "-hide_banner", + "-loglevel", "error", + "-ss", str(timestamp), + "-i", self.video_path, + "-vframes", "1", + "-f", "image2", # 明确指定图片格式 + "-y", + png_output + ] + + try: + # 执行FFmpeg命令 + result = subprocess.run(cmd, capture_output=True, text=True, check=True, timeout=30) + + # 验证PNG文件是否成功生成 + if os.path.exists(png_output) and os.path.getsize(png_output) > 0: + # 转换PNG为JPG + try: + from PIL import Image + with Image.open(png_output) as img: + # 转换为 RGB 模式(去除 alpha 通道) + if img.mode in ('RGBA', 'LA'): + background = Image.new('RGB', img.size, (255, 255, 255)) + background.paste(img, mask=img.split()[-1] if img.mode == 'RGBA' else None) + img = background + img.save(output_path, 'JPEG', quality=90) + + # 删除临时 PNG 文件 + os.remove(png_output) + return True + except Exception as e: + logger.warning(f"PNG 转 JPG 失败: {e}") + # 如果转换失败,直接重命名 PNG 为 JPG + try: + os.rename(png_output, output_path) + return True + except Exception: + return False + else: + return False + + except subprocess.CalledProcessError as e: + logger.warning(f"超级兼容性方案提取帧 {timestamp:.1f}s 失败: {e}") + return False + except subprocess.TimeoutExpired: + logger.warning(f"超级兼容性方案提取帧 {timestamp:.1f}s 超时") + return False + except Exception as e: + logger.warning(f"超级兼容性方案提取帧 {timestamp:.1f}s 异常: {e}") + return False + if __name__ == "__main__": import time diff --git a/webui/tools/generate_script_docu.py b/webui/tools/generate_script_docu.py index dde125c..ed6ec25 100644 --- a/webui/tools/generate_script_docu.py +++ b/webui/tools/generate_script_docu.py @@ -25,9 +25,9 @@ def generate_script_docu(params): def update_progress(progress: float, message: str = ""): progress_bar.progress(progress) if message: - status_text.text(f"{progress}% - {message}") + status_text.text(f"🎬 {message}") else: - status_text.text(f"进度: {progress}%") + status_text.text(f"📊 进度: {progress}%") try: with st.spinner("正在生成脚本..."): @@ -54,7 +54,7 @@ def generate_script_docu(params): if keyframe_files: logger.info(f"使用已缓存的关键帧: {video_keyframes_dir}") - st.info(f"使用已缓存的关键帧,如需重新提取请删除目录: {video_keyframes_dir}") + st.info(f"✅ 使用已缓存关键帧,共 {len(keyframe_files)} 帧") update_progress(20, f"使用已缓存关键帧,共 {len(keyframe_files)} 帧") # 如果没有缓存的关键帧,则进行提取 @@ -67,30 +67,30 @@ def generate_script_docu(params): processor = video_processor.VideoProcessor(params.video_origin_path) # 显示视频信息 - st.info(f"视频信息: {processor.width}x{processor.height}, {processor.fps:.1f}fps, {processor.duration:.1f}秒") + st.info(f"📹 视频信息: {processor.width}x{processor.height}, {processor.fps:.1f}fps, {processor.duration:.1f}秒") - # 处理视频并提取关键帧 - update_progress(15, "正在提取关键帧...") + # 处理视频并提取关键帧 - 直接使用超级兼容性方案 + update_progress(15, "正在提取关键帧(使用超级兼容性方案)...") try: - processor.process_video_pipeline( + # 使用优化的关键帧提取方法 + processor.extract_frames_by_interval_ultra_compatible( output_dir=video_keyframes_dir, interval_seconds=st.session_state.get('frame_interval_input'), ) except Exception as extract_error: - # 如果硬件加速失败,尝试强制使用软件方案 - logger.warning(f"硬件加速提取失败: {extract_error}") - st.warning("硬件加速提取失败,正在尝试软件方案...") + logger.error(f"关键帧提取失败: {extract_error}") + + # 提供详细的错误信息和解决建议 + error_msg = str(extract_error) + if "权限" in error_msg or "permission" in error_msg.lower(): + suggestion = "建议:检查输出目录权限,或更换输出位置" + elif "空间" in error_msg or "space" in error_msg.lower(): + suggestion = "建议:检查磁盘空间是否足够" + else: + suggestion = "建议:检查视频文件是否损坏,或尝试转换为标准格式" - # 强制使用软件编码重试 - from app.utils import ffmpeg_utils - ffmpeg_utils.force_software_encoding() - - processor.process_video_pipeline( - output_dir=video_keyframes_dir, - interval_seconds=st.session_state.get('frame_interval_input'), - use_hw_accel=False # 明确禁用硬件加速 - ) + raise Exception(f"关键帧提取失败: {error_msg}\n{suggestion}") # 获取所有关键文件路径 for filename in sorted(os.listdir(video_keyframes_dir)): @@ -101,7 +101,7 @@ def generate_script_docu(params): # 检查目录中是否有其他文件 all_files = os.listdir(video_keyframes_dir) logger.error(f"关键帧目录内容: {all_files}") - raise Exception("未提取到任何关键帧文件,可能是 FFmpeg 兼容性问题") + raise Exception("未提取到任何关键帧文件,请检查视频文件格式") update_progress(20, f"关键帧提取完成,共 {len(keyframe_files)} 帧") st.success(f"✅ 成功提取 {len(keyframe_files)} 个关键帧") @@ -115,23 +115,14 @@ def generate_script_docu(params): except Exception as cleanup_err: logger.error(f"清理失败的关键帧目录时出错: {cleanup_err}") - # 提供更详细的错误信息和解决建议 - error_msg = str(e) - if "滤镜链" in error_msg or "filter" in error_msg.lower(): - suggestion = "建议:这可能是硬件加速兼容性问题,请尝试在设置中禁用硬件加速" - elif "cuda" in error_msg.lower() or "nvenc" in error_msg.lower(): - suggestion = "建议:NVIDIA 显卡驱动可能需要更新,或尝试禁用硬件加速" - else: - suggestion = "建议:检查视频文件是否损坏,或尝试转换为标准格式" - - raise Exception(f"关键帧提取失败: {error_msg}\n{suggestion}") + raise Exception(f"关键帧提取失败: {str(e)}") """ 2. 视觉分析(批量分析每一帧) """ vision_llm_provider = st.session_state.get('vision_llm_providers').lower() llm_params = dict() - logger.debug(f"VLM 视觉大模型提供商: {vision_llm_provider}") + logger.info(f"使用 {vision_llm_provider.upper()} 进行视觉分析") try: # ===================初始化视觉分析器=================== @@ -212,7 +203,7 @@ def generate_script_docu(params): overall_activity_summaries = [] # 合并所有批次的整体总结 prev_batch_files = None frame_counter = 1 # 初始化帧计数器,用于给所有帧分配连续的序号 - # logger.debug(json.dumps(results, indent=4, ensure_ascii=False)) + # 确保分析目录存在 analysis_dir = os.path.join(utils.storage_dir(), "temp", "analysis") os.makedirs(analysis_dir, exist_ok=True) @@ -228,11 +219,9 @@ def generate_script_docu(params): # 获取当前批次的文件列表 batch_files = get_batch_files(keyframe_files, result, vision_batch_size) - logger.debug(f"批次 {result['batch_index']} 处理完成,共 {len(batch_files)} 张图片") # 获取批次的时间戳范围 first_timestamp, last_timestamp, timestamp_range = get_batch_timestamps(batch_files, prev_batch_files) - logger.debug(f"处理时间戳: {first_timestamp}-{last_timestamp}") # 解析响应中的JSON数据 response_text = result['response'] @@ -377,8 +366,8 @@ def generate_script_docu(params): """ 4. 生成文案 """ - logger.info("开始准备生成解说文案") - update_progress(80, "正在生成文案...") + logger.info("开始生成解说文案") + update_progress(80, "正在生成解说文案...") from app.services.generate_narration_script import parse_frame_analysis_to_markdown, generate_narration # 从配置中获取文本生成相关配置 text_provider = config.app.get('text_llm_provider', 'gemini').lower() @@ -413,7 +402,7 @@ def generate_script_docu(params): narration_dict = narration_data['items'] # 为 narration_dict 中每个 item 新增一个 OST: 2 的字段, 代表保留原声和配音 narration_dict = [{**item, "OST": 2} for item in narration_dict] - logger.debug(f"解说文案创作完成:\n{"\n".join([item['narration'] for item in narration_dict])}") + logger.info(f"解说文案生成完成,共 {len(narration_dict)} 个片段") # 结果转换为JSON字符串 script = json.dumps(narration_dict, ensure_ascii=False, indent=2) @@ -424,20 +413,20 @@ def generate_script_docu(params): if script is None: st.error("生成脚本失败,请检查日志") st.stop() - logger.success(f"剪辑脚本生成完成") + logger.info(f"纪录片解说脚本生成完成") if isinstance(script, list): st.session_state['video_clip_json'] = script elif isinstance(script, str): st.session_state['video_clip_json'] = json.loads(script) - update_progress(80, "脚本生成完成") + update_progress(100, "脚本生成完成") time.sleep(0.1) progress_bar.progress(100) - status_text.text("脚本生成完成!") - st.success("视频脚本生成成功!") + status_text.text("🎉 脚本生成完成!") + st.success("✅ 视频脚本生成成功!") except Exception as err: - st.error(f"生成过程中发生错误: {str(err)}") + st.error(f"❌ 生成过程中发生错误: {str(err)}") logger.exception(f"生成脚本时发生错误\n{traceback.format_exc()}") finally: time.sleep(2)