diff --git a/app/services/material.py b/app/services/material.py index fc41fba..2a84f85 100644 --- a/app/services/material.py +++ b/app/services/material.py @@ -421,23 +421,10 @@ def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, pro Returns: 剪辑后的视频路径 """ - # 创建基于原视频的缓存目录 - video_cache_dir = os.path.join(utils.temp_dir(), "video") - video_hash = utils.md5(origin_video + str(os.path.getmtime(origin_video))) - video_clips_dir = os.path.join(video_cache_dir, video_hash) - - if not os.path.exists(video_clips_dir): - os.makedirs(video_clips_dir) - video_paths = {} total_items = len(timestamp_terms) for index, item in enumerate(timestamp_terms): material_directory = config.app.get("material_directory", "").strip() - if material_directory == "task": - material_directory = utils.task_dir(task_id) - elif material_directory and not os.path.isdir(material_directory): - material_directory = video_clips_dir # 如果没有指定material_directory,使用缓存目录 - try: saved_video_path = save_clip_video(timestamp=item, origin_video=origin_video, save_dir=material_directory) if saved_video_path: diff --git a/app/services/video.py b/app/services/video.py index fc6fce9..2dc7a45 100644 --- a/app/services/video.py +++ b/app/services/video.py @@ -48,13 +48,13 @@ def get_bgm_file(bgm_type: str = "random", bgm_file: str = ""): def combine_videos( - combined_video_path: str, - video_paths: List[str], - audio_file: str, - video_aspect: VideoAspect = VideoAspect.portrait, - video_concat_mode: VideoConcatMode = VideoConcatMode.random, - max_clip_duration: int = 5, - threads: int = 2, + combined_video_path: str, + video_paths: List[str], + audio_file: str, + video_aspect: VideoAspect = VideoAspect.portrait, + video_concat_mode: VideoConcatMode = VideoConcatMode.random, + max_clip_duration: int = 5, + threads: int = 2, ) -> str: audio_clip = AudioFileClip(audio_file) audio_duration = audio_clip.duration @@ -173,7 +173,7 @@ def wrap_text(text, max_width, font, fontsize=60): if width <= 
max_width: return text, height - logger.debug(f"换行文本, 最大宽度: {max_width}, 文本宽度: {width}, 本: {text}") + logger.debug(f"换行文本, 最大宽度: {max_width}, 文本宽度: {width}, 文本: {text}") processed = True @@ -228,105 +228,143 @@ def manage_clip(clip): def generate_video_v2( - video_path: str, - audio_path: str, - subtitle_path: str, - output_file: str, - params: VideoClipParams, - list_script: list = None + video_path: str, + audio_path: str, + subtitle_path: str, + output_file: str, + params: Union[VideoParams, VideoClipParams], + progress_callback=None, ): """ - 生成最终视频,处理音频和字幕 - + 合并所有素材 Args: - video_path: 视频文件路径 - audio_path: 音频文件路径 + video_path: 视频路径 + audio_path: 单个音频文件路径 subtitle_path: 字幕文件路径 output_file: 输出文件路径 params: 视频参数 - list_script: 视频脚本列表,包含OST设置 - """ - try: - video_clip = VideoFileClip(video_path) - - # 处理音频 - if audio_path and os.path.exists(audio_path): - audio_clip = AudioFileClip(audio_path) - - if list_script: - # 根据OST设置处理音频 - # OST=0: 只使用TTS音频 - # OST=1: 只使用视频原声 - # OST=2: 混合TTS音频和视频原声 - original_audio = video_clip.audio - - # 设置音频音量 - tts_volume = params.tts_volume if hasattr(params, 'tts_volume') else 1.0 - video_volume = params.video_volume if hasattr(params, 'video_volume') else 0.1 - - # 创建最终音频 - if original_audio: - # 有些片段需要原声,有些需要TTS - final_audio = CompositeAudioClip([ - audio_clip.volumex(tts_volume), # TTS音频 - original_audio.volumex(video_volume) # 原声音频 - ]) - else: - final_audio = audio_clip.volumex(tts_volume) - else: - # 如果没有OST设置,使用默认行为 - final_audio = audio_clip - - video_clip = video_clip.set_audio(final_audio) + progress_callback: 进度回调函数,接收 0-100 的进度值 - # 处理字幕 - if subtitle_path and os.path.exists(subtitle_path): - # 添加字幕 - video_clip = add_subtitles( - video_clip, - subtitle_path, - params.font_size, - params.font_name, - params.text_fore_color, - params.subtitle_position, - params.stroke_color, - params.stroke_width + Returns: + + """ + total_steps = 4 + current_step = 0 + + def update_progress(step_name): + nonlocal current_step + 
current_step += 1 + if progress_callback: + progress_callback(int(current_step * 100 / total_steps)) + logger.info(f"完成步骤: {step_name}") + + try: + validate_params(video_path, audio_path, output_file, params) + + with manage_clip(VideoFileClip(video_path)) as video_clip: + aspect = VideoAspect(params.video_aspect) + video_width, video_height = aspect.to_resolution() + + logger.info(f"开始,视频尺寸: {video_width} x {video_height}") + logger.info(f" ① 视频: {video_path}") + logger.info(f" ② 音频: {audio_path}") + logger.info(f" ③ 字幕: {subtitle_path}") + logger.info(f" ④ 输出: {output_file}") + + output_dir = os.path.dirname(output_file) + update_progress("初始化完成") + + # 字体设置 + font_path = "" + if params.subtitle_enabled: + if not params.font_name: + params.font_name = "STHeitiMedium.ttc" + font_path = os.path.join(utils.font_dir(), params.font_name) + if os.name == "nt": + font_path = font_path.replace("\\", "/") + logger.info(f"使用字体: {font_path}") + + def create_text_clip(subtitle_item): + phrase = subtitle_item[1] + max_width = video_width * 0.9 + wrapped_txt, txt_height = wrap_text( + phrase, max_width=max_width, font=font_path, fontsize=params.font_size + ) + _clip = TextClip( + wrapped_txt, + font=font_path, + fontsize=params.font_size, + color=params.text_fore_color, + bg_color=params.text_background_color, + stroke_color=params.stroke_color, + stroke_width=params.stroke_width, + print_cmd=False, + ) + duration = subtitle_item[0][1] - subtitle_item[0][0] + _clip = _clip.set_start(subtitle_item[0][0]) + _clip = _clip.set_end(subtitle_item[0][1]) + _clip = _clip.set_duration(duration) + + if params.subtitle_position == "bottom": + _clip = _clip.set_position(("center", video_height * 0.95 - _clip.h)) + elif params.subtitle_position == "top": + _clip = _clip.set_position(("center", video_height * 0.05)) + elif params.subtitle_position == "custom": + margin = 10 + max_y = video_height - _clip.h - margin + min_y = margin + custom_y = (video_height - _clip.h) * 
(params.custom_position / 100) + custom_y = max(min_y, min(custom_y, max_y)) + _clip = _clip.set_position(("center", custom_y)) + else: # center + _clip = _clip.set_position(("center", "center")) + return _clip + + update_progress("字体设置完成") + + # 处理音频 + original_audio = video_clip.audio + video_duration = video_clip.duration + new_audio = AudioFileClip(audio_path) + final_audio = process_audio_tracks(original_audio, new_audio, params, video_duration) + update_progress("音频处理完成") + + # 处理字幕 + if subtitle_path and os.path.exists(subtitle_path): + video_clip = process_subtitles(subtitle_path, video_clip, video_duration, create_text_clip) + update_progress("字幕处理完成") + + # 合并音频和导出 + video_clip = video_clip.set_audio(final_audio) + video_clip.write_videofile( + output_file, + audio_codec="aac", + temp_audiofile=os.path.join(output_dir, "temp-audio.m4a"), + threads=params.n_threads, + logger=None, + fps=30, ) - # 写入最终视频文件 - video_clip.write_videofile( - output_file, - codec="libx264", - audio_codec="aac", - temp_audiofile="temp-audio.m4a", - remove_temp=True, - threads=params.n_threads - ) - + except FileNotFoundError as e: + logger.error(f"文件不存在: {str(e)}") + raise except Exception as e: - logger.error(f"生成视频时发生错误: {str(e)}") - raise e - + logger.error(f"视频生成失败: {str(e)}") + raise finally: - # 清理资源 - if 'video_clip' in locals(): - video_clip.close() - if 'audio_clip' in locals(): - audio_clip.close() - if 'final_audio' in locals(): - final_audio.close() + logger.success("完成") def process_audio_tracks(original_audio, new_audio, params, video_duration): """处理所有音轨""" audio_tracks = [] - + if original_audio is not None: audio_tracks.append(original_audio) - + new_audio = new_audio.volumex(params.voice_volume) audio_tracks.append(new_audio) - + # 处理背景音乐 bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file) if bgm_file: @@ -336,7 +374,7 @@ def process_audio_tracks(original_audio, new_audio, params, video_duration): audio_tracks.append(bgm_clip) except 
Exception as e: logger.error(f"添加背景音乐失败: {str(e)}") - + return CompositeAudioClip(audio_tracks) if audio_tracks else new_audio @@ -344,22 +382,22 @@ def process_subtitles(subtitle_path, video_clip, video_duration, create_text_cli """处理字幕""" if not (subtitle_path and os.path.exists(subtitle_path)): return video_clip - + sub = SubtitlesClip(subtitles=subtitle_path, encoding="utf-8") text_clips = [] - + for item in sub.subtitles: clip = create_text_clip(subtitle_item=item) - - # 时间范围整 + + # 时间范围调整 start_time = max(clip.start, 0) if start_time >= video_duration: continue - + end_time = min(clip.end, video_duration) clip = clip.set_start(start_time).set_end(end_time) text_clips.append(clip) - + logger.info(f"处理了 {len(text_clips)} 段字幕") return CompositeVideoClip([video_clip, *text_clips]) @@ -412,12 +450,12 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4): def combine_clip_videos(combined_video_path: str, - video_paths: List[str], - video_ost_list: List[int], - list_script: list, - video_aspect: VideoAspect = VideoAspect.portrait, - threads: int = 2, - ) -> str: + video_paths: List[str], + video_ost_list: List[int], + list_script: list, + video_aspect: VideoAspect = VideoAspect.portrait, + threads: int = 2, + ) -> str: """ 合并子视频 Args: @@ -431,19 +469,10 @@ def combine_clip_videos(combined_video_path: str, Returns: str: 合并后的视频路径 """ - # 计算总时长时需要考虑毫秒精度 - total_duration = 0.0 - for item in list_script: - timestamp = item.get('new_timestamp', '') - if timestamp: - start_str, end_str = timestamp.split('-') - start_time = utils.time_to_seconds(start_str) - end_time = utils.time_to_seconds(end_str) - duration = end_time - start_time - total_duration += duration - - logger.info(f"音频的最大持续时间: {total_duration:.3f} s") - + from app.utils.utils import calculate_total_duration + audio_duration = calculate_total_duration(list_script) + logger.info(f"音频的最大持续时间: {audio_duration} s") + output_dir = os.path.dirname(combined_video_path) aspect = 
VideoAspect(video_aspect) video_width, video_height = aspect.to_resolution() @@ -451,42 +480,26 @@ def combine_clip_videos(combined_video_path: str, clips = [] for video_path, video_ost in zip(video_paths, video_ost_list): try: - # 加载视频片段 clip = VideoFileClip(video_path) - - # 根据OST设置处理音频 + if video_ost == 0: # 不保留原声 clip = clip.without_audio() - elif video_ost == 1: # 只保留原声 - # 保持原声,但可能需要调整音量 - if clip.audio: - clip = clip.set_audio(clip.audio.volumex(1.0)) # 可以调整音量系数 - # OST == 2 的情况会在后续处理中混合音频 - + # video_ost 为 1 或 2 时都保留原声,不需要特殊处理 + clip = clip.set_fps(30) # 处理视频尺寸 clip_w, clip_h = clip.size if clip_w != video_width or clip_h != video_height: clip = resize_video_with_padding( - clip, - target_width=video_width, + clip, + target_width=video_width, target_height=video_height ) logger.info(f"视频 {video_path} 已调整尺寸为 {video_width} x {video_height}") - # 精确控制视频时长 - filename = os.path.basename(video_path) - timestamp = extract_timestamp_from_filename(filename) - if timestamp: - start_time, end_time = timestamp - clip_duration = end_time - start_time - if abs(clip.duration - clip_duration) > 0.1: # 允许0.1秒的误差 - logger.warning(f"视频 {video_path} 时长与时间戳不匹配,进行调整") - clip = clip.set_duration(clip_duration) - clips.append(clip) - + except Exception as e: logger.error(f"处理视频 {video_path} 时出错: {str(e)}") continue @@ -495,10 +508,9 @@ def combine_clip_videos(combined_video_path: str, raise ValueError("没有有效的视频片段可以合并") try: - # 合并所有视频片段 video_clip = concatenate_videoclips(clips) video_clip = video_clip.set_fps(30) - + logger.info("开始合并视频...") video_clip.write_videofile( filename=combined_video_path, @@ -509,7 +521,7 @@ def combine_clip_videos(combined_video_path: str, temp_audiofile=os.path.join(output_dir, "temp-audio.m4a") ) finally: - # 确保资源被正确释放 + # 确保资源被正确释放 video_clip.close() for clip in clips: clip.close() @@ -518,61 +530,6 @@ def combine_clip_videos(combined_video_path: str, return combined_video_path -def extract_timestamp_from_filename(filename: str) -> tuple: - """ - 
从文件名中提取时间戳,支持格式: - - "vid-00-00-10_000-00-00-43_039.mp4" -> (10.0, 43.039) - 表示 00时00分10秒000毫秒 到 00时00分43秒039毫秒 - """ - try: - # 提取时间戳部分 - match = re.search(r'vid-(.+?)\.mp4$', filename) - if not match: - logger.warning(f"文件名格式不正确: {filename}") - return None - - timestamp = match.group(1) - - def parse_timestamp(time_str: str) -> float: - """解析单个时间戳字符串为秒数""" - try: - # 处理 "00-00-10_000" 格式 - main_time, milliseconds = time_str.rsplit('_', 1) # 从右边分割,处理可能存在的多个下划线 - time_components = main_time.split('-') - - if len(time_components) != 3: - raise ValueError(f"时间格式错误: {main_time}") - - hours = int(time_components[0]) - minutes = int(time_components[1]) - seconds = int(time_components[2]) - ms = int(milliseconds) - - # 转换为秒数 - total_seconds = hours * 3600 + minutes * 60 + seconds + ms / 1000 - return total_seconds - except Exception as e: - raise ValueError(f"解析时间戳失败 {time_str}: {str(e)}") - - # 分割起始和结束时间戳 - timestamps = timestamp.split('-', 5) # 最多分割5次,处理 00-00-10_000-00-00-43_039 格式 - if len(timestamps) != 6: # 应该得到 ['00', '00', '10_000', '00', '00', '43_039'] - raise ValueError(f"时间戳格式错误,无法分割: {timestamp}") - - start_str = '-'.join(timestamps[0:3]) # 组合开始时间 "00-00-10_000" - end_str = '-'.join(timestamps[3:6]) # 组合结束时间 "00-00-43_039" - - start_seconds = parse_timestamp(start_str) - end_seconds = parse_timestamp(end_str) - - logger.debug(f"从文件名 {filename} 提取时间戳: {start_seconds:.3f} - {end_seconds:.3f}") - return start_seconds, end_seconds - - except Exception as e: - logger.error(f"从文件名提取时间戳失败 {filename}: {str(e)}\n{traceback.format_exc()}") - return None - - def resize_video_with_padding(clip, target_width: int, target_height: int): """辅助函数:调整视频尺寸并添加黑边""" clip_ratio = clip.w / clip.h @@ -580,7 +537,7 @@ def resize_video_with_padding(clip, target_width: int, target_height: int): if clip_ratio == target_ratio: return clip.resize((target_width, target_height)) - + if clip_ratio > target_ratio: scale_factor = target_width / clip.w else: @@ -591,10 +548,10 @@ def 
resize_video_with_padding(clip, target_width: int, target_height: int): clip_resized = clip.resize(newsize=(new_width, new_height)) background = ColorClip( - size=(target_width, target_height), + size=(target_width, target_height), color=(0, 0, 0) ).set_duration(clip.duration) - + return CompositeVideoClip([ background, clip_resized.set_position("center") @@ -605,170 +562,85 @@ def validate_params(video_path, audio_path, output_file, params): """验证输入参数""" if not os.path.exists(video_path): raise FileNotFoundError(f"视频文件不存在: {video_path}") - + if not os.path.exists(audio_path): raise FileNotFoundError(f"音频文件不存在: {audio_path}") - + output_dir = os.path.dirname(output_file) if not os.path.exists(output_dir): raise FileNotFoundError(f"输出目录不存在: {output_dir}") - + if not hasattr(params, 'video_aspect'): raise ValueError("params 缺少必要参数 video_aspect") -def add_subtitles(video_clip, subtitle_path, font_size, font_name, font_color, position, shadow_color, shadow_offset): - """ - 为视频添加字幕 - - Args: - video_clip: 视频剪辑对象 - subtitle_path: 字幕文件路径 - font_size: 字体大小 - font_name: 字体名称 - font_color: 字体颜色 - position: 字幕位置 ('top', 'center', 'bottom') - shadow_color: 阴影颜色 - shadow_offset: 阴影偏移 - - Returns: - 带有字幕的视频剪辑对象 - """ - try: - # 确保字体文件存在 - font_path = os.path.join(utils.font_dir(), font_name) - if not os.path.exists(font_path): - logger.error(f"字体文件不存在: {font_path}") - # 尝试使用系统默认字体 - font_path = "Arial" if os.name == 'nt' else "/System/Library/Fonts/STHeiti Light.ttc" - logger.info(f"使用默认字体: {font_path}") - - # 设置字幕位置 - if position == "top": - pos = ("center", 50) - elif position == "center": - pos = "center" - else: # bottom - pos = ("center", -50) - - def subtitle_generator(txt): - return TextClip( - txt, - fontsize=font_size, - font=font_path, - color=font_color, - stroke_color=shadow_color, - stroke_width=shadow_offset, - method='caption', # 使用 caption 方法可能更稳定 - size=(video_clip.w * 0.9, None) # 限制字幕宽度 - ) - - # 使用 SubtitlesClip,但明确指定 UTF-8 编码 - subtitles = SubtitlesClip( - 
subtitle_path, - subtitle_generator, - encoding='utf-8' # 明确指定使用 UTF-8 编码 - ) - - # 添加字幕到视频 - video_with_subtitles = CompositeVideoClip([ - video_clip, - subtitles.set_position(pos) - ]) - - return video_with_subtitles - - except Exception as e: - logger.error(f"添加字幕时出错: {str(e)}\n{traceback.format_exc()}") - # 如果添加字幕失败,返回原始视频 - return video_clip - - if __name__ == "__main__": - # combined_video_path = "../../storage/tasks/12312312/com123.mp4" - # - # video_paths = ['../../storage/cache_videos/vid-00_00-00_03.mp4', - # '../../storage/cache_videos/vid-00_03-00_07.mp4', - # '../../storage/cache_videos/vid-00_12-00_17.mp4', - # '../../storage/cache_videos/vid-00_26-00_31.mp4'] - # video_ost_list = [False, True, False, True] - # list_script = [ - # { - # "picture": "夜晚,一个小孩在树林里奔跑,后面有人拿着火把在追赶", - # "timestamp": "00:00-00:03", - # "narration": "夜风高的树林,一个小孩在拼命奔跑,后面的人穷追不舍!", - # "OST": False, - # "new_timestamp": "00:00-00:03" - # }, - # { - # "picture": "追赶的人命令抓住小孩", - # "timestamp": "00:03-00:07", - # "narration": "原声播放1", - # "OST": True, - # "new_timestamp": "00:03-00:07" - # }, - # { - # "picture": "小孩躲在草丛里,黑衣人用脚踢了踢他", - # "timestamp": "00:12-00:17", - # "narration": "小孩脱下外套,跑进树林, 一路奔跑,直到第二天清晨", - # "OST": False, - # "new_timestamp": "00:07-00:12" - # }, - # { - # "picture": "小孩跑到车前,慌慌张张地对女人说有人要杀他", - # "timestamp": "00:26-00:31", - # "narration": "原声播放2", - # "OST": True, - # "new_timestamp": "00:12-00:17" - # } - # ] + combined_video_path = "../../storage/tasks/123/combined.mp4" + + video_paths = ['../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-00-10_000-00-00-43_039.mp4', + '../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-00-45_439-00-01-01_600.mp4', + '../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-01-07_920-00-01-25_719.mp4', + '../../storage/temp/clip_video/0b545e689a182a91af2163c7c0ca7ca3/vid-00-01-36_959-00-01-53_719.mp4'] + video_ost_list = [2, 2, 2, 2] + list_script = [ + { + "timestamp": 
"00:10-00:43", + "picture": "好的,以下是视频画面的客观描述:\n\n视频显示一个男人在一个树木繁茂的地区,靠近一个泥土斜坡他穿着一件深色T恤、卡其色长裤和登山靴。他背着一个军绿色背包,里面似乎装有头和其他工具。\n\n第一个镜头显示该男子从远处走近斜坡,背对着镜头。下一个镜头特写显示了的背包,一个镐头从背包中伸出来。下一个镜头显示该男子用镐头敲打斜坡。下一个镜头是该男子脚上的特写镜头,他穿着登山靴,正站在泥土斜坡上。最后一个镜显示该男子在斜坡上,仔细地拨开树根和泥土。周围的环境是树木繁茂的,阳光透过树叶照射下来。土壤是浅棕色的,斜坡上有许多树根和植被。", + "narration": "(接上文)好吧,今天我们的男主角,背着一个看似随时要发射军绿色背包,竟然化身“泥土探险家”,在斜坡上挥舞着镐头!他这是准备挖宝还是给树根做个“美容”?阳光洒下来,简直是自然界的聚光灯,仿佛在说:“快来看看,这位勇士要挑战泥土极限!”我只能默默想,如果树根能说话,它们一定会喊:“别打我,我还有家人!”这就是生活,总有些搞笑的瞬间等着我们去发现!", + "OST": 2, + "new_timestamp": "00:00:00,000-00:00:33,000" + }, + { + "timestamp": "00:45-01:01", + "picture": "好的以下是视频画面的客观描述:\n\n视频显示了一个人在森林里挖掘。\n\n第一个镜头是地面特写,显示出松散的泥土、碎石和落叶。光线照在部分区域。\n\n第二个镜头中,一模糊不清的蹲一个树根旁挖掘,一个橄榄绿色的背包放在地上。树根缠绕着常春藤。\n\n第三个镜头显示该人在一个更开阔的区域挖掘,那里有一些树根,以及部分倒的树干。他起来像是在挖掘一个较大的坑。\n\n第四个镜头是特写镜头,显示该人用工具清理土坑的墙壁。\n\n第五个镜头是土坑内部的特写镜头,可以看到土质的纹理,有一些小树根和其它植被的残留物。", + "narration": "现在,这位勇敢的挖掘者就像个“现代版的土豆农夫”,在森林里开辟新天地。的目标是什么?挖出一个宝藏还一块“树根披萨”?小心哦,别让树根追着你喊:“不要挖我,我也是有故事的!”", + "OST": 2, + "new_timestamp": "00:00:33,000-00:00:49,000" + }, + { + "timestamp": "01:07-01:25", + "picture": "好,以下是视频画面的客观描述:\n\n画面1:特写镜头,显示出一丛带有水珠的深绿色灌木叶片。叶片呈椭圆形,边缘光滑。背景是树根和泥土。\n\n画面2:一个留着胡子的男人正在一个森林中土坑里挖掘。他穿着黑色T恤和卡其色裤子,跪在地上,用具挖掘泥土。周围环绕着树木、树根和灌木。一个倒下的树干横跨土坑上方。\n\n画面3:同一个男人坐在他刚才挖的坑的边缘,看着前方。他的表情似乎略带沉思。背景与画面2相同。\n\n画面4:一个广角镜头显示出他挖出的坑。这是一个不规则形状的土坑,在树木繁茂的斜坡上。土壤呈深棕色,可见树根。\n\n画面5:同一个男人跪在地上,用一把小斧头砍一根木头。他穿着与前几个画面相同的衣服。地面上覆盖着落叶。周围是树木和灌木。", + "narration": "“哎呀,这片灌木叶子滴水如雨,感觉像是大自然的洗发水广告!但我这位‘挖宝达人’似乎更适合拍个‘森林里的单身狗’真人秀。等会儿,我要给树根唱首歌,听说它们爱音乐!”", + "OST": 2, + "new_timestamp": "00:00:49,000-00:01:07,000" + }, + { + "timestamp": "01:36-01:53", + "picture": "好的,以下是视频画面内容的客观描述:\n\n视频包含三个镜头:\n\n**镜头一:**个小型、浅水池塘,位于树林中。池塘的水看起来浑浊,呈绿褐色。池塘周围遍布泥土和落叶。多根树枝和树干横跨池塘,部分浸没在水中。周围的植被茂密,主要是深色树木和灌木。\n\n**镜头二:**距拍摄树深处,阳光透过树叶洒落在植被上。镜头中可见粗大的树干、树枝和各种绿叶植物。部分树枝似乎被砍断,切口可见。\n\n**镜头三:**近距离特写镜头,聚焦在树枝和绿叶上。叶片呈圆形,颜色为鲜绿色,有些叶片上有缺损。树枝颜色较深,呈现深褐色。背景是模糊的树林。\n", + "narration": "“好吧,看来我们的‘挖宝达人’终于找到了一‘宝藏’——一个色泽如同绿豆汤的池塘!我敢打赌,这里不仅是小鱼儿的游乐场更是树枝们的‘水疗中心’!下次来这里,我得带上浮潜装备!”", + "OST": 2, 
+ "new_timestamp": "00:01:07,000-00:01:24,000" + } + ] + # 合并子视频 # combine_clip_videos(combined_video_path=combined_video_path, video_paths=video_paths, video_ost_list=video_ost_list, list_script=list_script) - # cfg = VideoClipParams() - # cfg.video_aspect = VideoAspect.portrait - # cfg.font_name = "STHeitiMedium.ttc" - # cfg.font_size = 60 - # cfg.stroke_color = "#000000" - # cfg.stroke_width = 1.5 - # cfg.text_fore_color = "#FFFFFF" - # cfg.text_background_color = "transparent" - # cfg.bgm_type = "random" - # cfg.bgm_file = "" - # cfg.bgm_volume = 1.0 - # cfg.subtitle_enabled = True - # cfg.subtitle_position = "bottom" - # cfg.n_threads = 2 - # cfg.paragraph_number = 1 - # - # cfg.voice_volume = 1.0 + cfg = VideoClipParams() + cfg.video_aspect = VideoAspect.portrait + cfg.font_name = "STHeitiMedium.ttc" + cfg.font_size = 60 + cfg.stroke_color = "#000000" + cfg.stroke_width = 1.5 + cfg.text_fore_color = "#FFFFFF" + cfg.text_background_color = "transparent" + cfg.bgm_type = "random" + cfg.bgm_file = "" + cfg.bgm_volume = 1.0 + cfg.subtitle_enabled = True + cfg.subtitle_position = "bottom" + cfg.n_threads = 2 + cfg.video_volume = 1 - # generate_video(video_path=video_file, - # audio_path=audio_file, - # subtitle_path=subtitle_file, - # output_file=output_file, - # params=cfg - # ) - # - # video_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/combined-1.mp4" - # - # audio_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/audio_00-00-00-07.mp3" - # - # subtitle_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa\subtitle.srt" - # - # output_file = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/final-123.mp4" - # - # generate_video_v2(video_path=video_path, - # audio_path=audio_path, - # subtitle_path=subtitle_path, - # output_file=output_file, - # params=cfg - # ) + cfg.voice_volume = 1.0 - # 合并视频 - video_list = [ - './storage/cache_videos/vid-01_03-01_50.mp4', - './storage/cache_videos/vid-01_55-02_29.mp4', - 
'./storage/cache_videos/vid-03_24-04_04.mp4', - './storage/cache_videos/vid-04_50-05_28.mp4' - ] + video_path = "../../storage/tasks/123/combined.mp4" + audio_path = "../../storage/tasks/123/final_audio.mp3" + subtitle_path = "../../storage/tasks/123/subtitle.srt" + output_file = "../../storage/tasks/123/final-123.mp4" + generate_video_v2(video_path=video_path, + audio_path=audio_path, + subtitle_path=subtitle_path, + output_file=output_file, + params=cfg + ) diff --git a/requirements.txt b/requirements.txt index 3024e71..0c864ca 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ requests~=2.31.0 -moviepy~=2.0.0.dev2 +moviepy==2.0.0.dev2 faster-whisper~=1.0.1 edge_tts~=6.1.15 uvicorn~=0.27.1 @@ -26,7 +26,7 @@ psutil>=5.9.0 opencv-python~=4.10.0.84 scikit-learn~=1.5.2 google-generativeai~=0.8.3 -Pillow>=11.0.0 +pillow~=10.3.0 python-dotenv~=1.0.1 openai~=1.53.0 tqdm>=4.66.6