优化 mac 中合并视频的逻辑;

下一步优化 ost 新增状态
- 0: 只播放解说音频
- 1: 只播放视频原声
- 2: 播放解说同时保留原声
This commit is contained in:
linyq 2024-11-05 17:32:03 +08:00
parent 6c341388f7
commit 04c4406e24
4 changed files with 271 additions and 307 deletions

View File

@ -1,6 +1,7 @@
import os
import subprocess
import random
import traceback
from urllib.parse import urlencode
import requests
@ -327,7 +328,7 @@ def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, pro
if progress_callback:
progress_callback(index + 1, total_items)
except Exception as e:
logger.error(f"视频裁剪失败: {utils.to_json(item)} => {str(e)}")
logger.error(f"视频裁剪失败: {utils.to_json(item)} =>\n{str(traceback.format_exc())}")
return {}
logger.success(f"裁剪 {len(video_paths)} videos")
return video_paths

View File

@ -9,6 +9,7 @@ from loguru import logger
from moviepy.editor import *
from moviepy.video.tools.subtitles import SubtitlesClip
from PIL import ImageFont
from contextlib import contextmanager
from app.models import const
from app.models.schema import MaterialInfo, VideoAspect, VideoConcatMode, VideoParams, VideoClipParams
@ -202,106 +203,13 @@ def wrap_text(text, max_width, font="Arial", fontsize=60):
return result, height
def generate_video(
    video_path: str,
    audio_path: str,
    subtitle_path: str,
    output_file: str,
    params: Union[VideoParams, VideoClipParams],
):
    """Render the final video from a source video, narration audio and subtitles.

    The narration (scaled by ``params.voice_volume``) becomes the audio track of
    the output; when a BGM file is resolved it is looped to the video duration
    and mixed in. Subtitles are overlaid only when ``subtitle_path`` is non-empty
    and exists on disk.

    Args:
        video_path: Path of the source video.
        audio_path: Path of the narration audio.
        subtitle_path: Path of the subtitle file; may be empty or missing.
        output_file: Destination file; temporary audio is written next to it.
        params: Rendering options (aspect, font, subtitle placement, volumes,
            BGM selection, thread count).

    Raises:
        Exception: Whatever the clip constructors or ``write_videofile`` raise is
            propagated after the opened clips have been closed. A failure while
            adding BGM is only logged.
    """
    aspect = VideoAspect(params.video_aspect)
    video_width, video_height = aspect.to_resolution()

    logger.info(f"start, video size: {video_width} x {video_height}")
    logger.info(f" ① video: {video_path}")
    logger.info(f" ② audio: {audio_path}")
    logger.info(f" ③ subtitle: {subtitle_path}")
    logger.info(f" ④ output: {output_file}")

    # Temporary files are written to the same directory as the output file.
    output_dir = os.path.dirname(output_file)

    font_path = ""
    if params.subtitle_enabled:
        if not params.font_name:
            params.font_name = "STHeitiMedium.ttc"
        font_path = os.path.join(utils.font_dir(), params.font_name)
        if os.name == "nt":
            font_path = font_path.replace("\\", "/")
        logger.info(f"using font: {font_path}")

    def create_text_clip(subtitle_item):
        """Build a positioned TextClip for one ((start, end), text) subtitle item."""
        phrase = subtitle_item[1]
        max_width = video_width * 0.9
        wrapped_txt, _ = wrap_text(
            phrase, max_width=max_width, font=font_path, fontsize=params.font_size
        )
        _clip = TextClip(
            wrapped_txt,
            font=font_path,
            fontsize=params.font_size,
            color=params.text_fore_color,
            bg_color=params.text_background_color,
            stroke_color=params.stroke_color,
            stroke_width=params.stroke_width,
            print_cmd=False,
        )
        duration = subtitle_item[0][1] - subtitle_item[0][0]
        _clip = _clip.set_start(subtitle_item[0][0])
        _clip = _clip.set_end(subtitle_item[0][1])
        _clip = _clip.set_duration(duration)
        if params.subtitle_position == "bottom":
            _clip = _clip.set_position(("center", video_height * 0.95 - _clip.h))
        elif params.subtitle_position == "top":
            _clip = _clip.set_position(("center", video_height * 0.05))
        elif params.subtitle_position == "custom":
            # Keep the subtitle fully inside the frame.
            margin = 10  # extra padding in pixels
            max_y = video_height - _clip.h - margin
            min_y = margin
            custom_y = (video_height - _clip.h) * (params.custom_position / 100)
            custom_y = max(min_y, min(custom_y, max_y))  # clamp y to the valid range
            _clip = _clip.set_position(("center", custom_y))
        else:  # center
            _clip = _clip.set_position(("center", "center"))
        return _clip

    # Keep references to every clip opened from disk so they can be closed
    # explicitly, even when compositing or encoding fails.
    source_clip = VideoFileClip(video_path)
    voice_source = None
    bgm_source = None
    try:
        video_clip = source_clip
        voice_source = AudioFileClip(audio_path)
        audio_clip = voice_source.volumex(params.voice_volume)

        if subtitle_path and os.path.exists(subtitle_path):
            sub = SubtitlesClip(subtitles=subtitle_path, encoding="utf-8")
            text_clips = [create_text_clip(subtitle_item=item) for item in sub.subtitles]
            video_clip = CompositeVideoClip([video_clip, *text_clips])

        bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
        if bgm_file:
            try:
                bgm_source = AudioFileClip(bgm_file)
                bgm_clip = bgm_source.volumex(params.bgm_volume).audio_fadeout(3)
                bgm_clip = afx.audio_loop(bgm_clip, duration=video_clip.duration)
                audio_clip = CompositeAudioClip([audio_clip, bgm_clip])
            except Exception as e:
                # BGM is best-effort: the video is still rendered without it.
                logger.error(f"failed to add bgm: {str(e)}")

        video_clip = video_clip.set_audio(audio_clip)
        video_clip.write_videofile(
            output_file,
            audio_codec="aac",
            temp_audiofile_path=output_dir,
            threads=params.n_threads,
            logger=None,
            fps=30,
        )
        video_clip.close()
    finally:
        # NOTE(review): closing only the derived clip presumably does not always
        # release the underlying file readers (e.g. once the video is wrapped in
        # a CompositeVideoClip), so the sources are closed directly as well.
        for opened in (source_clip, voice_source, bgm_source):
            if opened is None:
                continue
            try:
                opened.close()
            except Exception as e:
                # Cleanup must never mask the original error.
                logger.warning(f"failed to release clip: {str(e)}")

    logger.success("completed")
@contextmanager
def manage_clip(clip):
    """Yield ``clip`` and call its ``close()`` when the ``with`` block exits.

    ``close()`` runs whether the block finishes normally or raises; an exception
    raised inside the block is propagated to the caller.

    Args:
        clip: Any object exposing a ``close()`` method.

    Yields:
        The same ``clip`` object that was passed in.
    """
    try:
        yield clip
    finally:
        # A trailing ``del clip`` would only unbind this generator's local name,
        # so closing is the only cleanup that matters here.
        clip.close()
def generate_video_v2(
@ -310,6 +218,7 @@ def generate_video_v2(
subtitle_path: str,
output_file: str,
params: Union[VideoParams, VideoClipParams],
progress_callback=None,
):
"""
合并所有素材
@ -319,146 +228,163 @@ def generate_video_v2(
subtitle_path: 字幕文件路径
output_file: 输出文件路径
params: 视频参数
progress_callback: 进度回调函数接收 0-100 的进度值
Returns:
"""
aspect = VideoAspect(params.video_aspect)
video_width, video_height = aspect.to_resolution()
total_steps = 4 # 总步数
current_step = 0
def update_progress(step_name):
nonlocal current_step
current_step += 1
if progress_callback:
progress_callback(int(current_step * 100 / total_steps))
logger.info(f"完成步骤: {step_name}")
logger.info(f"开始,视频尺寸: {video_width} x {video_height}")
logger.info(f" ① 视频: {video_path}")
logger.info(f" ② 音频: {audio_path}")
logger.info(f" ③ 字幕: {subtitle_path}")
logger.info(f" ④ 输出: {output_file}")
try:
validate_params(video_path, audio_path, output_file, params)
with manage_clip(VideoFileClip(video_path)) as video_clip:
aspect = VideoAspect(params.video_aspect)
video_width, video_height = aspect.to_resolution()
# 写入与输出文件相同的目录
output_dir = os.path.dirname(output_file)
logger.info(f"开始,视频尺寸: {video_width} x {video_height}")
logger.info(f" ① 视频: {video_path}")
logger.info(f" ② 音频: {audio_path}")
logger.info(f" ③ 字幕: {subtitle_path}")
logger.info(f" ④ 输出: {output_file}")
# 字体设置部分保持不变
font_path = ""
if params.subtitle_enabled:
if not params.font_name:
params.font_name = "STHeitiMedium.ttc"
font_path = os.path.join(utils.font_dir(), params.font_name)
if os.name == "nt":
font_path = font_path.replace("\\", "/")
logger.info(f"使用字体: {font_path}")
output_dir = os.path.dirname(output_file)
update_progress("初始化完成")
# create_text_clip 函数保持不变
def create_text_clip(subtitle_item):
phrase = subtitle_item[1]
max_width = video_width * 0.9
wrapped_txt, txt_height = wrap_text(
phrase, max_width=max_width, font=font_path, fontsize=params.font_size
)
_clip = TextClip(
wrapped_txt,
font=font_path,
fontsize=params.font_size,
color=params.text_fore_color,
bg_color=params.text_background_color,
stroke_color=params.stroke_color,
stroke_width=params.stroke_width,
print_cmd=False,
)
duration = subtitle_item[0][1] - subtitle_item[0][0]
_clip = _clip.set_start(subtitle_item[0][0])
_clip = _clip.set_end(subtitle_item[0][1])
_clip = _clip.set_duration(duration)
if params.subtitle_position == "bottom":
_clip = _clip.set_position(("center", video_height * 0.95 - _clip.h))
elif params.subtitle_position == "top":
_clip = _clip.set_position(("center", video_height * 0.05))
elif params.subtitle_position == "custom":
# 确保字幕完全在屏幕内
margin = 10 # 额外的边距,单位为像素
max_y = video_height - _clip.h - margin
min_y = margin
custom_y = (video_height - _clip.h) * (params.custom_position / 100)
custom_y = max(min_y, min(custom_y, max_y)) # 限制 y 值在有效范围内
_clip = _clip.set_position(("center", custom_y))
else: # center
_clip = _clip.set_position(("center", "center"))
return _clip
# 字体设置
font_path = ""
if params.subtitle_enabled:
if not params.font_name:
params.font_name = "STHeitiMedium.ttc"
font_path = os.path.join(utils.font_dir(), params.font_name)
if os.name == "nt":
font_path = font_path.replace("\\", "/")
logger.info(f"使用字体: {font_path}")
video_clip = VideoFileClip(video_path)
original_audio = video_clip.audio # 保存原始视频的音轨
video_duration = video_clip.duration
def create_text_clip(subtitle_item):
phrase = subtitle_item[1]
max_width = video_width * 0.9
wrapped_txt, txt_height = wrap_text(
phrase, max_width=max_width, font=font_path, fontsize=params.font_size
)
_clip = TextClip(
wrapped_txt,
font=font_path,
fontsize=params.font_size,
color=params.text_fore_color,
bg_color=params.text_background_color,
stroke_color=params.stroke_color,
stroke_width=params.stroke_width,
print_cmd=False,
)
duration = subtitle_item[0][1] - subtitle_item[0][0]
_clip = _clip.set_start(subtitle_item[0][0])
_clip = _clip.set_end(subtitle_item[0][1])
_clip = _clip.set_duration(duration)
if params.subtitle_position == "bottom":
_clip = _clip.set_position(("center", video_height * 0.95 - _clip.h))
elif params.subtitle_position == "top":
_clip = _clip.set_position(("center", video_height * 0.05))
elif params.subtitle_position == "custom":
margin = 10
max_y = video_height - _clip.h - margin
min_y = margin
custom_y = (video_height - _clip.h) * (params.custom_position / 100)
custom_y = max(min_y, min(custom_y, max_y))
_clip = _clip.set_position(("center", custom_y))
else: # center
_clip = _clip.set_position(("center", "center"))
return _clip
# 处理新的音频文件
new_audio = AudioFileClip(audio_path).volumex(params.voice_volume)
update_progress("字体设置完成")
# 合并音频轨道
# 处理音频
original_audio = video_clip.audio
video_duration = video_clip.duration
new_audio = AudioFileClip(audio_path)
final_audio = process_audio_tracks(original_audio, new_audio, params, video_duration)
update_progress("音频处理完成")
# 处理字幕
if subtitle_path and os.path.exists(subtitle_path):
video_clip = process_subtitles(subtitle_path, video_clip, video_duration, create_text_clip)
update_progress("字幕处理完成")
# 合并音频和导出
video_clip = video_clip.set_audio(final_audio)
video_clip.write_videofile(
output_file,
audio_codec="aac",
temp_audiofile=os.path.join(output_dir, "temp-audio.m4a"),
threads=params.n_threads,
logger=None,
fps=30,
)
except FileNotFoundError as e:
logger.error(f"文件不存在: {str(e)}")
raise
except Exception as e:
logger.error(f"视频生成失败: {str(e)}")
raise
finally:
logger.success("完成")
def process_audio_tracks(original_audio, new_audio, params, video_duration):
    """Mix the original video audio, the narration and optional BGM into one track.

    Args:
        original_audio: Audio of the source video, or ``None`` when there is none.
        new_audio: Narration audio clip; scaled by ``params.voice_volume``.
        params: Options object providing ``voice_volume``, ``bgm_type``,
            ``bgm_file`` and ``bgm_volume``.
        video_duration: Duration the background music is looped to.

    Returns:
        A ``CompositeAudioClip`` of every track that could be prepared. The
        narration is always included; a failure while loading BGM is logged and
        the BGM is left out.
    """
    audio_tracks = []

    # Keep the source video's own audio when it has one.
    if original_audio is not None:
        audio_tracks.append(original_audio)

    # Narration track.
    new_audio = new_audio.volumex(params.voice_volume)
    audio_tracks.append(new_audio)

    # Background music: open the file once, fade out over 3 seconds, loop to length.
    bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
    if bgm_file:
        try:
            bgm_clip = AudioFileClip(bgm_file).volumex(params.bgm_volume).audio_fadeout(3)
            bgm_clip = afx.audio_loop(bgm_clip, duration=video_duration)
            audio_tracks.append(bgm_clip)
        except Exception as e:
            logger.error(f"添加背景音乐失败: {str(e)}")

    # audio_tracks always contains at least the narration, so no fallback is needed.
    return CompositeAudioClip(audio_tracks)
# 确保至少有一个有效的音轨
if not audio_tracks:
logger.warning("没有有效的音轨可用")
final_audio = new_audio
else:
# 合并所有音频轨道
final_audio = CompositeAudioClip(audio_tracks)
# 字幕处理部分
if subtitle_path and os.path.exists(subtitle_path):
sub = SubtitlesClip(subtitles=subtitle_path, encoding="utf-8")
text_clips = []
def process_subtitles(subtitle_path, video_clip, video_duration, create_text_clip):
    """Overlay the subtitles from ``subtitle_path`` onto ``video_clip``.

    Args:
        subtitle_path: Path of the subtitle file; may be empty or missing.
        video_clip: Clip the subtitles are composited on top of.
        video_duration: Duration used to drop or trim subtitles that run past the end.
        create_text_clip: Callable invoked as ``create_text_clip(subtitle_item=item)``
            that returns a clip with ``start``/``end`` and ``set_start``/``set_end``.

    Returns:
        ``video_clip`` unchanged when there is no subtitle file; otherwise a
        ``CompositeVideoClip`` of the video plus the retained subtitle clips.
    """
    if not (subtitle_path and os.path.exists(subtitle_path)):
        return video_clip

    sub = SubtitlesClip(subtitles=subtitle_path, encoding="utf-8")
    text_clips = []
    for item in sub.subtitles:
        clip = create_text_clip(subtitle_item=item)

        # A subtitle never starts before the beginning of the video.
        start_time = max(clip.start, 0)
        # Skip subtitles that would only start once the video is over.
        if start_time >= video_duration:
            continue
        # Trim the end so the subtitle does not outlast the video.
        end_time = min(clip.end, video_duration)

        clip = clip.set_start(start_time).set_end(end_time)
        text_clips.append(clip)

    logger.info(f"处理了 {len(text_clips)} 段字幕")
    return CompositeVideoClip([video_clip, *text_clips])
def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
@ -526,86 +452,114 @@ def combine_clip_videos(combined_video_path: str,
threads: 线程数
Returns:
str: 合并后的视频路径
"""
from app.utils.utils import calculate_total_duration
audio_duration = calculate_total_duration(list_script)
logger.info(f"音频的最大持续时间: {audio_duration} s")
# 每个剪辑所需的持续时间
req_dur = audio_duration / len(video_paths)
# req_dur = max_clip_duration
# logger.info(f"每个剪辑的最大长度为 {req_dur} s")
output_dir = os.path.dirname(combined_video_path)
aspect = VideoAspect(video_aspect)
video_width, video_height = aspect.to_resolution()
clips = []
video_duration = 0
# 一遍又一遍地添加下载的剪辑,直到达到音频的持续时间 max_duration
# while video_duration < audio_duration:
for video_path, video_ost in zip(video_paths, video_ost_list):
cache_video_path = utils.root_dir()
clip = VideoFileClip(os.path.join(cache_video_path, video_path))
# 通过 ost 字段判断是否播放原声
if not video_ost:
clip = clip.without_audio()
# # 检查剪辑是否比剩余音频长
# if (audio_duration - video_duration) < clip.duration:
# clip = clip.subclip(0, (audio_duration - video_duration))
# # 仅当计算出的剪辑长度 req_dur 短于实际剪辑时,才缩短剪辑以防止静止图像
# elif req_dur < clip.duration:
# clip = clip.subclip(0, req_dur)
clip = clip.set_fps(30)
try:
# 直接使用视频路径不再拼接root_dir
clip = VideoFileClip(video_path)
# 通过 ost 字段判断是否播放原声
if not video_ost:
clip = clip.without_audio()
clip = clip.set_fps(30)
# 并非所有视频的大小都相同,因此我们需要调整它们的大小
clip_w, clip_h = clip.size
if clip_w != video_width or clip_h != video_height:
clip_ratio = clip.w / clip.h
video_ratio = video_width / video_height
# 处理视频尺寸
clip_w, clip_h = clip.size
if clip_w != video_width or clip_h != video_height:
clip = resize_video_with_padding(
clip,
target_width=video_width,
target_height=video_height
)
logger.info(f"视频 {video_path} 已调整尺寸为 {video_width} x {video_height}")
if clip_ratio == video_ratio:
# 等比例缩放
clip = clip.resize((video_width, video_height))
else:
# 等比缩放视频
if clip_ratio > video_ratio:
# 按照目标宽度等比缩放
scale_factor = video_width / clip_w
else:
# 按照目标高度等比缩放
scale_factor = video_height / clip_h
clips.append(clip)
except Exception as e:
logger.error(f"处理视频 {video_path} 时出错: {str(e)}")
continue
new_width = int(clip_w * scale_factor)
new_height = int(clip_h * scale_factor)
clip_resized = clip.resize(newsize=(new_width, new_height))
if not clips:
raise ValueError("没有有效的视频片段可以合并")
background = ColorClip(size=(video_width, video_height), color=(0, 0, 0))
clip = CompositeVideoClip([
background.set_duration(clip.duration),
clip_resized.set_position("center")
])
try:
video_clip = concatenate_videoclips(clips)
video_clip = video_clip.set_fps(30)
logger.info("开始合并视频...")
video_clip.write_videofile(
filename=combined_video_path,
threads=threads,
logger=None,
audio_codec="aac",
fps=30,
temp_audiofile=os.path.join(output_dir, "temp-audio.m4a")
)
finally:
# 确保资源被正确释放
video_clip.close()
for clip in clips:
clip.close()
logger.info(f"将视频 {video_path} 大小调整为 {video_width} x {video_height}, 剪辑尺寸: {clip_w} x {clip_h}")
clips.append(clip)
video_duration += clip.duration
video_clip = concatenate_videoclips(clips)
video_clip = video_clip.set_fps(30)
logger.info(f"合并视频中...")
video_clip.write_videofile(filename=combined_video_path,
threads=threads,
logger=None,
temp_audiofile_path=output_dir,
audio_codec="aac",
fps=30,
)
video_clip.close()
logger.success(f"completed")
logger.success("视频合并完成")
return combined_video_path
def resize_video_with_padding(clip, target_width: int, target_height: int):
    """Fit ``clip`` into the target frame, padding with black when ratios differ.

    Args:
        clip: Clip exposing ``w``, ``h``, ``duration`` and ``resize``.
        target_width: Width of the output frame.
        target_height: Height of the output frame.

    Returns:
        The clip resized directly to the target size when the aspect ratios are
        equal; otherwise a ``CompositeVideoClip`` with the proportionally scaled
        clip centred on a black background of the target size.
    """
    clip_ratio = clip.w / clip.h
    target_ratio = target_width / target_height

    if clip_ratio == target_ratio:
        return clip.resize((target_width, target_height))

    # The limiting dimension is set to the target exactly. Truncating
    # ``dim * scale_factor`` can land one pixel short through float error,
    # leaving an unintended border. The other dimension is rounded, not truncated.
    if clip_ratio > target_ratio:
        # Clip is wider than the frame: width is the limiting dimension.
        new_width = target_width
        new_height = max(1, round(clip.h * target_width / clip.w))
    else:
        # Clip is taller than the frame: height is the limiting dimension.
        new_height = target_height
        new_width = max(1, round(clip.w * target_height / clip.h))

    clip_resized = clip.resize(newsize=(new_width, new_height))

    background = ColorClip(
        size=(target_width, target_height),
        color=(0, 0, 0)
    ).set_duration(clip.duration)

    return CompositeVideoClip([
        background,
        clip_resized.set_position("center")
    ])
def validate_params(video_path, audio_path, output_file, params):
    """Validate the inputs of a video generation call.

    Args:
        video_path: Path of the source video; must exist.
        audio_path: Path of the narration audio; must exist.
        output_file: Destination file; its directory must exist. A bare file
            name is treated as a file in the current directory.
        params: Options object; must expose a ``video_aspect`` attribute.

    Raises:
        FileNotFoundError: If the video file, the audio file or the output
            directory does not exist.
        ValueError: If ``params`` has no ``video_aspect`` attribute.
    """
    if not os.path.exists(video_path):
        raise FileNotFoundError(f"视频文件不存在: {video_path}")

    if not os.path.exists(audio_path):
        raise FileNotFoundError(f"音频文件不存在: {audio_path}")

    # dirname() returns "" for a bare file name, which os.path.exists() rejects;
    # fall back to the current directory in that case.
    output_dir = os.path.dirname(output_file) or os.curdir
    if not os.path.exists(output_dir):
        raise FileNotFoundError(f"输出目录不存在: {output_dir}")

    if not hasattr(params, 'video_aspect'):
        raise ValueError("params 缺少必要参数 video_aspect")
if __name__ == "__main__":
# combined_video_path = "../../storage/tasks/12312312/com123.mp4"
#
@ -646,23 +600,23 @@ if __name__ == "__main__":
# ]
# combine_clip_videos(combined_video_path=combined_video_path, video_paths=video_paths, video_ost_list=video_ost_list, list_script=list_script)
cfg = VideoClipParams()
cfg.video_aspect = VideoAspect.portrait
cfg.font_name = "STHeitiMedium.ttc"
cfg.font_size = 60
cfg.stroke_color = "#000000"
cfg.stroke_width = 1.5
cfg.text_fore_color = "#FFFFFF"
cfg.text_background_color = "transparent"
cfg.bgm_type = "random"
cfg.bgm_file = ""
cfg.bgm_volume = 1.0
cfg.subtitle_enabled = True
cfg.subtitle_position = "bottom"
cfg.n_threads = 2
cfg.paragraph_number = 1
cfg.voice_volume = 1.0
# cfg = VideoClipParams()
# cfg.video_aspect = VideoAspect.portrait
# cfg.font_name = "STHeitiMedium.ttc"
# cfg.font_size = 60
# cfg.stroke_color = "#000000"
# cfg.stroke_width = 1.5
# cfg.text_fore_color = "#FFFFFF"
# cfg.text_background_color = "transparent"
# cfg.bgm_type = "random"
# cfg.bgm_file = ""
# cfg.bgm_volume = 1.0
# cfg.subtitle_enabled = True
# cfg.subtitle_position = "bottom"
# cfg.n_threads = 2
# cfg.paragraph_number = 1
#
# cfg.voice_volume = 1.0
# generate_video(video_path=video_file,
# audio_path=audio_file,
@ -670,18 +624,27 @@ if __name__ == "__main__":
# output_file=output_file,
# params=cfg
# )
#
# video_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/combined-1.mp4"
#
# audio_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/audio_00-00-00-07.mp3"
#
# subtitle_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa\subtitle.srt"
#
# output_file = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/final-123.mp4"
#
# generate_video_v2(video_path=video_path,
# audio_path=audio_path,
# subtitle_path=subtitle_path,
# output_file=output_file,
# params=cfg
# )
video_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/combined-1.mp4"
# 合并视频
video_list = [
'./storage/cache_videos/vid-01_03-01_50.mp4',
'./storage/cache_videos/vid-01_55-02_29.mp4',
'./storage/cache_videos/vid-03_24-04_04.mp4',
'./storage/cache_videos/vid-04_50-05_28.mp4'
]
audio_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/audio_00-00-00-07.mp3"
subtitle_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa\subtitle.srt"
output_file = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/final-123.mp4"
generate_video_v2(video_path=video_path,
audio_path=audio_path,
subtitle_path=subtitle_path,
output_file=output_file,
params=cfg
)

View File

@ -423,5 +423,5 @@ def cut_video(params, progress_callback=None):
return task_id, subclip_videos
except Exception as e:
logger.error(f"视频裁剪过程中发生错误: {traceback.format_exc()}")
logger.error(f"视频裁剪过程中发生错误: \n{traceback.format_exc()}")
raise

View File

@ -551,7 +551,7 @@ with middle_panel:
# 试听语言合成
if st.button(tr("Play Voice")):
play_content = "这是一段试听语言"
play_content = "感谢关注 NarratoAI有任何问题或建议可以关注微信公众号求助或讨论"
if not play_content:
play_content = params.video_script
if not play_content:
@ -565,7 +565,7 @@ with middle_panel:
voice_rate=params.voice_rate,
voice_file=audio_file,
)
# if the voice file generation failed, try again with a default content.
# 如果语音文件生成失败,请使用默认内容重试。
if not sub_maker:
play_content = "This is a example voice. if you hear this, the voice synthesis failed with the original content."
sub_maker = voice.tts(