From 2bc94651a26094eb624c8f272cc126d86a0350d5 Mon Sep 17 00:00:00 2001
From: linyq <linyqemail@163.com>
Date: Thu, 19 Sep 2024 18:23:54 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9C=AA=E5=AE=8C=E6=88=90=20generate=5Fvideo?=
 =?UTF-8?q?=5Fv2=20=E5=8A=9F=E8=83=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 app/services/task.py  |   2 +-
 app/services/video.py | 350 ++++++++++++++++++++++++++++--------------
 app/services/voice.py |   2 +-
 app/utils/utils.py    |  92 ++---------
 4 files changed, 253 insertions(+), 193 deletions(-)

diff --git a/app/services/task.py b/app/services/task.py
index b1895d3..e58f4b4 100644
--- a/app/services/task.py
+++ b/app/services/task.py
@@ -428,7 +428,7 @@ def start_subclip(task_id, params: VideoClipParams, subclip_path_videos):
             combined_video_path=combined_video_path,
             video_paths=subclip_videos,
             video_ost_list=video_ost,
-            audio_file=audio_file,
+            list_script=list_script,
             video_aspect=params.video_aspect,
             threads=n_threads
         )
diff --git a/app/services/video.py b/app/services/video.py
index d5cc18f..9924923 100644
--- a/app/services/video.py
+++ b/app/services/video.py
@@ -1,3 +1,4 @@
+import re
 import glob
 import random
 from typing import List
@@ -216,9 +217,7 @@ def generate_video(
     logger.info(f"  ③ subtitle: {subtitle_path}")
     logger.info(f"  ④ output: {output_file}")
 
-    # https://github.com/harry0703/NarratoAI/issues/217
-    # PermissionError: [WinError 32] The process cannot access the file because it is being used by another process: 'final-1.mp4.tempTEMP_MPY_wvf_snd.mp3'
-    # write into the same directory as the output file
+    # 写入与输出文件相同的目录
     output_dir = os.path.dirname(output_file)
 
     font_path = ""
@@ -303,6 +302,133 @@ def generate_video(
     logger.success("completed")
 
 
+def generate_video_v2(
+        video_path: str,
+        audio_paths: List[str],
+        subtitle_path: str,
+        output_file: str,
+        params: Union[VideoParams, VideoClipParams],
+):
+    """Render the final video from one source clip plus timed narration.
+
+    Each entry of audio_paths must be named ``audio_MM-SS-MM-SS.mp3`` (start
+    and end on the video timeline); other names are skipped with a warning.
+    Subtitles from subtitle_path (when the file exists) and the BGM selected
+    through params are added before the result is written to output_file.
+    """
+    aspect = VideoAspect(params.video_aspect)
+    video_width, video_height = aspect.to_resolution()
+
+    logger.info(f"开始，视频尺寸: {video_width} x {video_height}")
+    logger.info(f"  ① 视频: {video_path}")
+    logger.info(f"  ② 音频文件数量: {len(audio_paths)}")
+    logger.info(f"  ③ 字幕: {subtitle_path}")
+    logger.info(f"  ④ 输出: {output_file}")
+
+    # The temporary audio file is written next to the output file.
+    output_dir = os.path.dirname(output_file)
+
+    font_path = ""
+    if params.subtitle_enabled:
+        if not params.font_name:
+            params.font_name = "STHeitiMedium.ttc"
+        font_path = os.path.join(utils.font_dir(), params.font_name)
+        if os.name == "nt":
+            font_path = font_path.replace("\\", "/")
+        logger.info(f"使用字体: {font_path}")
+
+    def create_text_clip(subtitle_item):
+        phrase = subtitle_item[1]
+        max_width = video_width * 0.9
+        wrapped_txt, txt_height = wrap_text(
+            phrase, max_width=max_width, font=font_path, fontsize=params.font_size
+        )
+        _clip = TextClip(
+            wrapped_txt,
+            font=font_path,
+            fontsize=params.font_size,
+            color=params.text_fore_color,
+            bg_color=params.text_background_color,
+            stroke_color=params.stroke_color,
+            stroke_width=params.stroke_width,
+            print_cmd=False,
+        )
+        duration = subtitle_item[0][1] - subtitle_item[0][0]
+        _clip = _clip.set_start(subtitle_item[0][0])
+        _clip = _clip.set_end(subtitle_item[0][1])
+        _clip = _clip.set_duration(duration)
+        if params.subtitle_position == "bottom":
+            _clip = _clip.set_position(("center", video_height * 0.95 - _clip.h))
+        elif params.subtitle_position == "top":
+            _clip = _clip.set_position(("center", video_height * 0.05))
+        elif params.subtitle_position == "custom":
+            # Clamp the custom position so the subtitle stays fully on screen.
+            margin = 10  # extra padding in pixels
+            max_y = video_height - _clip.h - margin
+            min_y = margin
+            custom_y = (video_height - _clip.h) * (params.custom_position / 100)
+            custom_y = max(min_y, min(custom_y, max_y))
+            _clip = _clip.set_position(("center", custom_y))
+        else:  # center
+            _clip = _clip.set_position(("center", "center"))
+        return _clip
+
+    video_clip = VideoFileClip(video_path)
+
+    # Place every narration file on the timeline using the times in its name.
+    name_pattern = re.compile(r'audio_(\d{2}-\d{2}-\d{2}-\d{2})\.mp3')
+    narration_clips = []
+    for audio_path in audio_paths:
+        match = name_pattern.search(os.path.basename(audio_path))
+        if not match:
+            logger.warning(f"无法从文件名解析时间信息: {audio_path}")
+            continue
+        start_min, start_sec, end_min, end_sec = map(int, match.group(1).split('-'))
+        start_time = start_min * 60 + start_sec
+        end_time = end_min * 60 + end_sec
+        narration = AudioFileClip(audio_path).volumex(params.voice_volume)
+        # Cap the end at the real audio length; reading past the file end can fail.
+        end_time = min(end_time, start_time + narration.duration)
+        narration_clips.append(narration.set_start(start_time).set_end(end_time))
+
+    # Collect the audio layers; if none exist the output gets no audio track.
+    audio_layers = []
+    if narration_clips:
+        audio_layers.append(CompositeAudioClip(narration_clips))
+    else:
+        logger.warning("没有有效的音频文件")
+
+    if subtitle_path and os.path.exists(subtitle_path):
+        sub = SubtitlesClip(subtitles=subtitle_path, encoding="utf-8")
+        text_clips = [create_text_clip(subtitle_item=item) for item in sub.subtitles]
+        video_clip = CompositeVideoClip([video_clip, *text_clips])
+
+    bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
+    if bgm_file:
+        try:
+            bgm_clip = AudioFileClip(bgm_file).volumex(params.bgm_volume).audio_fadeout(3)
+            audio_layers.append(afx.audio_loop(bgm_clip, duration=video_clip.duration))
+        except Exception as e:
+            logger.error(f"添加背景音乐失败: {str(e)}")
+
+    if audio_layers:
+        video_clip = video_clip.set_audio(CompositeAudioClip(audio_layers))
+    else:
+        # A scalar-returning AudioClip(lambda t: 0) is not a usable silent track.
+        video_clip = video_clip.without_audio()
+    video_clip.write_videofile(
+        output_file,
+        audio_codec="aac",
+        temp_audiofile_path=output_dir,
+        threads=params.n_threads or 2,
+        logger=None,
+        fps=30,
+    )
+    video_clip.close()
+    del video_clip
+    logger.success("完成")
+
+
 def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
     for material in materials:
         if not material.url:
@@ -352,8 +478,8 @@ def preprocess_video(materials: List[MaterialInfo], clip_duration=4):
 
 def combine_clip_videos(combined_video_path: str,
                         video_paths: List[str],
-                        video_ost_list: List[str],
-                        audio_file: str,
+                        video_ost_list: List[bool],
+                        list_script: list,
                         video_aspect: VideoAspect = VideoAspect.portrait,
                         threads: int = 2,
                         ) -> str:
@@ -369,8 +495,8 @@ def combine_clip_videos(combined_video_path: str,
     Returns:
 
     """
-    audio_clip = AudioFileClip(audio_file)
-    audio_duration = audio_clip.duration
+    from app.utils.utils import calculate_total_duration
+    audio_duration = calculate_total_duration(list_script)
     logger.info(f"音频的最大持续时间: {audio_duration} s")
     # 每个剪辑所需的持续时间
     req_dur = audio_duration / len(video_paths)
@@ -384,62 +510,52 @@ def combine_clip_videos(combined_video_path: str,
     clips = []
     video_duration = 0
     # 一遍又一遍地添加下载的剪辑，直到达到音频的持续时间 （max_duration）
-    while video_duration < audio_duration:
-        for video_path, video_ost in zip(video_paths, video_ost_list):
-            clip = VideoFileClip(video_path)
-            if video_ost:
-                clip = clip.set_audio(audio_clip)
+    # while video_duration < audio_duration:
+    for video_path, video_ost in zip(video_paths, video_ost_list):
+        clip = VideoFileClip(video_path)
+        # 通过 ost 字段判断是否播放原声
+        if not video_ost:
+            clip = clip.without_audio()
+        # # 检查剪辑是否比剩余音频长
+        # if (audio_duration - video_duration) < clip.duration:
+        #     clip = clip.subclip(0, (audio_duration - video_duration))
+        # # 仅当计算出的剪辑长度 （req_dur） 短于实际剪辑时，才缩短剪辑以防止静止图像
+        # elif req_dur < clip.duration:
+        #     clip = clip.subclip(0, req_dur)
+        clip = clip.set_fps(30)
+
+        # 并非所有视频的大小都相同，因此我们需要调整它们的大小
+        clip_w, clip_h = clip.size
+        if clip_w != video_width or clip_h != video_height:
+            clip_ratio = clip.w / clip.h
+            video_ratio = video_width / video_height
+
+            if clip_ratio == video_ratio:
+                # 等比例缩放
+                clip = clip.resize((video_width, video_height))
             else:
-                clip = clip.set_audio(audio_clip).without_audio()
-            # 检查剪辑是否比剩余音频长
-            if (audio_duration - video_duration) < clip.duration:
-                clip = clip.subclip(0, (audio_duration - video_duration))
-            # 仅当计算出的剪辑长度 （req_dur） 短于实际剪辑时，才缩短剪辑以防止静止图像
-            elif req_dur < clip.duration:
-                clip = clip.subclip(0, req_dur)
-            clip = clip.set_fps(30)
-
-            # 并非所有视频的大小都相同，因此我们需要调整它们的大小
-            clip_w, clip_h = clip.size
-            if clip_w != video_width or clip_h != video_height:
-                clip_ratio = clip.w / clip.h
-                video_ratio = video_width / video_height
-
-                if clip_ratio == video_ratio:
-                    # 等比例缩放
-                    clip = clip.resize((video_width, video_height))
+                # 等比缩放视频
+                if clip_ratio > video_ratio:
+                    # 按照目标宽度等比缩放
+                    scale_factor = video_width / clip_w
                 else:
-                    # 等比缩放视频
-                    if clip_ratio > video_ratio:
-                        # 按照目标宽度等比缩放
-                        scale_factor = video_width / clip_w
-                    else:
-                        # 按照目标高度等比缩放
-                        scale_factor = video_height / clip_h
+                    # 按照目标高度等比缩放
+                    scale_factor = video_height / clip_h
 
-                    new_width = int(clip_w * scale_factor)
-                    new_height = int(clip_h * scale_factor)
-                    clip_resized = clip.resize(newsize=(new_width, new_height))
+                new_width = int(clip_w * scale_factor)
+                new_height = int(clip_h * scale_factor)
+                clip_resized = clip.resize(newsize=(new_width, new_height))
 
-                    background = ColorClip(size=(video_width, video_height), color=(0, 0, 0))
-                    clip = CompositeVideoClip([
-                        background.set_duration(clip.duration),
-                        clip_resized.set_position("center")
-                    ])
+                background = ColorClip(size=(video_width, video_height), color=(0, 0, 0))
+                clip = CompositeVideoClip([
+                    background.set_duration(clip.duration),
+                    clip_resized.set_position("center")
+                ])
 
-                logger.info(f"将视频 {video_path} 大小调整为 {video_width} x {video_height}, 剪辑尺寸: {clip_w} x {clip_h}")
+            logger.info(f"将视频 {video_path} 大小调整为 {video_width} x {video_height}, 剪辑尺寸: {clip_w} x {clip_h}")
 
-            # TODO: 片段时长过长时，需要缩短，但暂时没有好的解决方案
-            # if clip.duration > 5:
-            #     ctime = utils.reduce_video_time(txt=video_script)
-            #     if clip.duration > (2 * ctime):
-            #         clip = clip.subclip(ctime, 2*ctime)
-            #     else:
-            #         clip = clip.subclip(0, ctime)
-            #     logger.info(f"视频 {video_path} 片段时长较长，将剪辑时长缩短至 {ctime} 秒")
-
-            clips.append(clip)
-            video_duration += clip.duration
+        clips.append(clip)
+        video_duration += clip.duration
 
     video_clip = concatenate_videoclips(clips)
     video_clip = video_clip.set_fps(30)
@@ -457,68 +573,78 @@ def combine_clip_videos(combined_video_path: str,
 
 
 if __name__ == "__main__":
-    from app.utils import utils
+    combined_video_path = "../../storage/tasks/12312312/com123.mp4"
 
-    suffix = "*.mp4"
-    song_dir = utils.video_dir()
-    files = glob.glob(os.path.join(song_dir, suffix))
+    video_paths = ['../../storage/cache_videos/vid-00_00-00_03.mp4',
+                   '../../storage/cache_videos/vid-00_03-00_07.mp4',
+                   '../../storage/cache_videos/vid-00_12-00_17.mp4',
+                   '../../storage/cache_videos/vid-00_26-00_31.mp4']
+    video_ost_list = [False, True, False, True]
+    list_script = [
+        {
+            "picture": "夜晚，一个小孩在树林里奔跑，后面有人拿着火把在追赶",
+            "timestamp": "00:00-00:03",
+            "narration": "夜黑风高的树林，一个小孩在拼命奔跑，后面的人穷追不舍！",
+            "OST": False
+        },
+        {
+            "picture": "追赶的人命令抓住小孩",
+            "timestamp": "00:03-00:07",
+            "narration": "原声播放1",
+            "OST": True
+        },
+        {
+            "picture": "小孩躲在草丛里，黑衣人用脚踢了踢他",
+            "timestamp": "00:12-00:17",
+            "narration": "小孩脱下外套，跑进树林, 一路奔跑，直到第二天清晨",
+            "OST": False
+        },
+        {
+            "picture": "小孩跑到车前，慌慌张张地对女人说有人要杀他",
+            "timestamp": "00:26-00:31",
+            "narration": "原声播放2",
+            "OST": True
+        }
+    ]
+    # combine_clip_videos(combined_video_path=combined_video_path, video_paths=video_paths, video_ost_list=video_ost_list, list_script=list_script)
 
-    print(files)
+    cfg = VideoClipParams()
+    cfg.video_aspect = VideoAspect.portrait
+    cfg.font_name = "STHeitiMedium.ttc"
+    cfg.font_size = 60
+    cfg.stroke_color = "#000000"
+    cfg.stroke_width = 1.5
+    cfg.text_fore_color = "#FFFFFF"
+    cfg.text_background_color = "transparent"
+    cfg.bgm_type = "random"
+    cfg.bgm_file = ""
+    cfg.bgm_volume = 1.0
+    cfg.subtitle_enabled = True
+    cfg.subtitle_position = "bottom"
+    cfg.n_threads = 2
+    cfg.paragraph_number = 1
 
-    # m = MaterialInfo()
-    # m.url = "/Users/harry/Downloads/IMG_2915.JPG"
-    # m.provider = "local"
-    # materials = preprocess_video([m], clip_duration=4)
-    # print(materials)
+    cfg.voice_volume = 1.0
 
-    # txt_en = "Here's your guide to travel hacks for budget-friendly adventures"
-    # txt_zh = "测试长字段这是您的旅行技巧指南帮助您进行预算友好的冒险"
-    # font = utils.resource_dir() + "/fonts/STHeitiMedium.ttc"
-    # for txt in [txt_en, txt_zh]:
-    #     t, h = wrap_text(text=txt, max_width=1000, font=font, fontsize=60)
-    #     print(t)
-    #
-    # task_id = "aa563149-a7ea-49c2-b39f-8c32cc225baf"
-    # task_dir = utils.task_dir(task_id)
-    # video_file = f"{task_dir}/combined-1.mp4"
-    # audio_file = f"{task_dir}/audio.mp3"
-    # subtitle_file = f"{task_dir}/subtitle.srt"
-    # output_file = f"{task_dir}/final.mp4"
-    #
-    # # video_paths = []
-    # # for file in os.listdir(utils.storage_dir("test")):
-    # #     if file.endswith(".mp4"):
-    # #         video_paths.append(os.path.join(utils.storage_dir("test"), file))
-    # #
-    # # combine_videos(combined_video_path=video_file,
-    # #                audio_file=audio_file,
-    # #                video_paths=video_paths,
-    # #                video_aspect=VideoAspect.portrait,
-    # #                video_concat_mode=VideoConcatMode.random,
-    # #                max_clip_duration=5,
-    # #                threads=2)
-    #
-    # cfg = VideoParams()
-    # cfg.video_aspect = VideoAspect.portrait
-    # cfg.font_name = "STHeitiMedium.ttc"
-    # cfg.font_size = 60
-    # cfg.stroke_color = "#000000"
-    # cfg.stroke_width = 1.5
-    # cfg.text_fore_color = "#FFFFFF"
-    # cfg.text_background_color = "transparent"
-    # cfg.bgm_type = "random"
-    # cfg.bgm_file = ""
-    # cfg.bgm_volume = 1.0
-    # cfg.subtitle_enabled = True
-    # cfg.subtitle_position = "bottom"
-    # cfg.n_threads = 2
-    # cfg.paragraph_number = 1
-    #
-    # cfg.voice_volume = 1.0
-    #
     # generate_video(video_path=video_file,
     #                audio_path=audio_file,
     #                subtitle_path=subtitle_file,
     #                output_file=output_file,
     #                params=cfg
     #                )
+
+    # Manual smoke test for generate_video_v2 against a locally prepared task.
+    video_path = "../../storage/tasks/12312312/com123.mp4"
+    audio_paths = [
+        '../../storage/tasks/12312312/audio_00-00-00-03.mp3',
+        '../../storage/tasks/12312312/audio_00-12-00-17.mp3',
+    ]
+    subtitle_path = "../../storage/tasks/12312312/subtitle_multiple.srt"
+    output_file = "../../storage/tasks/12312312/out123.mp4"
+    generate_video_v2(
+        video_path=video_path,
+        audio_paths=audio_paths,
+        subtitle_path=subtitle_path,
+        output_file=output_file,
+        params=cfg,
+    )
diff --git a/app/services/voice.py b/app/services/voice.py
index e54eda9..20180ba 100644
--- a/app/services/voice.py
+++ b/app/services/voice.py
@@ -1354,7 +1354,7 @@ if __name__ == "__main__":
     voice_name = parse_voice_name(voice_name)
     print(voice_name)
 
-    with open("../../resource/scripts/2024-0913-040147.json", 'r', encoding='utf-8') as f:
+    with open("../../resource/scripts/test.json", 'r', encoding='utf-8') as f:
         data = json.load(f)
 
     audio_files, sub_maker_list = tts_multiple(task_id="12312312", list_script=data, voice_name=voice_name, voice_rate=1)
diff --git a/app/utils/utils.py b/app/utils/utils.py
index 728aed2..95d796b 100644
--- a/app/utils/utils.py
+++ b/app/utils/utils.py
@@ -7,6 +7,7 @@ from loguru import logger
 import json
 from uuid import uuid4
 import urllib3
+from datetime import datetime
 
 from app.models import const
 
@@ -313,82 +314,15 @@ def seconds_to_time(seconds: float) -> str:
     return f"{int(h):02d}:{int(m):02d}:{s:06.3f}"
 
 
-def load_locales(i18n_dir):
-    _locales = {}
-    for root, dirs, files in os.walk(i18n_dir):
-        for file in files:
-            if file.endswith(".json"):
-                lang = file.split(".")[0]
-                with open(os.path.join(root, file), "r", encoding="utf-8") as f:
-                    _locales[lang] = json.loads(f.read())
-    return _locales
-
-
-def parse_extension(filename):
-    return os.path.splitext(filename)[1].strip().lower().replace(".", "")
-
-
-def script_dir(sub_dir: str = ""):
-    d = resource_dir(f"scripts")
-    if sub_dir:
-        d = os.path.join(d, sub_dir)
-    if not os.path.exists(d):
-        os.makedirs(d)
-    return d
-
-
-def video_dir(sub_dir: str = ""):
-    d = resource_dir(f"videos")
-    if sub_dir:
-        d = os.path.join(d, sub_dir)
-    if not os.path.exists(d):
-        os.makedirs(d)
-    return d
-
-
-def split_timestamp(timestamp):
-    """
-    拆分时间戳
-    """
-    start, end = timestamp.split('-')
-    start_hour, start_minute = map(int, start.split(':'))
-    end_hour, end_minute = map(int, end.split(':'))
-
-    start_time = '00:{:02d}:{:02d}'.format(start_hour, start_minute)
-    end_time = '00:{:02d}:{:02d}'.format(end_hour, end_minute)
-
-    return start_time, end_time
-
-
-def reduce_video_time(txt: str, duration: float = 0.21531):
-    """
-    按照字数缩减视频时长，一个字耗时约 0.21531 s,
-    Returns:
-    """
-    # 返回结果四舍五入为整数
-    duration = len(txt) * duration
-    return int(duration)
-
-
-def get_current_country():
-    """
-    判断当前网络IP地址所在的国家
-    """
-    try:
-        # 使用ipapi.co的免费API获取IP地址信息
-        response = requests.get('https://ipapi.co/json/')
-        data = response.json()
-
-        # 获取国家名称
-        country = data.get('country_name')
-
-        if country:
-            logger.debug(f"当前网络IP地址位于：{country}")
-            return country
-        else:
-            logger.debug("无法确定当前网络IP地址所在的国家")
-            return None
-
-    except requests.RequestException:
-        logger.error("获取IP地址信息时发生错误，请检查网络连接")
-        return None
+def calculate_total_duration(scenes):
+    """Return the total length in seconds of all scenes' 'start-end' timestamps.
+
+    Each side is 'MM:SS' or 'HH:MM:SS'; minutes above 59 are accepted, which
+    strptime('%M:%S') parsing would reject with ValueError. Empty input gives 0.
+    """
+    def _to_seconds(stamp):
+        # Weight the colon-separated fields by powers of 60, last field = seconds.
+        fields = [int(field) for field in stamp.strip().split(':')]
+        return float(sum(value * 60 ** power for power, value in enumerate(reversed(fields))))
+    spans = (scene['timestamp'].split('-') for scene in scenes)
+    return sum(_to_seconds(end) - _to_seconds(start) for start, end in spans)