"""Merge per-segment narration clips into a single task-level audio track."""
import os
import subprocess
from typing import List, Optional, Tuple

import edge_tts
from edge_tts import submaker
from loguru import logger
from pydub import AudioSegment

from app.utils import utils

# edge_tts subtitle offsets are expressed in 100-nanosecond ticks:
# ``edge_tts.submaker.mktimestamp`` divides its argument by 10**7 to get seconds.
_TICKS_PER_SECOND = 10 ** 7


def check_ffmpeg() -> bool:
    """Return True when an ``ffmpeg`` executable can be launched from PATH."""
    try:
        subprocess.run(
            ["ffmpeg", "-version"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except OSError:
        # Covers FileNotFoundError (not installed) as well as e.g.
        # PermissionError (present but not executable).
        return False
    return True


def merge_audio_files(
    task_id: str, audio_file_paths: List[str], total_duration: int
) -> Tuple[Optional[str], Optional[edge_tts.SubMaker]]:
    """
    Lay several audio clips onto one silent track of a fixed total length.

    Each clip is positioned at the start time encoded in its file name
    (see ``extract_timestamp``). Clips that are missing, unreadable, or whose
    name carries no parsable timestamp are logged and skipped.

    :param task_id: task identifier; the merged file is written to
        ``utils.task_dir(task_id)``
    :param audio_file_paths: paths of the clips to merge
    :param total_duration: length of the final track, in seconds
    :return: ``(output_file, sub_maker)`` on success, ``(None, None)`` when
        FFmpeg is unavailable or the track could not be exported
    """
    output_dir = utils.task_dir(task_id)

    if not check_ffmpeg():
        logger.error("错误:FFmpeg未安装。请安装FFmpeg后再运行此脚本。")
        return None, None

    # Silent base track; pydub measures durations and positions in milliseconds.
    blank_audio = AudioSegment.silent(duration=total_duration * 1000)
    sub_maker = edge_tts.SubMaker()

    for audio_path in audio_file_paths:
        if not os.path.exists(audio_path):
            logger.warning(f"警告:文件 {audio_path} 不存在,已跳过。")
            continue

        # The clip's position on the timeline is encoded in its file name.
        filename = os.path.basename(audio_path)
        try:
            start_time, end_time = extract_timestamp(filename)
        except (IndexError, ValueError) as e:
            # One badly named clip must not abort the whole merge.
            logger.error(f"错误:无法从文件名 {filename} 中解析时间戳,已跳过。错误信息:{str(e)}")
            continue

        try:
            audio = AudioSegment.from_mp3(audio_path)
        except Exception as e:
            logger.error(f"错误:无法读取文件 {audio_path}。错误信息:{str(e)}")
            continue

        blank_audio = blank_audio.overlay(audio, position=start_time * 1000)

        # SubMaker works in 100-ns ticks, not milliseconds.
        start_ticks = start_time * _TICKS_PER_SECOND
        duration_ticks = (end_time - start_time) * _TICKS_PER_SECOND
        # TODO: ``filename`` is only a placeholder; the real subtitle text
        # still has to be passed in from the caller.
        sub_maker.create_sub((start_ticks, duration_ticks), filename)

    # Prefer WAV; fall back to MP3 in the same task directory.
    output_file = os.path.join(output_dir, "audio.wav")
    try:
        blank_audio.export(output_file, format="wav")
        logger.info(f"音频合并完成,已保存为 {output_file}")
    except Exception as wav_error:
        logger.warning(f"导出为WAV格式失败,尝试使用MP3格式:{str(wav_error)}")
        output_file = os.path.join(output_dir, "merged_audio.mp3")
        try:
            blank_audio.export(output_file, format="mp3", codec="libmp3lame")
            logger.info(f"音频合并完成,已保存为 {output_file}")
        except Exception as mp3_error:
            logger.error(f"导出音频失败:{str(mp3_error)}")
            return None, None

    return output_file, sub_maker


def extract_timestamp(filename: str) -> Tuple[int, int]:
    """
    Parse the start and end time (in seconds) out of a clip file name.

    The expected shape is ``<prefix>_<MM>-<SS>-<MM>-<SS>.<ext>``, for example
    ``audio_00-06-00-24.mp3`` -> ``(6, 24)``.

    :raises IndexError: the name has no ``_`` or fewer than four time fields
    :raises ValueError: a time field is not an integer
    """
    time_part = filename.split('_')[1].split('.')[0]
    times = time_part.split('-')

    start_seconds = time_to_seconds(times[0], times[1])
    end_seconds = time_to_seconds(times[2], times[3])

    return start_seconds, end_seconds


def time_to_seconds(minutes, seconds) -> int:
    """Convert a minutes/seconds pair (ints or numeric strings) to seconds."""
    return int(minutes) * 60 + int(seconds)


if __name__ == "__main__":
    # Example usage
    audio_files = [
        "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00-06-00-24.mp3",
        "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00-32-00-38.mp3",
        "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00-43-00-52.mp3",
        "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00-52-01-09.mp3",
        "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_01-13-01-15.mp3"
    ]
    total_duration = 75

    a, b = merge_audio_files("test456", audio_files, total_duration)
    print(a, b)
解说文案 - # video_path = "/Users/apple/Desktop/home/NarratoAI/resource/videos/1.mp4" - video_path = "E:\\projects\\NarratoAI\\resource\\videos\\1.mp4" + video_path = "/Users/apple/Desktop/home/NarratoAI/resource/videos/1.mp4" + # video_path = "E:\\projects\\NarratoAI\\resource\\videos\\1.mp4" video_plot = """ 李自忠拿着儿子李牧名下的存折,去银行取钱给儿子救命,却被要求证明"你儿子是你儿子"。 走投无路时碰到银行被抢劫,劫匪给了他两沓钱救命,李自忠却因此被银行以抢劫罪起诉,并顶格判处20年有期徒刑。 diff --git a/app/services/task.py b/app/services/task.py index 2e3ff30..7de5ac4 100644 --- a/app/services/task.py +++ b/app/services/task.py @@ -8,7 +8,7 @@ from loguru import logger from app.config import config from app.models import const from app.models.schema import VideoConcatMode, VideoParams, VideoClipParams -from app.services import llm, material, subtitle, video, voice +from app.services import llm, material, subtitle, video, voice, audio_merger from app.services import state as sm from app.utils import utils @@ -97,7 +97,7 @@ def generate_subtitle(task_id, params, video_script, sub_maker, audio_file): if not params.subtitle_enabled: return "" - subtitle_path = path.join(utils.task_dir(task_id), "subtitle.srt") + subtitle_path = path.join(utils.task_dir(task_id), "subtitle111.srt") subtitle_provider = config.app.get("subtitle_provider", "").strip().lower() logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}") @@ -353,6 +353,9 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos): logger.debug(f"解说完整脚本: \n{video_script}") logger.debug(f"解说 OST 列表: \n{video_ost}") logger.debug(f"解说时间戳列表: \n{time_list}") + # 获取视频总时长(单位 s) + total_duration = list_script[-1]['new_timestamp'] + total_duration = int(total_duration.split("-")[1].split(":")[0]) * 60 + int(total_duration.split("-")[1].split(":")[1]) except Exception as e: logger.error(f"无法读取视频json脚本,请检查配置是否正确。{e}") raise ValueError("无法读取视频json脚本,请检查配置是否正确") @@ -372,21 +375,27 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos): logger.error( 
"音频文件为空,可能是网络不可用。如果您在中国,请使用VPN。或者手动选择 zh-CN-Yunjian-男性 音频") return + logger.info("合并音频") + audio_file, sub_maker = audio_merger.merge_audio_files(task_id, audio_files, total_duration) + + # audio_duration = voice.get_audio_duration(sub_maker) + # audio_duration = math.ceil(audio_duration) sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30) subtitle_path = "" if params.subtitle_enabled: - subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt") + subtitle_path = path.join(utils.task_dir(task_id), f"subtitle111.srt") subtitle_provider = config.app.get("subtitle_provider", "").strip().lower() logger.info(f"\n\n## 3. 生成字幕、提供程序是: {subtitle_provider}") # subtitle_fallback = False if subtitle_provider == "edge": - voice.create_subtitle_from_multiple( - text=video_script, - sub_maker_list=sub_maker_list, - list_script=list_script, - subtitle_file=subtitle_path - ) + voice.create_subtitle(text=video_script, sub_maker=sub_maker, subtitle_file=subtitle_path) + # voice.create_subtitle( + # text=video_script, + # sub_maker_list=sub_maker_list, + # list_script=list_script, + # subtitle_file=subtitle_path + # ) # if not os.path.exists(subtitle_path): # subtitle_fallback = True # logger.warning("找不到字幕文件,回退到whisper") @@ -475,18 +484,35 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos): if __name__ == "__main__": - task_id = "test123456" - subclip_path_videos = {'00:41-01:58': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_41-01_58.mp4', - '00:06-00:15': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_06-00_15.mp4', - '01:10-01:17': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_10-01_17.mp4', - '00:47-01:03': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_47-01_03.mp4', - '01:03-01:10': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_03-01_10.mp4', - '02:40-03:08': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-02_40-03_08.mp4', - '03:02-03:20': 
'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-03_02-03_20.mp4', - '03:18-03:20': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-03_18-03_20.mp4'} + # task_id = "test123" + # subclip_path_videos = {'00:41-01:58': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_41-01_58.mp4', + # '00:06-00:15': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_06-00_15.mp4', + # '01:10-01:17': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_10-01_17.mp4', + # '00:47-01:03': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_47-01_03.mp4', + # '01:03-01:10': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_03-01_10.mp4', + # '02:40-03:08': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-02_40-03_08.mp4', + # '03:02-03:20': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-03_02-03_20.mp4', + # '03:18-03:20': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-03_18-03_20.mp4'} + # + # params = VideoClipParams( + # video_clip_json_path="E:\\projects\\NarratoAI\\resource/scripts/test003.json", + # video_origin_path="E:\\projects\\NarratoAI\\resource/videos/1.mp4", + # ) + # start_subclip(task_id, params, subclip_path_videos=subclip_path_videos) + + task_id = "test456" + subclip_path_videos = {'00:00-00:06': './storage/cache_videos/vid-00_00-00_06.mp4', + '00:06-00:24': './storage/cache_videos/vid-00_06-00_24.mp4', + '01:28-01:36': './storage/cache_videos/vid-01_28-01_36.mp4', + '00:41-00:47': './storage/cache_videos/vid-00_41-00_47.mp4', + '01:58-02:03': './storage/cache_videos/vid-01_58-02_03.mp4', + '02:03-02:12': './storage/cache_videos/vid-02_03-02_12.mp4', + '02:40-02:57': './storage/cache_videos/vid-02_40-02_57.mp4', + '03:14-03:18': './storage/cache_videos/vid-03_14-03_18.mp4', + '03:18-03:20': './storage/cache_videos/vid-03_18-03_20.mp4'} params = VideoClipParams( - video_clip_json_path="E:\\projects\\NarratoAI\\resource/scripts/test003.json", - video_origin_path="E:\\projects\\NarratoAI\\resource/videos/1.mp4", + 
def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str):
    """
    Build an SRT subtitle file aligned to the narration script.

    1. Split the script text into lines at punctuation marks.
    2. Accumulate the fragments held by ``sub_maker`` until they match the
       next script line, then emit one subtitle entry for that line.
    3. Write the file only when every script line was matched; otherwise log
       a warning and write nothing.

    Failures are logged rather than raised.
    """

    text = _format_text(text)
    script_lines = utils.split_string_by_punctuations(text)

    def formatter(idx: int, start_time: float, end_time: float, sub_text: str) -> str:
        """
        Render one SRT entry, e.g.:

        1
        00:00:00,000 --> 00:00:02,360
        <subtitle text>
        """
        # SRT uses a comma as the decimal separator.
        begin = mktimestamp(start_time).replace(".", ",")
        finish = mktimestamp(end_time).replace(".", ",")
        return f"{idx}\n{begin} --> {finish}\n{sub_text}\n"

    def match_line(_sub_line: str, _sub_index: int):
        """Return the text to display if ``_sub_line`` matches script line ``_sub_index``, else ""."""
        if _sub_index >= len(script_lines):
            return ""

        expected = script_lines[_sub_index]

        # Exact match.
        if _sub_line == expected:
            return expected.strip()

        # Match ignoring punctuation; this branch yields the punctuation-free text.
        expected_plain = re.sub(r"[^\w\s]", "", expected)
        if re.sub(r"[^\w\s]", "", _sub_line) == expected_plain:
            return expected_plain.strip()

        # Match ignoring every non-word character, whitespace included.
        if re.sub(r"\W+", "", _sub_line) == re.sub(r"\W+", "", expected):
            return expected.strip()

        return ""

    sub_items = []
    sub_index = 0
    start_time = -1.0  # negative means "no entry currently open"
    sub_line = ""

    try:
        for offset, sub in zip(sub_maker.offset, sub_maker.subs):
            _start_time, end_time = offset
            if start_time < 0:
                start_time = _start_time

            sub_line += unescape(sub)
            sub_text = match_line(sub_line, sub_index)
            if not sub_text:
                # Not a full script line yet; keep accumulating fragments.
                continue

            sub_index += 1
            sub_items.append(
                formatter(
                    idx=sub_index,
                    start_time=start_time,
                    end_time=end_time,
                    sub_text=sub_text,
                )
            )
            start_time = -1.0
            sub_line = ""

        if len(sub_items) != len(script_lines):
            logger.warning(
                f"failed, sub_items len: {len(sub_items)}, script_lines len: {len(script_lines)}"
            )
        else:
            with open(subtitle_file, "w", encoding="utf-8") as file:
                file.write("\n".join(sub_items) + "\n")
            try:
                # Re-read the file as a sanity check and to report its duration.
                sbs = subtitles.file_to_subtitles(subtitle_file, encoding="utf-8")
                duration = max(tb for ((ta, tb), txt) in sbs)
                logger.info(
                    f"completed, subtitle file created: {subtitle_file}, duration: {duration}"
                )
            except Exception as e:
                logger.error(f"failed, error: {str(e)}")
                # Remove a file that could not be read back.
                os.remove(subtitle_file)

    except Exception as e:
        logger.error(f"failed, error: {str(e)}")