优化剪辑逻辑123

This commit is contained in:
linyq 2024-09-27 12:01:42 +08:00
parent 67d6f353eb
commit e440dc619f
4 changed files with 247 additions and 23 deletions

View File

@ -0,0 +1,108 @@
import os
import subprocess
import edge_tts
from edge_tts import submaker
from pydub import AudioSegment
from typing import List
from loguru import logger
from app.utils import utils
def check_ffmpeg():
    """Report whether a working ``ffmpeg`` executable can be run.

    Runs ``ffmpeg -version`` with its output discarded.

    :return: ``True`` when the command could be launched and exited with
        status 0, ``False`` otherwise.
    """
    try:
        result = subprocess.run(
            ['ffmpeg', '-version'],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except OSError:
        # FileNotFoundError (not installed) is a subclass of OSError; this
        # also covers e.g. PermissionError for a non-executable binary.
        return False
    # A binary that starts but fails (non-zero exit) is not usable either.
    return result.returncode == 0
def merge_audio_files(task_id: str, audio_file_paths: List[str], total_duration: int):
    """
    Lay several audio clips onto one silent track of a fixed total length.

    Each clip is placed at the start time encoded in its file name (parsed by
    ``extract_timestamp``), and one entry per clip is appended to an edge-tts
    ``SubMaker``. Clips that are missing, unreadable, or whose file name
    cannot be parsed are logged and skipped.

    :param task_id: task identifier; output is written to ``utils.task_dir(task_id)``
    :param audio_file_paths: paths of the audio clips to merge
    :param total_duration: total length of the merged track, in seconds
    :return: ``(output_file, sub_maker)`` on success, or ``(None, None)`` when
        FFmpeg is unavailable or the merged audio cannot be exported
    """
    output_dir = utils.task_dir(task_id)

    if not check_ffmpeg():
        logger.error("错误FFmpeg未安装。请安装FFmpeg后再运行此脚本。")
        return None, None

    # Silent base track of the requested length (pydub works in milliseconds).
    blank_audio = AudioSegment.silent(duration=total_duration * 1000)

    sub_maker = edge_tts.SubMaker()
    # SubMaker offsets are expressed in 100-nanosecond ticks, which is the
    # unit edge_tts.submaker.mktimestamp divides by when formatting.
    ticks_per_second = 10 ** 7

    for audio_path in audio_file_paths:
        if not os.path.exists(audio_path):
            logger.warning(f"警告:文件 {audio_path} 不存在,已跳过。")
            continue

        # The clip's position on the timeline is encoded in its file name.
        filename = os.path.basename(audio_path)
        try:
            start_time, end_time = extract_timestamp(filename)
        except (IndexError, ValueError) as e:
            # One badly named file must not abort the whole merge.
            logger.error(f"错误:无法从文件名 {filename} 中解析时间戳,已跳过。错误信息:{str(e)}")
            continue

        try:
            audio = AudioSegment.from_mp3(audio_path)
        except Exception as e:
            logger.error(f"错误:无法读取文件 {audio_path}。错误信息:{str(e)}")
            continue

        # Mix the clip into the silent track at its start position (ms).
        blank_audio = blank_audio.overlay(audio, position=start_time * 1000)

        # TODO: the subtitle text is still the file name; the real narration
        # text needs to be passed in here.
        sub_maker.create_sub(
            (start_time * ticks_per_second, (end_time - start_time) * ticks_per_second),
            filename,
        )

    # Prefer WAV; fall back to MP3 in the same task directory if that fails.
    output_file = os.path.join(output_dir, "audio.wav")
    try:
        blank_audio.export(output_file, format="wav")
        logger.info(f"音频合并完成,已保存为 {output_file}")
    except Exception as e:
        logger.warning(f"导出为WAV格式失败尝试使用MP3格式{str(e)}")
        output_file = os.path.join(output_dir, "merged_audio.mp3")
        try:
            blank_audio.export(output_file, format="mp3", codec="libmp3lame")
            logger.info(f"音频合并完成,已保存为 {output_file}")
        except Exception as e:
            logger.error(f"导出音频失败:{str(e)}")
            return None, None

    return output_file, sub_maker
def extract_timestamp(filename):
    """Extract the start and end times (in seconds) encoded in a clip file name.

    The base name must contain a ``MM-SS-MM-SS`` group, for example
    ``audio_00-06-00-24.mp3`` yields ``(6, 24)``. Only the final path
    component is inspected, so a full path may be passed.

    :param filename: file name or path of the clip
    :return: ``(start_seconds, end_seconds)``
    :raises ValueError: if the base name contains no ``MM-SS-MM-SS`` group
    """
    import re  # local import: this module does not import re at file level

    base_name = os.path.basename(filename)
    match = re.search(r"(\d+)-(\d+)-(\d+)-(\d+)", base_name)
    if match is None:
        raise ValueError(
            f"no MM-SS-MM-SS timestamp found in file name: {base_name!r}"
        )

    start_min, start_sec, end_min, end_sec = match.groups()
    # Convert both minute/second pairs to whole seconds.
    start_seconds = time_to_seconds(start_min, start_sec)
    end_seconds = time_to_seconds(end_min, end_sec)
    return start_seconds, end_seconds
def time_to_seconds(minutes, seconds):
    """Return the total number of seconds for a minutes/seconds pair.

    Both arguments are passed through ``int()``, so numeric strings such as
    ``"01"`` are accepted.
    """
    whole_minutes = int(minutes)
    remaining_seconds = int(seconds)
    return whole_minutes * 60 + remaining_seconds
if __name__ == "__main__":
    # Example usage: merge five sample clips of task "test456" onto a
    # 75-second track and print the resulting file path and SubMaker.
    demo_clips = [
        "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00-06-00-24.mp3",
        "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00-32-00-38.mp3",
        "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00-43-00-52.mp3",
        "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00-52-01-09.mp3",
        "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_01-13-01-15.mp3"
    ]
    demo_total_seconds = 75
    merged_path, merged_subs = merge_audio_files("test456", demo_clips, demo_total_seconds)
    print(merged_path, merged_subs)

View File

@ -817,8 +817,8 @@ if __name__ == "__main__":
# gemini_video_transcription(video_subject, video_path, language)
# 2. 解说文案
# video_path = "/Users/apple/Desktop/home/NarratoAI/resource/videos/1.mp4"
video_path = "E:\\projects\\NarratoAI\\resource\\videos\\1.mp4"
video_path = "/Users/apple/Desktop/home/NarratoAI/resource/videos/1.mp4"
# video_path = "E:\\projects\\NarratoAI\\resource\\videos\\1.mp4"
video_plot = """
李自忠拿着儿子李牧名下的存折去银行取钱给儿子救命却被要求证明"你儿子是你儿子"
走投无路时碰到银行被抢劫劫匪给了他两沓钱救命李自忠却因此被银行以抢劫罪起诉并顶格判处20年有期徒刑

View File

@ -8,7 +8,7 @@ from loguru import logger
from app.config import config
from app.models import const
from app.models.schema import VideoConcatMode, VideoParams, VideoClipParams
from app.services import llm, material, subtitle, video, voice
from app.services import llm, material, subtitle, video, voice, audio_merger
from app.services import state as sm
from app.utils import utils
@ -97,7 +97,7 @@ def generate_subtitle(task_id, params, video_script, sub_maker, audio_file):
if not params.subtitle_enabled:
return ""
subtitle_path = path.join(utils.task_dir(task_id), "subtitle.srt")
subtitle_path = path.join(utils.task_dir(task_id), "subtitle111.srt")
subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")
@ -353,6 +353,9 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos):
logger.debug(f"解说完整脚本: \n{video_script}")
logger.debug(f"解说 OST 列表: \n{video_ost}")
logger.debug(f"解说时间戳列表: \n{time_list}")
# 获取视频总时长(单位 s)
total_duration = list_script[-1]['new_timestamp']
total_duration = int(total_duration.split("-")[1].split(":")[0]) * 60 + int(total_duration.split("-")[1].split(":")[1])
except Exception as e:
logger.error(f"无法读取视频json脚本请检查配置是否正确。{e}")
raise ValueError("无法读取视频json脚本请检查配置是否正确")
@ -372,21 +375,27 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos):
logger.error(
"音频文件为空可能是网络不可用。如果您在中国请使用VPN。或者手动选择 zh-CN-Yunjian-男性 音频")
return
logger.info("合并音频")
audio_file, sub_maker = audio_merger.merge_audio_files(task_id, audio_files, total_duration)
# audio_duration = voice.get_audio_duration(sub_maker)
# audio_duration = math.ceil(audio_duration)
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30)
subtitle_path = ""
if params.subtitle_enabled:
subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
subtitle_path = path.join(utils.task_dir(task_id), f"subtitle111.srt")
subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
logger.info(f"\n\n## 3. 生成字幕、提供程序是: {subtitle_provider}")
# subtitle_fallback = False
if subtitle_provider == "edge":
voice.create_subtitle_from_multiple(
text=video_script,
sub_maker_list=sub_maker_list,
list_script=list_script,
subtitle_file=subtitle_path
)
voice.create_subtitle(text=video_script, sub_maker=sub_maker, subtitle_file=subtitle_path)
# voice.create_subtitle(
# text=video_script,
# sub_maker_list=sub_maker_list,
# list_script=list_script,
# subtitle_file=subtitle_path
# )
# if not os.path.exists(subtitle_path):
# subtitle_fallback = True
# logger.warning("找不到字幕文件回退到whisper")
@ -475,18 +484,35 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos):
if __name__ == "__main__":
task_id = "test123456"
subclip_path_videos = {'00:41-01:58': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_41-01_58.mp4',
'00:06-00:15': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_06-00_15.mp4',
'01:10-01:17': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_10-01_17.mp4',
'00:47-01:03': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_47-01_03.mp4',
'01:03-01:10': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_03-01_10.mp4',
'02:40-03:08': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-02_40-03_08.mp4',
'03:02-03:20': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-03_02-03_20.mp4',
'03:18-03:20': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-03_18-03_20.mp4'}
# task_id = "test123"
# subclip_path_videos = {'00:41-01:58': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_41-01_58.mp4',
# '00:06-00:15': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_06-00_15.mp4',
# '01:10-01:17': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_10-01_17.mp4',
# '00:47-01:03': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_47-01_03.mp4',
# '01:03-01:10': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_03-01_10.mp4',
# '02:40-03:08': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-02_40-03_08.mp4',
# '03:02-03:20': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-03_02-03_20.mp4',
# '03:18-03:20': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-03_18-03_20.mp4'}
#
# params = VideoClipParams(
# video_clip_json_path="E:\\projects\\NarratoAI\\resource/scripts/test003.json",
# video_origin_path="E:\\projects\\NarratoAI\\resource/videos/1.mp4",
# )
# start_subclip(task_id, params, subclip_path_videos=subclip_path_videos)
task_id = "test456"
subclip_path_videos = {'00:00-00:06': './storage/cache_videos/vid-00_00-00_06.mp4',
'00:06-00:24': './storage/cache_videos/vid-00_06-00_24.mp4',
'01:28-01:36': './storage/cache_videos/vid-01_28-01_36.mp4',
'00:41-00:47': './storage/cache_videos/vid-00_41-00_47.mp4',
'01:58-02:03': './storage/cache_videos/vid-01_58-02_03.mp4',
'02:03-02:12': './storage/cache_videos/vid-02_03-02_12.mp4',
'02:40-02:57': './storage/cache_videos/vid-02_40-02_57.mp4',
'03:14-03:18': './storage/cache_videos/vid-03_14-03_18.mp4',
'03:18-03:20': './storage/cache_videos/vid-03_18-03_20.mp4'}
params = VideoClipParams(
video_clip_json_path="E:\\projects\\NarratoAI\\resource/scripts/test003.json",
video_origin_path="E:\\projects\\NarratoAI\\resource/videos/1.mp4",
video_clip_json_path="/Users/apple/Desktop/home/NarratoAI/resource/scripts/test003.json",
video_origin_path="/Users/apple/Desktop/home/NarratoAI/resource/videos/1.mp4",
)
start_subclip(task_id, params, subclip_path_videos=subclip_path_videos)

View File

@ -2,12 +2,12 @@ import os
import re
import json
import traceback
import edge_tts
import asyncio
from loguru import logger
from typing import List
from datetime import datetime
from edge_tts.submaker import mktimestamp
from xml.sax.saxutils import unescape
from edge_tts import submaker, SubMaker
from moviepy.video.tools import subtitles
@ -1293,6 +1293,96 @@ def create_subtitle_from_multiple(text: str, sub_maker_list: List[SubMaker], lis
traceback.print_exc()
def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str):
    """
    Build an SRT subtitle file by aligning TTS fragments with the script text.

    1. Split the script text into lines at punctuation marks
    2. Accumulate the fragments held by ``sub_maker`` until they match the
       next script line
    3. Write the matched lines to ``subtitle_file``

    The file is written only when every script line was matched; otherwise a
    warning is logged. Errors raised during matching or writing are logged
    rather than propagated.
    """
    text = _format_text(text)

    def _srt_entry(idx: int, start_time: float, end_time: float, sub_text: str) -> str:
        """
        Render one SRT cue, e.g.

        1
        00:00:00,000 --> 00:00:02,360
        <subtitle text>
        """
        begin = mktimestamp(start_time).replace(".", ",")
        finish = mktimestamp(end_time).replace(".", ",")
        return f"{idx}\n{begin} --> {finish}\n{sub_text}\n"

    script_lines = utils.split_string_by_punctuations(text)

    def _match_script_line(candidate: str, line_no: int) -> str:
        """Return the text to emit if ``candidate`` matches script line ``line_no``, else ""."""
        if line_no >= len(script_lines):
            return ""

        expected = script_lines[line_no]
        if candidate == expected:
            return expected.strip()

        # Retry with punctuation removed (whitespace kept); on a match the
        # punctuation-free script line is what gets returned.
        expected_no_punct = re.sub(r"[^\w\s]", "", expected)
        if re.sub(r"[^\w\s]", "", candidate) == expected_no_punct:
            return expected_no_punct.strip()

        # Last retry with every run of non-word characters removed.
        if re.sub(r"\W+", "", candidate) == re.sub(r"\W+", "", expected):
            return expected.strip()

        return ""

    sub_items = []
    matched_count = 0
    line_start = -1.0  # negative means "no subtitle line in progress"
    pending = ""

    try:
        for (frag_start, frag_end), fragment in zip(sub_maker.offset, sub_maker.subs):
            if line_start < 0:
                line_start = frag_start

            pending += unescape(fragment)
            matched = _match_script_line(pending, matched_count)
            if not matched:
                # Keep accumulating fragments until a full script line matches.
                continue

            matched_count += 1
            sub_items.append(
                _srt_entry(
                    idx=matched_count,
                    start_time=line_start,
                    end_time=frag_end,
                    sub_text=matched,
                )
            )
            line_start = -1.0
            pending = ""

        if len(sub_items) != len(script_lines):
            logger.warning(
                f"failed, sub_items len: {len(sub_items)}, script_lines len: {len(script_lines)}"
            )
        else:
            with open(subtitle_file, "w", encoding="utf-8") as file:
                file.write("\n".join(sub_items) + "\n")
            try:
                # Re-read the file to validate it and report its duration.
                parsed = subtitles.file_to_subtitles(subtitle_file, encoding="utf-8")
                duration = max(cue_end for (_cue_start, cue_end), _cue_text in parsed)
                logger.info(
                    f"completed, subtitle file created: {subtitle_file}, duration: {duration}"
                )
            except Exception as e:
                logger.error(f"failed, error: {str(e)}")
                # Remove the file that could not be read back.
                os.remove(subtitle_file)
    except Exception as e:
        logger.error(f"failed, error: {str(e)}")
def get_audio_duration(sub_maker: submaker.SubMaker):
"""
获取音频时长