Mirror of https://github.com/linyqh/NarratoAI.git (synced 2025-12-11 18:42:49 +00:00)
Clip-editing logic is roughly 60% complete.
Pending optimizations: 1. improve the subtitle-generation logic; 2. align the narration timing with the script timestamps.
Parent: e440dc619f
Commit: 7b3014ad42
@@ -353,7 +353,7 @@ class VideoClipParams(BaseModel):
     bgm_file: Optional[str] = Field(default="", description="背景音乐文件")
     bgm_volume: Optional[float] = Field(default=0.2, description="背景音乐音量")

-    subtitle_enabled: Optional[bool] = Field(default=True, description="是否启用字幕")
+    subtitle_enabled: Optional[bool] = Field(default=False, description="是否启用字幕")
     subtitle_position: Optional[str] = Field(default="bottom", description="字幕位置")  # top, bottom, center
     font_name: Optional[str] = Field(default="STHeitiMedium.ttc", description="字体名称")
     text_fore_color: Optional[str] = Field(default="#FFFFFF", description="文字前景色")
@@ -364,5 +364,5 @@ class VideoClipParams(BaseModel):
     stroke_width: float = Field(default=1.5, description="文字描边宽度")
     custom_position: float = Field(default=70.0, description="自定义位置")

-    # n_threads: Optional[int] = 2  # 线程数
+    n_threads: Optional[int] = 8  # 线程数,有助于提升视频处理速度
     # paragraph_number: Optional[int] = 1  # 段落数量
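The schema change above flips two defaults that downstream callers may rely on: subtitles are now off by default, and the previously commented-out n_threads becomes a real field defaulting to 8. A minimal sketch of what that means for callers, assuming VideoClipParams is importable from app.models.schema and that its other fields all have defaults (neither is confirmed by this diff):

```python
# Hypothetical usage sketch; the import path and the assumption that all other
# fields have defaults are illustrative, not taken from this diff.
from app.models.schema import VideoClipParams

params = VideoClipParams()
print(params.subtitle_enabled)  # False after this commit (was True)
print(params.n_threads)         # 8 worker threads by default

# Callers that still want burned-in subtitles now have to opt in explicitly:
params = VideoClipParams(subtitle_enabled=True, n_threads=4)
```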
@@ -1,9 +1,10 @@
 import os
+import json
 import subprocess
 import edge_tts
 from edge_tts import submaker
 from pydub import AudioSegment
-from typing import List
+from typing import List, Dict
 from loguru import logger
 from app.utils import utils

@@ -17,12 +18,13 @@ def check_ffmpeg():
     return False


-def merge_audio_files(task_id: str, audio_file_paths: List[str], total_duration: int):
+def merge_audio_files(task_id: str, audio_file_paths: List[str], total_duration: int, video_script: list):
     """
-    合并多个音频文件到一个指定总时长的音频文件中
+    合并多个音频文件到一个指定总时长的音频文件中,并生成相应的字幕
     :param task_id: 任务ID
     :param audio_file_paths: 音频文件路径列表
     :param total_duration: 最终音频文件的总时长(秒)
+    :param video_script: JSON格式的视频脚本
     """
     output_dir = utils.task_dir(task_id)
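The new video_script argument is only described as "JSON格式的视频脚本" (a JSON-format video script). Judging from the keys the function reads further down (new_timestamp, OST, narration), each segment presumably looks like the sketch below; the concrete values are illustrative, not taken from the repo:

```python
# Illustrative segment shape inferred from the keys used in the loop below;
# the timestamps and text are made up.
video_script = [
    {
        "new_timestamp": "00:06-00:24",  # "MM:SS-MM:SS", parsed by parse_timestamp()
        "OST": False,                    # False => narration audio plus a subtitle line
        "narration": "解说文案……",
    },
    {
        "new_timestamp": "00:32-00:38",
        "OST": True,                     # True => keep the original sound, no subtitle
        "narration": "",
    },
]
```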
@@ -35,6 +37,17 @@ def merge_audio_files(task_id: str, audio_file_paths: List[str], total_duration:
     # 创建SubMaker对象
     sub_maker = edge_tts.SubMaker()

+    # 解析JSON格式的video_script
+    script_data = video_script
+
+    for segment in script_data:
+        start_time, end_time = parse_timestamp(segment['new_timestamp'])
+        duration = (end_time - start_time) * 1000  # 转换为毫秒
+
+        if not segment['OST']:
+            # 如果不是原声,则添加narration作为字幕
+            sub_maker.create_sub((start_time * 1000, duration), segment['narration'])
+
     for audio_path in audio_file_paths:
         if not os.path.exists(audio_path):
             logger.info(f"警告:文件 {audio_path} 不存在,已跳过。")
@@ -50,14 +63,10 @@ def merge_audio_files(task_id: str, audio_file_paths: List[str], total_duration:
         except Exception as e:
             logger.error(f"错误:无法读取文件 {audio_path}。错误信息:{str(e)}")
             continue

         # 将音频插入到空白音频的指定位置
         blank_audio = blank_audio.overlay(audio, position=start_time * 1000)

-        # 添加字幕信息
-        duration = (end_time - start_time) * 1000  # 转换为毫秒
-        # TODO 不是 filename 需要考虑怎么把字幕文本弄过来
-        sub_maker.create_sub((start_time * 1000, duration), filename)
-
     # 尝试导出为WAV格式
     try:
         output_file = os.path.join(output_dir, "audio.wav")
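The merging itself is plain pydub: a silent base track of total_duration is created and each narration clip is overlaid at the start offset encoded in its filename. A standalone sketch of that pattern (file names and offsets are illustrative):

```python
from pydub import AudioSegment

total_duration = 38  # seconds
blank_audio = AudioSegment.silent(duration=total_duration * 1000)  # pydub works in ms

# Overlay one narration clip so it starts at the 6-second mark of the base track.
clip = AudioSegment.from_file("audio_00-06-00-24.mp3")
blank_audio = blank_audio.overlay(clip, position=6 * 1000)

blank_audio.export("audio.wav", format="wav")
```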
@@ -66,7 +75,7 @@ def merge_audio_files(task_id: str, audio_file_paths: List[str], total_duration:
     except Exception as e:
         logger.info(f"导出为WAV格式失败,尝试使用MP3格式:{str(e)}")
         try:
-            output_file = "merged_audio.mp3"
+            output_file = os.path.join(output_dir, "audio.mp3")
             blank_audio.export(output_file, format="mp3", codec="libmp3lame")
             logger.info(f"音频合并完成,已保存为 {output_file}")
         except Exception as e:
@@ -75,6 +84,10 @@ def merge_audio_files(task_id: str, audio_file_paths: List[str], total_duration:

     return output_file, sub_maker

+def parse_timestamp(timestamp: str) -> tuple:
+    """解析时间戳字符串为秒数"""
+    start, end = timestamp.split('-')
+    return time_to_seconds(*start.split(':')), time_to_seconds(*end.split(':'))

 def extract_timestamp(filename):
     """从文件名中提取开始和结束时间戳"""
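The new parse_timestamp helper splits a "MM:SS-MM:SS" range and delegates to time_to_seconds, which is not shown in this hunk. The sketch below re-implements it under the assumption that it simply returns minutes * 60 + seconds, and checks the expected results:

```python
def time_to_seconds(minutes, seconds):
    # Assumed behavior of the module's helper: minutes and seconds arrive as strings.
    return int(minutes) * 60 + int(seconds)

def parse_timestamp(timestamp: str) -> tuple:
    start, end = timestamp.split('-')
    return time_to_seconds(*start.split(':')), time_to_seconds(*end.split(':'))

assert parse_timestamp("00:06-00:24") == (6, 24)
assert parse_timestamp("01:13-01:15") == (73, 75)
```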
@@ -95,14 +108,17 @@ def time_to_seconds(minutes, seconds):

 if __name__ == "__main__":
     # 示例用法
-    audio_files = [
+    audio_files =[
         "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00-06-00-24.mp3",
         "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00-32-00-38.mp3",
         "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00-43-00-52.mp3",
         "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00-52-01-09.mp3",
-        "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_01-13-01-15.mp3"
+        "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_01-13-01-15.mp3",
     ]
-    total_duration = 75
+    total_duration = 38
+    video_script_path = "/Users/apple/Desktop/home/NarratoAI/resource/scripts/test003.json"
+    with open(video_script_path, "r", encoding="utf-8") as f:
+        video_script = json.load(f)

-    a, b = merge_audio_files("test456", audio_files, total_duration)
-    print(a, b)
+    output_file, sub_maker = merge_audio_files("test456", audio_files, total_duration, video_script)
+    print(output_file, sub_maker)
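The removed TODO ("this is not filename; need to work out how to get the subtitle text here") and the commit message both flag subtitle generation as unfinished. One alternative sketch that bypasses SubMaker entirely and writes SRT straight from the script segments; script_to_srt and _fmt are hypothetical helpers, not part of this commit, and parse_timestamp is assumed to be the module-level function added above:

```python
def _fmt(seconds: float) -> str:
    """Format a second offset as an SRT timestamp HH:MM:SS,mmm."""
    total_ms = int(seconds * 1000)
    h, rem = divmod(total_ms, 3_600_000)
    m, rem = divmod(rem, 60_000)
    s, ms = divmod(rem, 1_000)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"

def script_to_srt(video_script: list, srt_path: str) -> None:
    """Write narration segments (OST == False) directly to an SRT file."""
    blocks = []
    idx = 1
    for seg in video_script:
        if seg["OST"]:
            continue
        start, end = parse_timestamp(seg["new_timestamp"])
        blocks.append(f"{idx}\n{_fmt(start)} --> {_fmt(end)}\n{seg['narration']}\n")
        idx += 1
    with open(srt_path, "w", encoding="utf-8") as f:
        f.write("\n".join(blocks))
```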
@@ -1,10 +1,12 @@
 import json
 import os.path
 import re
+from typing import Optional

 from faster_whisper import WhisperModel
 from timeit import default_timer as timer
 from loguru import logger
+import google.generativeai as genai

 from app.config import config
 from app.utils import utils
@@ -278,8 +280,40 @@ def correct(subtitle_file, video_script):
     logger.success("Subtitle is correct")


+def create_with_gemini(audio_file: str, subtitle_file: str = "", api_key: Optional[str] = None) -> Optional[str]:
+    if not api_key:
+        logger.error("Gemini API key is not provided")
+        return None
+
+    genai.configure(api_key=api_key)
+
+    logger.info(f"开始使用Gemini模型处理音频文件: {audio_file}")
+
+    model = genai.GenerativeModel(model_name="gemini-1.5-flash")
+    prompt = "生成这段语音的转录文本。请以SRT格式输出,包含时间戳。"
+
+    try:
+        with open(audio_file, "rb") as f:
+            audio_data = f.read()
+
+        response = model.generate_content([prompt, audio_data])
+        transcript = response.text
+
+        if not subtitle_file:
+            subtitle_file = f"{audio_file}.srt"
+
+        with open(subtitle_file, "w", encoding="utf-8") as f:
+            f.write(transcript)
+
+        logger.info(f"Gemini生成的字幕文件已保存: {subtitle_file}")
+        return subtitle_file
+    except Exception as e:
+        logger.error(f"使用Gemini处理音频时出错: {e}")
+        return None
+
+
 if __name__ == "__main__":
-    task_id = "c12fd1e6-4b0a-4d65-a075-c87abe35a072"
+    task_id = "task456"
     task_dir = utils.task_dir(task_id)
     subtitle_file = f"{task_dir}/subtitle.srt"
     audio_file = f"{task_dir}/audio.mp3"
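create_with_gemini passes the raw MP3 bytes straight into generate_content. With the google-generativeai SDK it is more common to wrap audio as a part with an explicit MIME type (or to go through genai.upload_file); a hedged variant of the same request, with a placeholder API key and file name, and behavior that may vary by SDK version:

```python
import google.generativeai as genai

genai.configure(api_key="YOUR_GEMINI_API_KEY")  # placeholder
model = genai.GenerativeModel(model_name="gemini-1.5-flash")

with open("audio.mp3", "rb") as f:
    audio_data = f.read()

# Wrap the bytes as an inline part with a MIME type so the model treats them as audio.
response = model.generate_content([
    "生成这段语音的转录文本。请以SRT格式输出,包含时间戳。",
    {"mime_type": "audio/mp3", "data": audio_data},
])
print(response.text)
```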
@@ -297,3 +331,10 @@ if __name__ == "__main__":

     subtitle_file = f"{task_dir}/subtitle-test.srt"
     create(audio_file, subtitle_file)
+
+    # 使用Gemini模型处理音频
+    gemini_api_key = config.app.get("gemini_api_key")  # 请替换为实际的API密钥
+    gemini_subtitle_file = create_with_gemini(audio_file, api_key=gemini_api_key)
+
+    if gemini_subtitle_file:
+        print(f"Gemini生成的字幕文件: {gemini_subtitle_file}")
@@ -338,7 +338,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos):
     # tts 角色名称
     voice_name = voice.parse_voice_name(params.voice_name)

-    logger.info("\n\n## 1. 读取视频json脚本")
+    logger.info("\n\n## 1. 加载视频脚本")
     video_script_path = path.join(params.video_clip_json_path)
     # 判断json文件是否存在
     if path.exists(video_script_path):
@@ -376,7 +376,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos):
                 "音频文件为空,可能是网络不可用。如果您在中国,请使用VPN。或者手动选择 zh-CN-Yunjian-男性 音频")
             return
         logger.info("合并音频")
-        audio_file, sub_maker = audio_merger.merge_audio_files(task_id, audio_files, total_duration)
+        audio_file, sub_maker = audio_merger.merge_audio_files(task_id, audio_files, total_duration, list_script)

         # audio_duration = voice.get_audio_duration(sub_maker)
         # audio_duration = math.ceil(audio_duration)
@@ -387,7 +387,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos):
     subtitle_path = path.join(utils.task_dir(task_id), f"subtitle111.srt")
     subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
     logger.info(f"\n\n## 3. 生成字幕、提供程序是: {subtitle_provider}")
-    # subtitle_fallback = False
+    subtitle_fallback = False
     if subtitle_provider == "edge":
         voice.create_subtitle(text=video_script, sub_maker=sub_maker, subtitle_file=subtitle_path)
         # voice.create_subtitle(
@@ -401,7 +401,8 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos):
     #     logger.warning("找不到字幕文件,回退到whisper")
     #
     # if subtitle_provider == "whisper" or subtitle_fallback:
-    #     subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
+    #     # subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
+    #     subtitle.create_with_gemini(audio_file=audio_file, subtitle_file=subtitle_path, api_key=config.app.get("gemini_api_key", ""))
     # logger.info("\n\n## 更正字幕")
     # subtitle.correct(subtitle_file=subtitle_path, video_script=video_script)
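The whisper branch is now fully commented out and a Gemini call is sketched in its place. If the fallback is ever re-enabled, the decision boils down to: did the edge provider produce a subtitle file, and if not, which speech-to-text engine takes over. The helper below is hypothetical (it does not exist in the repo) and only illustrates that decision:

```python
import os

def pick_subtitle_provider(provider: str, subtitle_path: str) -> str:
    """Sketch of the fallback logic implied by the commented-out block."""
    if provider == "edge" and os.path.exists(subtitle_path):
        return "edge"
    # Fall back to a speech-to-text pass: whisper previously, Gemini in this commit.
    return "gemini"
```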
@@ -449,7 +450,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos):
         video_ost_list=video_ost,
         list_script=list_script,
         video_aspect=params.video_aspect,
-        threads=1  # 暂时只支持单线程
+        threads=params.n_threads  # 多线程
     )

     _progress += 50 / 2
@@ -461,7 +462,7 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos):
     # 把所有东西合到在一起
     video.generate_video_v2(
         video_path=combined_video_path,
-        audio_paths=audio_files,
+        audio_path=audio_file,
         subtitle_path=subtitle_path,
         output_file=final_video_path,
         params=params,
@@ -294,7 +294,7 @@ def generate_video(
         output_file,
         audio_codec="aac",
         temp_audiofile_path=output_dir,
-        threads=params.n_threads or 2,
+        threads=params.n_threads,
         logger=None,
         fps=30,
     )
@@ -306,7 +306,7 @@

 def generate_video_v2(
         video_path: str,
-        audio_paths: List[str],
+        audio_path: str,
         subtitle_path: str,
         output_file: str,
         params: Union[VideoParams, VideoClipParams],
@@ -314,11 +314,11 @@ def generate_video_v2(
     """
     合并所有素材
     Args:
-        video_path:
-        audio_paths:
-        subtitle_path:
-        output_file:
-        params:
+        video_path: 视频路径
+        audio_path: 单个音频文件路径
+        subtitle_path: 字幕文件路径
+        output_file: 输出文件路径
+        params: 视频参数

     Returns:

@@ -328,7 +328,7 @@ def generate_video_v2(

     logger.info(f"开始,视频尺寸: {video_width} x {video_height}")
     logger.info(f"  ① 视频: {video_path}")
-    logger.info(f"  ② 音频文件数量: {len(audio_paths)}")
+    logger.info(f"  ② 音频: {audio_path}")
     logger.info(f"  ③ 字幕: {subtitle_path}")
     logger.info(f"  ④ 输出: {output_file}")

@@ -386,40 +386,8 @@ def generate_video_v2(
     original_audio = video_clip.audio  # 保存原始视频的音轨
     video_duration = video_clip.duration

-    # 处理多个音频文件
-    audio_clips = []
-    for audio_path in audio_paths:
-        # 确保每个音频文件路径是正确的
-        if not os.path.exists(audio_path):
-            logger.warning(f"音频文件不存在: {audio_path}")
-            continue
-
-        # 从文件名中提取时间信息
-        match = re.search(r'audio_(\d{2}-\d{2}-\d{2}-\d{2})\.mp3', os.path.basename(audio_path))
-        if match:
-            time_str = match.group(1)
-            start, end = time_str.split('-')[:2], time_str.split('-')[2:]
-            start_time = sum(int(x) * 60 ** i for i, x in enumerate(reversed(start)))
-            end_time = sum(int(x) * 60 ** i for i, x in enumerate(reversed(end)))
-
-            audio_clip = AudioFileClip(audio_path).volumex(params.voice_volume)
-
-            # 确保结束时间不超过音频实际长度
-            actual_end_time = min(end_time - start_time, audio_clip.duration)
-
-            audio_clip = audio_clip.subclip(0, actual_end_time)
-            audio_clip = audio_clip.set_start(start_time).set_end(start_time + actual_end_time)
-            audio_clips.append(audio_clip)
-        else:
-            logger.warning(f"无法从文件名解析时间信息: {audio_path}")
-
-    # 合并所有音频剪辑,包括原始音轨
-    if audio_clips:
-        audio_clips.insert(0, original_audio)  # 将原始音轨添加到音频剪辑列表的开头
-        audio_clip = CompositeAudioClip(audio_clips)
-    else:
-        logger.warning("没有有效的音频文件,使用原始音轨")
-        audio_clip = original_audio
+    # 处理新的音频文件
+    new_audio = AudioFileClip(audio_path).volumex(params.voice_volume)

     # 字幕处理部分
     if subtitle_path and os.path.exists(subtitle_path):
@@ -451,22 +419,29 @@ def generate_video_v2(
     # 背景音乐处理部分
     bgm_file = get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)

+    # 合并音频轨道
+    audio_tracks = [original_audio, new_audio]
+
     if bgm_file:
         try:
             bgm_clip = (
                 AudioFileClip(bgm_file).volumex(params.bgm_volume).audio_fadeout(3)
             )
-            bgm_clip = afx.audio_loop(bgm_clip, duration=video_clip.duration)
-            audio_clip = CompositeAudioClip([audio_clip, bgm_clip])
+            bgm_clip = afx.audio_loop(bgm_clip, duration=video_duration)
+            audio_tracks.append(bgm_clip)
         except Exception as e:
             logger.error(f"添加背景音乐失败: {str(e)}")

-    video_clip = video_clip.set_audio(audio_clip)
+    # 合并所有音频轨道
+    final_audio = CompositeAudioClip(audio_tracks)
+
+    video_clip = video_clip.set_audio(final_audio)
     video_clip.write_videofile(
         output_file,
         audio_codec="aac",
         temp_audiofile_path=output_dir,
-        threads=params.n_threads or 2,
+        threads=params.n_threads,
         logger=None,
         fps=30,
     )
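After this rewrite the audio path of generate_video_v2 layers exactly three tracks: the clip's original sound, the single pre-merged narration file, and optionally looped background music. A standalone sketch of that compositing with moviepy 1.x (which the volumex/afx calls suggest the project targets); paths and volumes are illustrative:

```python
from moviepy.editor import AudioFileClip, CompositeAudioClip, VideoFileClip, afx

video_clip = VideoFileClip("combined-1.mp4")
original_audio = video_clip.audio  # keep the clip's own sound

# Pre-merged narration track, e.g. the audio.wav produced by merge_audio_files()
new_audio = AudioFileClip("audio.wav").volumex(1.0)
audio_tracks = [original_audio, new_audio]

# Optional background music: lowered volume, 3s fade-out, looped to video length
bgm_clip = AudioFileClip("bgm.mp3").volumex(0.2).audio_fadeout(3)
bgm_clip = afx.audio_loop(bgm_clip, duration=video_clip.duration)
audio_tracks.append(bgm_clip)

final_audio = CompositeAudioClip(audio_tracks)
video_clip = video_clip.set_audio(final_audio)
video_clip.write_videofile("final.mp4", audio_codec="aac", fps=30)
```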
@@ -607,7 +582,7 @@ def combine_clip_videos(combined_video_path: str,

     video_clip = concatenate_videoclips(clips)
     video_clip = video_clip.set_fps(30)
-    logger.info(f"合并中...")
+    logger.info(f"合并视频中...")
     video_clip.write_videofile(filename=combined_video_path,
                                threads=threads,
                                logger=None,
@@ -687,19 +662,14 @@ if __name__ == "__main__":

     video_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/combined-1.mp4"

-    audio_paths = ['../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/audio_00-00-00-07.mp3',
-                   '../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/audio_00-14-00-17.mp3',
-                   '../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/audio_00-17-00-22.mp3',
-                   '../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/audio_00-34-00-45.mp3',
-                   '../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/audio_00-59-01-09.mp3',
-                   ]
+    audio_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/audio_00-00-00-07.mp3"

     subtitle_path = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa\subtitle.srt"

     output_file = "../../storage/tasks/7f5ae494-abce-43cf-8f4f-4be43320eafa/final-123.mp4"

     generate_video_v2(video_path=video_path,
-                      audio_paths=audio_paths,
+                      audio_path=audio_path,
                       subtitle_path=subtitle_path,
                       output_file=output_file,
                       params=cfg
@@ -1034,8 +1034,8 @@ def is_azure_v2_voice(voice_name: str):
 def tts(
     text: str, voice_name: str, voice_rate: float, voice_file: str
 ) -> [SubMaker, None]:
-    if is_azure_v2_voice(voice_name):
-        return azure_tts_v2(text, voice_name, voice_file)
+    # if is_azure_v2_voice(voice_name):
+    #     return azure_tts_v2(text, voice_name, voice_file)
     return azure_tts_v1(text, voice_name, voice_rate, voice_file)


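azure_tts_v1 itself is not shown in this diff. With edge-tts, the usual pattern behind such a helper is a Communicate object streaming audio chunks to disk while WordBoundary events feed a SubMaker, which is presumably where the sub_maker returned to start_subclip comes from. A hedged sketch, assuming an edge-tts version that still provides SubMaker.create_sub (which this codebase calls); the voice name and output path are illustrative:

```python
import asyncio
import edge_tts


async def tts_sketch(text: str, voice_name: str, voice_file: str) -> edge_tts.SubMaker:
    """Stream TTS audio to a file and collect word timings into a SubMaker."""
    communicate = edge_tts.Communicate(text, voice_name)
    sub_maker = edge_tts.SubMaker()
    with open(voice_file, "wb") as f:
        async for chunk in communicate.stream():
            if chunk["type"] == "audio":
                f.write(chunk["data"])
            elif chunk["type"] == "WordBoundary":
                # offset/duration are reported in 100-nanosecond units by edge-tts
                sub_maker.create_sub((chunk["offset"], chunk["duration"]), chunk["text"])
    return sub_maker


if __name__ == "__main__":
    asyncio.run(tts_sketch("你好,世界", "zh-CN-YunjianNeural", "out.mp3"))
```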
@@ -1414,7 +1414,7 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
         audio_file = os.path.join(output_dir, f"audio_{timestamp}.mp3")

         # 检查文件是否已存在,如存在且不强制重新生成,则跳过
-        if os.path.exists(audio_file) and not force_regenerate:
+        if os.path.exists(audio_file):
             logger.info(f"音频文件已存在,跳过生成: {audio_file}")
             audio_files.append(audio_file)
             continue