mirror of
https://github.com/linyqh/NarratoAI.git
synced 2026-03-13 15:21:30 +00:00
commit
9aefe76a8c
1
.gitignore
vendored
1
.gitignore
vendored
@ -32,4 +32,5 @@ resource/fonts/*.ttf
|
||||
resource/fonts/*.otf
|
||||
resource/srt/*.srt
|
||||
app/models/faster-whisper-large-v2/*
|
||||
app/models/faster-whisper-large-v3/*
|
||||
app/models/bert/*
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
<h3 align="center">一站式 AI 影视解说+自动化剪辑工具🎬🎞️ </h3>
|
||||
|
||||
|
||||
<h3>📖 <a href="README-cn.md">English</a> | 简体中文 | <a href="README-ja.md">日本語</a> </h3>
|
||||
<h3>📖 <a href="README-en.md">English</a> | 简体中文 | <a href="README-ja.md">日本語</a> </h3>
|
||||
<div align="center">
|
||||
|
||||
[//]: # ( <a href="https://trendshift.io/repositories/8731" target="_blank"><img src="https://trendshift.io/api/badge/repositories/8731" alt="harry0703%2FNarratoAI | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>)
|
||||
@ -83,7 +83,7 @@ _**注意⚠️:近期在 x (推特) 上发现有人冒充作者在 pump.fun
|
||||
## 配置要求 📦
|
||||
|
||||
- 建议最低 CPU 4核或以上,内存 8G 或以上,显卡非必须
|
||||
- Windows 10 或 MacOS 11.0 以上系统
|
||||
- Windows 10/11 或 MacOS 11.0 以上系统
|
||||
- [Python 3.10+](https://www.python.org/downloads/)
|
||||
|
||||
## 反馈建议 📢
|
||||
|
||||
@ -20,7 +20,9 @@ class VideoConcatMode(str, Enum):
|
||||
|
||||
class VideoAspect(str, Enum):
|
||||
landscape = "16:9"
|
||||
landscape_2 = "4:3"
|
||||
portrait = "9:16"
|
||||
portrait_2 = "3:4"
|
||||
square = "1:1"
|
||||
|
||||
def to_resolution(self):
|
||||
@ -360,13 +362,14 @@ class VideoClipParams(BaseModel):
|
||||
text_back_color: Optional[str] = None # 文本背景色
|
||||
stroke_color: str = "black" # 描边颜色
|
||||
stroke_width: float = 1.5 # 描边宽度
|
||||
subtitle_position: str = "bottom" # top, bottom, center, custom
|
||||
subtitle_position: str = "bottom" # top, bottom, center, custom
|
||||
custom_position: float = 70.0 # 自定义位置
|
||||
|
||||
n_threads: Optional[int] = Field(default=16, description="解说语音音量") # 线程<E7BABF><E7A88B><EFBFBD>,有助于提升视频处理速度
|
||||
n_threads: Optional[int] = Field(default=16, description="线程数") # 线程数,有助于提升视频处理速度
|
||||
|
||||
tts_volume: Optional[float] = Field(default=1.0, description="解说语音音量(后处理)")
|
||||
original_volume: Optional[float] = Field(default=1.0, description="视频原声音量")
|
||||
bgm_volume: Optional[float] = Field(default=0.6, description="背景音乐音量")
|
||||
bgm_volume: Optional[float] = Field(default=0.3, description="背景音乐音量")
|
||||
|
||||
|
||||
class VideoTranscriptionRequest(BaseModel):
|
||||
|
||||
@ -6,6 +6,7 @@ class GenerateScriptRequest(BaseModel):
|
||||
video_path: str
|
||||
video_theme: Optional[str] = ""
|
||||
custom_prompt: Optional[str] = ""
|
||||
frame_interval_input: Optional[int] = 5
|
||||
skip_seconds: Optional[int] = 0
|
||||
threshold: Optional[int] = 30
|
||||
vision_batch_size: Optional[int] = 5
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -18,15 +18,14 @@ def check_ffmpeg():
|
||||
return False
|
||||
|
||||
|
||||
def merge_audio_files(task_id: str, audio_files: list, total_duration: float, list_script: list):
|
||||
def merge_audio_files(task_id: str, total_duration: float, list_script: list):
|
||||
"""
|
||||
合并音频文件,根据OST设置处理不同的音频轨道
|
||||
合并音频文件
|
||||
|
||||
Args:
|
||||
task_id: 任务ID
|
||||
audio_files: TTS生成的音频文件列表
|
||||
total_duration: 总时长
|
||||
list_script: 完整脚本信息,包含OST设置
|
||||
list_script: 完整脚本信息,包含duration时长和audio路径
|
||||
|
||||
Returns:
|
||||
str: 合并后的音频文件路径
|
||||
@ -39,36 +38,38 @@ def merge_audio_files(task_id: str, audio_files: list, total_duration: float, li
|
||||
# 创建一个空的音频片段
|
||||
final_audio = AudioSegment.silent(duration=total_duration * 1000) # 总时长以毫秒为单位
|
||||
|
||||
# 计算每个片段的开始位置(基于duration字段)
|
||||
current_position = 0 # 初始位置(秒)
|
||||
|
||||
# 遍历脚本中的每个片段
|
||||
for segment, audio_file in zip(list_script, audio_files):
|
||||
for segment in list_script:
|
||||
try:
|
||||
# 加载TTS音频文件
|
||||
tts_audio = AudioSegment.from_file(audio_file)
|
||||
|
||||
# 获取片段的开始和结束时间
|
||||
start_time, end_time = segment['new_timestamp'].split('-')
|
||||
start_seconds = utils.time_to_seconds(start_time)
|
||||
end_seconds = utils.time_to_seconds(end_time)
|
||||
|
||||
# 根据OST设置处理音频
|
||||
if segment['OST'] == 0:
|
||||
# 只使用TTS音频
|
||||
final_audio = final_audio.overlay(tts_audio, position=start_seconds * 1000)
|
||||
elif segment['OST'] == 1:
|
||||
# 只使用原声(假设原声已经在视频中)
|
||||
continue
|
||||
elif segment['OST'] == 2:
|
||||
# 混合TTS音频和原声
|
||||
original_audio = AudioSegment.silent(duration=(end_seconds - start_seconds) * 1000)
|
||||
mixed_audio = original_audio.overlay(tts_audio)
|
||||
final_audio = final_audio.overlay(mixed_audio, position=start_seconds * 1000)
|
||||
# 获取片段时长(秒)
|
||||
duration = segment['duration']
|
||||
|
||||
# 检查audio字段是否为空
|
||||
if segment['audio'] and os.path.exists(segment['audio']):
|
||||
# 加载TTS音频文件
|
||||
tts_audio = AudioSegment.from_file(segment['audio'])
|
||||
|
||||
# 将TTS音频添加到最终音频
|
||||
final_audio = final_audio.overlay(tts_audio, position=current_position * 1000)
|
||||
else:
|
||||
# audio为空,不添加音频,仅保留间隔
|
||||
logger.info(f"片段 {segment.get('timestamp', '')} 没有音频文件,保留 {duration} 秒的间隔")
|
||||
|
||||
# 更新下一个片段的开始位置
|
||||
current_position += duration
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"处理音频文件 {audio_file} 时出错: {str(e)}")
|
||||
logger.error(f"处理音频片段时出错: {str(e)}")
|
||||
# 即使处理失败,也要更新位置,确保后续片段位置正确
|
||||
if 'duration' in segment:
|
||||
current_position += segment['duration']
|
||||
continue
|
||||
|
||||
# 保存合并后的音频文件
|
||||
output_audio_path = os.path.join(utils.task_dir(task_id), "final_audio.mp3")
|
||||
output_audio_path = os.path.join(utils.task_dir(task_id), "merger_audio.mp3")
|
||||
final_audio.export(output_audio_path, format="mp3")
|
||||
logger.info(f"合并后的音频文件已保存: {output_audio_path}")
|
||||
|
||||
@ -93,7 +94,7 @@ def time_to_seconds(time_str):
|
||||
|
||||
# 分割时间部分
|
||||
parts = time_part.split(':')
|
||||
|
||||
|
||||
if len(parts) == 3: # HH:MM:SS
|
||||
h, m, s = map(int, parts)
|
||||
seconds = h * 3600 + m * 60 + s
|
||||
@ -118,11 +119,11 @@ def extract_timestamp(filename):
|
||||
# 从文件名中提取时间部分
|
||||
time_part = filename.split('_', 1)[1].split('.')[0] # 获取 "00_06,500-00_24,800" 部分
|
||||
start_time, end_time = time_part.split('-') # 分割成开始和结束时间
|
||||
|
||||
|
||||
# 将下划线格式转换回冒号格式
|
||||
start_time = start_time.replace('_', ':')
|
||||
end_time = end_time.replace('_', ':')
|
||||
|
||||
|
||||
# 将时间戳转换为秒
|
||||
start_seconds = time_to_seconds(start_time)
|
||||
end_seconds = time_to_seconds(end_time)
|
||||
@ -135,17 +136,36 @@ def extract_timestamp(filename):
|
||||
|
||||
if __name__ == "__main__":
|
||||
# 示例用法
|
||||
audio_files =[
|
||||
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00:06-00:24.mp3",
|
||||
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00:32-00:38.mp3",
|
||||
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00:43-00:52.mp3",
|
||||
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00:52-01:09.mp3",
|
||||
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_01:13-01:15.mp3",
|
||||
]
|
||||
total_duration = 38
|
||||
video_script_path = "/Users/apple/Desktop/home/NarratoAI/resource/scripts/test003.json"
|
||||
with open(video_script_path, "r", encoding="utf-8") as f:
|
||||
video_script = json.load(f)
|
||||
total_duration = 90
|
||||
|
||||
output_file = merge_audio_files("test456", audio_files, total_duration, video_script)
|
||||
video_script = [
|
||||
{'picture': '【解说】好的,各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!',
|
||||
'timestamp': '00:00:00-00:00:26',
|
||||
'narration': '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!',
|
||||
'OST': 0, 'duration': 26,
|
||||
'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3'},
|
||||
{'picture': '【解说】上一集我们看到,范闲在北齐遭遇了惊天变故,生死不明!', 'timestamp': '00:01:15-00:01:29',
|
||||
'narration': '但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…',
|
||||
'OST': 0, 'duration': 14,
|
||||
'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3'},
|
||||
{'picture': '画面切到王启年小心翼翼地向范闲汇报。', 'timestamp': '00:04:41-00:04:58',
|
||||
'narration': '我发现大人的死讯不光是在民间,在官场上也它传开了,所以呢,所以啊,可不是什么好事,将来您跟陛下怎么交代,这可是欺君之罪',
|
||||
'OST': 1, 'duration': 17,
|
||||
'audio': ''},
|
||||
{'picture': '【解说】"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。',
|
||||
'timestamp': '00:04:58-00:05:20',
|
||||
'narration': '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!',
|
||||
'OST': 0, 'duration': 22,
|
||||
'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3'},
|
||||
{'picture': '【解说】但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
|
||||
'timestamp': '00:05:45-00:05:53',
|
||||
'narration': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
|
||||
'OST': 0, 'duration': 8,
|
||||
'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3'},
|
||||
{'picture': '画面切换到范闲蒙面闯入皇宫,被侍卫包围的场景。', 'timestamp': '00:06:00-00:06:03',
|
||||
'narration': '抓刺客',
|
||||
'OST': 1, 'duration': 3,
|
||||
'audio': ''}]
|
||||
|
||||
output_file = merge_audio_files("test456", total_duration, video_script)
|
||||
print(output_file)
|
||||
|
||||
256
app/services/clip_video.py
Normal file
256
app/services/clip_video.py
Normal file
@ -0,0 +1,256 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: UTF-8 -*-
|
||||
|
||||
'''
|
||||
@Project: NarratoAI
|
||||
@File : clip_video
|
||||
@Author : 小林同学
|
||||
@Date : 2025/5/6 下午6:14
|
||||
'''
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import json
|
||||
import hashlib
|
||||
from loguru import logger
|
||||
from typing import Dict, List, Optional
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def parse_timestamp(timestamp: str) -> tuple:
|
||||
"""
|
||||
解析时间戳字符串,返回开始和结束时间
|
||||
|
||||
Args:
|
||||
timestamp: 格式为'HH:MM:SS-HH:MM:SS'或'HH:MM:SS,sss-HH:MM:SS,sss'的时间戳字符串
|
||||
|
||||
Returns:
|
||||
tuple: (开始时间, 结束时间) 格式为'HH:MM:SS'或'HH:MM:SS,sss'
|
||||
"""
|
||||
start_time, end_time = timestamp.split('-')
|
||||
return start_time, end_time
|
||||
|
||||
|
||||
def calculate_end_time(start_time: str, duration: float, extra_seconds: float = 1.0) -> str:
|
||||
"""
|
||||
根据开始时间和持续时间计算结束时间
|
||||
|
||||
Args:
|
||||
start_time: 开始时间,格式为'HH:MM:SS'或'HH:MM:SS,sss'(带毫秒)
|
||||
duration: 持续时间,单位为秒
|
||||
extra_seconds: 额外添加的秒数,默认为1秒
|
||||
|
||||
Returns:
|
||||
str: 计算后的结束时间,格式与输入格式相同
|
||||
"""
|
||||
# 检查是否包含毫秒
|
||||
has_milliseconds = ',' in start_time
|
||||
milliseconds = 0
|
||||
|
||||
if has_milliseconds:
|
||||
time_part, ms_part = start_time.split(',')
|
||||
h, m, s = map(int, time_part.split(':'))
|
||||
milliseconds = int(ms_part)
|
||||
else:
|
||||
h, m, s = map(int, start_time.split(':'))
|
||||
|
||||
# 转换为总毫秒数
|
||||
total_milliseconds = ((h * 3600 + m * 60 + s) * 1000 + milliseconds +
|
||||
int((duration + extra_seconds) * 1000))
|
||||
|
||||
# 计算新的时、分、秒、毫秒
|
||||
ms_new = total_milliseconds % 1000
|
||||
total_seconds = total_milliseconds // 1000
|
||||
h_new = int(total_seconds // 3600)
|
||||
m_new = int((total_seconds % 3600) // 60)
|
||||
s_new = int(total_seconds % 60)
|
||||
|
||||
# 返回与输入格式一致的时间字符串
|
||||
if has_milliseconds:
|
||||
return f"{h_new:02d}:{m_new:02d}:{s_new:02d},{ms_new:03d}"
|
||||
else:
|
||||
return f"{h_new:02d}:{m_new:02d}:{s_new:02d}"
|
||||
|
||||
|
||||
def check_hardware_acceleration() -> Optional[str]:
|
||||
"""
|
||||
检查系统支持的硬件加速选项
|
||||
|
||||
Returns:
|
||||
Optional[str]: 硬件加速参数,如果不支持则返回None
|
||||
"""
|
||||
# 检查NVIDIA GPU支持
|
||||
try:
|
||||
nvidia_check = subprocess.run(
|
||||
["ffmpeg", "-hwaccel", "cuda", "-i", "/dev/null", "-f", "null", "-"],
|
||||
stderr=subprocess.PIPE, stdout=subprocess.PIPE, text=True, check=False
|
||||
)
|
||||
if nvidia_check.returncode == 0:
|
||||
return "cuda"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 检查MacOS videotoolbox支持
|
||||
try:
|
||||
videotoolbox_check = subprocess.run(
|
||||
["ffmpeg", "-hwaccel", "videotoolbox", "-i", "/dev/null", "-f", "null", "-"],
|
||||
stderr=subprocess.PIPE, stdout=subprocess.PIPE, text=True, check=False
|
||||
)
|
||||
if videotoolbox_check.returncode == 0:
|
||||
return "videotoolbox"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 检查Intel Quick Sync支持
|
||||
try:
|
||||
qsv_check = subprocess.run(
|
||||
["ffmpeg", "-hwaccel", "qsv", "-i", "/dev/null", "-f", "null", "-"],
|
||||
stderr=subprocess.PIPE, stdout=subprocess.PIPE, text=True, check=False
|
||||
)
|
||||
if qsv_check.returncode == 0:
|
||||
return "qsv"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def clip_video(
|
||||
video_origin_path: str,
|
||||
tts_result: List[Dict],
|
||||
output_dir: Optional[str] = None,
|
||||
task_id: Optional[str] = None
|
||||
) -> Dict[str, str]:
|
||||
"""
|
||||
根据时间戳裁剪视频
|
||||
|
||||
Args:
|
||||
video_origin_path: 原始视频的路径
|
||||
tts_result: 包含时间戳和持续时间信息的列表
|
||||
output_dir: 输出目录路径,默认为None时会自动生成
|
||||
task_id: 任务ID,用于生成唯一的输出目录,默认为None时会自动生成
|
||||
|
||||
Returns:
|
||||
Dict[str, str]: 时间戳到裁剪后视频路径的映射
|
||||
"""
|
||||
# 检查视频文件是否存在
|
||||
if not os.path.exists(video_origin_path):
|
||||
raise FileNotFoundError(f"视频文件不存在: {video_origin_path}")
|
||||
|
||||
# 如果未提供task_id,则根据输入生成一个唯一ID
|
||||
if task_id is None:
|
||||
content_for_hash = f"{video_origin_path}_{json.dumps(tts_result)}"
|
||||
task_id = hashlib.md5(content_for_hash.encode()).hexdigest()
|
||||
|
||||
# 设置输出目录
|
||||
if output_dir is None:
|
||||
output_dir = os.path.join(
|
||||
os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))),
|
||||
"storage", "temp", "clip_video", task_id
|
||||
)
|
||||
|
||||
# 确保输出目录存在
|
||||
Path(output_dir).mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# 检查硬件加速支持
|
||||
hwaccel = check_hardware_acceleration()
|
||||
hwaccel_args = []
|
||||
if hwaccel:
|
||||
hwaccel_args = ["-hwaccel", hwaccel]
|
||||
logger.info(f"使用硬件加速: {hwaccel}")
|
||||
|
||||
# 存储裁剪结果
|
||||
result = {}
|
||||
|
||||
for item in tts_result:
|
||||
_id = item.get("_id", item.get("timestamp", "unknown"))
|
||||
timestamp = item["timestamp"]
|
||||
start_time, _ = parse_timestamp(timestamp)
|
||||
|
||||
# 根据持续时间计算真正的结束时间(加上1秒余量)
|
||||
duration = item["duration"]
|
||||
calculated_end_time = calculate_end_time(start_time, duration)
|
||||
|
||||
# 转换为FFmpeg兼容的时间格式(逗号替换为点)
|
||||
ffmpeg_start_time = start_time.replace(',', '.')
|
||||
ffmpeg_end_time = calculated_end_time.replace(',', '.')
|
||||
|
||||
# 格式化输出文件名(使用连字符替代冒号和逗号)
|
||||
safe_start_time = start_time.replace(':', '-').replace(',', '-')
|
||||
safe_end_time = calculated_end_time.replace(':', '-').replace(',', '-')
|
||||
output_filename = f"vid_{safe_start_time}@{safe_end_time}.mp4"
|
||||
output_path = os.path.join(output_dir, output_filename)
|
||||
|
||||
# 构建FFmpeg命令
|
||||
ffmpeg_cmd = [
|
||||
"ffmpeg", "-y", *hwaccel_args,
|
||||
"-i", video_origin_path,
|
||||
"-ss", ffmpeg_start_time,
|
||||
"-to", ffmpeg_end_time,
|
||||
"-c:v", "h264_videotoolbox" if hwaccel == "videotoolbox" else "libx264",
|
||||
"-c:a", "aac",
|
||||
"-strict", "experimental",
|
||||
output_path
|
||||
]
|
||||
|
||||
# 执行FFmpeg命令
|
||||
try:
|
||||
logger.info(f"裁剪视频片段: {timestamp} -> {ffmpeg_start_time}到{ffmpeg_end_time}")
|
||||
# logger.debug(f"执行命令: {' '.join(ffmpeg_cmd)}")
|
||||
|
||||
process = subprocess.run(
|
||||
ffmpeg_cmd,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
check=True
|
||||
)
|
||||
|
||||
result[_id] = output_path
|
||||
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.error(f"裁剪视频片段失败: {timestamp}")
|
||||
logger.error(f"错误信息: {e.stderr}")
|
||||
raise RuntimeError(f"视频裁剪失败: {e.stderr}")
|
||||
|
||||
return result
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
video_origin_path = "/Users/apple/Desktop/home/NarratoAI/resource/videos/qyn2-2无片头片尾.mp4"
|
||||
|
||||
tts_result = [{'timestamp': '00:00:00-00:01:15',
|
||||
'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3',
|
||||
'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt',
|
||||
'duration': 25.55,
|
||||
'text': '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!'},
|
||||
{'timestamp': '00:01:15-00:04:40',
|
||||
'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3',
|
||||
'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt',
|
||||
'duration': 13.488,
|
||||
'text': '但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…'},
|
||||
{'timestamp': '00:04:58-00:05:45',
|
||||
'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3',
|
||||
'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt',
|
||||
'duration': 21.363,
|
||||
'text': '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!'},
|
||||
{'timestamp': '00:05:45-00:06:00',
|
||||
'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3',
|
||||
'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt',
|
||||
'duration': 7.675, 'text': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!'}]
|
||||
subclip_path_videos = {
|
||||
'00:00:00-00:01:15': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-00-00-00-01-15.mp4',
|
||||
'00:01:15-00:04:40': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-01-15-00-04-40.mp4',
|
||||
'00:04:41-00:04:58': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-04-41-00-04-58.mp4',
|
||||
'00:04:58-00:05:45': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-04-58-00-05-45.mp4',
|
||||
'00:05:45-00:06:00': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-05-45-00-06-00.mp4',
|
||||
'00:06:00-00:06:03': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-06-00-00-06-03.mp4',
|
||||
}
|
||||
|
||||
# 使用方法示例
|
||||
try:
|
||||
result = clip_video(video_origin_path, tts_result, subclip_path_videos)
|
||||
print("裁剪结果:")
|
||||
print(json.dumps(result, indent=4, ensure_ascii=False))
|
||||
except Exception as e:
|
||||
print(f"发生错误: {e}")
|
||||
264
app/services/generate_narration_script.py
Normal file
264
app/services/generate_narration_script.py
Normal file
@ -0,0 +1,264 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: UTF-8 -*-
|
||||
|
||||
'''
|
||||
@Project: NarratoAI
|
||||
@File : 生成介绍文案
|
||||
@Author : 小林同学
|
||||
@Date : 2025/5/8 上午11:33
|
||||
'''
|
||||
|
||||
import json
|
||||
import os
|
||||
import traceback
|
||||
from openai import OpenAI
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def parse_frame_analysis_to_markdown(json_file_path):
|
||||
"""
|
||||
解析视频帧分析JSON文件并转换为Markdown格式
|
||||
|
||||
:param json_file_path: JSON文件路径
|
||||
:return: Markdown格式的字符串
|
||||
"""
|
||||
# 检查文件是否存在
|
||||
if not os.path.exists(json_file_path):
|
||||
return f"错误: 文件 {json_file_path} 不存在"
|
||||
|
||||
try:
|
||||
# 读取JSON文件
|
||||
with open(json_file_path, 'r', encoding='utf-8') as file:
|
||||
data = json.load(file)
|
||||
|
||||
# 初始化Markdown字符串
|
||||
markdown = ""
|
||||
|
||||
# 获取总结和帧观察数据
|
||||
summaries = data.get('overall_activity_summaries', [])
|
||||
frame_observations = data.get('frame_observations', [])
|
||||
|
||||
# 按批次组织数据
|
||||
batch_frames = {}
|
||||
for frame in frame_observations:
|
||||
batch_index = frame.get('batch_index')
|
||||
if batch_index not in batch_frames:
|
||||
batch_frames[batch_index] = []
|
||||
batch_frames[batch_index].append(frame)
|
||||
|
||||
# 生成Markdown内容
|
||||
for i, summary in enumerate(summaries, 1):
|
||||
batch_index = summary.get('batch_index')
|
||||
time_range = summary.get('time_range', '')
|
||||
batch_summary = summary.get('summary', '')
|
||||
|
||||
markdown += f"## 片段 {i}\n"
|
||||
markdown += f"- 时间范围:{time_range}\n"
|
||||
|
||||
# 添加片段描述
|
||||
markdown += f"- 片段描述:{batch_summary}\n" if batch_summary else f"- 片段描述:\n"
|
||||
|
||||
markdown += "- 详细描述:\n"
|
||||
|
||||
# 添加该批次的帧观察详情
|
||||
frames = batch_frames.get(batch_index, [])
|
||||
for frame in frames:
|
||||
timestamp = frame.get('timestamp', '')
|
||||
observation = frame.get('observation', '')
|
||||
|
||||
# 直接使用原始文本,不进行分割
|
||||
markdown += f" - {timestamp}: {observation}\n" if observation else f" - {timestamp}: \n"
|
||||
|
||||
markdown += "\n"
|
||||
|
||||
return markdown
|
||||
|
||||
except Exception as e:
|
||||
return f"处理JSON文件时出错: {traceback.format_exc()}"
|
||||
|
||||
|
||||
def generate_narration(markdown_content, api_key, base_url, model):
|
||||
"""
|
||||
调用OpenAI API根据视频帧分析的Markdown内容生成解说文案
|
||||
|
||||
:param markdown_content: Markdown格式的视频帧分析内容
|
||||
:param api_key: OpenAI API密钥
|
||||
:param base_url: API基础URL,如果使用非官方API
|
||||
:param model: 使用的模型名称
|
||||
:return: 生成的解说文案
|
||||
"""
|
||||
try:
|
||||
# 构建提示词
|
||||
prompt = """
|
||||
我是一名荒野建造解说的博主,以下是一些同行的对标文案,请你深度学习并总结这些文案的风格特点跟内容特点:
|
||||
|
||||
<example_text_1>
|
||||
解压助眠的天花板就是荒野建造,沉浸丝滑的搭建过程可以说每一帧都是极致享受,我保证强迫症来了都找不出一丁点毛病。更别说全屋严丝合缝的拼接工艺,还能轻松抵御零下二十度气温,让你居住的每一天都温暖如春。
|
||||
在家闲不住的西姆今天也打算来一次野外建造,行走没多久他就发现许多倒塌的树,任由它们自生自灭不如将其利用起来。想到这他就开始挥舞铲子要把地基挖掘出来,虽然每次只能挖一点点,但架不住他体能惊人。没多长时间一个 2x3 的深坑就赫然出现,这深度住他一人绰绰有余。
|
||||
随后他去附近收集来原木,这些都是搭建墙壁的最好材料。而在投入使用前自然要把表皮刮掉,防止森林中的白蚁蛀虫。处理好一大堆后西姆还在两端打孔,使用木钉固定在一起。这可不是用来做墙壁的,而是做庇护所的承重柱。只要木头间的缝隙足够紧密,那搭建出的木屋就能足够坚固。
|
||||
每向上搭建一层,他都会在中间塞入苔藓防寒,保证不会泄露一丝热量。其他几面也是用相同方法,很快西姆就做好了三面墙壁,每一根木头都极其工整,保证强迫症来了都要点个赞再走。
|
||||
在继续搭建墙壁前西姆决定将壁炉制作出来,毕竟森林夜晚的气温会很低,保暖措施可是重中之重。完成后他找来一块大树皮用来充当庇护所的大门,而上面刮掉的木屑还能作为壁炉的引火物,可以说再完美不过。
|
||||
测试了排烟没问题后他才开始搭建最后一面墙壁,这一面要预留门和窗,所以在搭建到一半后还需要在原木中间开出卡口,让自己劈砍时能轻松许多。此时只需将另外一根如法炮制,两端拼接在一起后就是一扇大小适中的窗户。而随着随后一层苔藓铺好,最后一根原木落位,这个庇护所的雏形就算完成。
|
||||
大门的安装他没选择用合页,而是在底端雕刻出榫头,门框上则雕刻出榫眼,只能说西姆的眼就是一把尺,这完全就是严丝合缝。此时他才开始搭建屋顶。这里西姆用的方法不同,他先把最外围的原木固定好,随后将原木平铺在上面,就能得到完美的斜面屋顶。等他将四周的围栏也装好后,工整的屋顶看起来十分舒服,西姆躺上去都不想动。
|
||||
稍作休息后,他利用剩余的苔藓,对屋顶的缝隙处密封。可这样西姆觉得不够保险,于是他找来一些黏土,再次对原本的缝隙二次加工,保管这庇护所冬天也暖和。最后只需要平铺上枯叶,以及挖掘出的泥土,整个屋顶就算完成。
|
||||
考虑到庇护所的美观性,自然少不了覆盖上苔藓,翠绿的颜色看起来十分舒服。就连门口的庭院旁,他都移植了许多小树做点缀,让这木屋与周边环境融为一体。西姆才刚完成好这件事,一场大雨就骤然降临。好在此时的他已经不用淋雨,更别说这屋顶防水十分不错,室内没一点雨水渗透进来。
|
||||
等待温度回升的过程,西姆利用墙壁本身的凹槽,把床框镶嵌在上面,只需要铺上苔藓,以及自带的床单枕头,一张完美的单人床就做好。辛苦劳作一整天,西姆可不会亏待自己。他将自带的牛肉腌制好后,直接放到壁炉中烤,只需要等待三十分钟,就能享受这美味的一顿。
|
||||
在辛苦建造一星期后,他终于可以在自己搭建的庇护所中,享受最纯正的野外露营。后面西姆回家补给了一堆物资,再次回来时森林已经大雪纷飞,让他原本翠绿的小屋,更换上了冬季限定皮肤。好在内部设施没受什么影响,和他离开时一样整洁。
|
||||
就是房间中已经没多少柴火,让西姆今天又得劈柴。寒冷干燥的天气,让木头劈起来十分轻松。没多久他就收集到一大堆,这些足够燃烧好几天。虽然此时外面大雪纷飞,但小屋中却开始逐渐温暖。这次他除了带来一些食物外,还有几瓶调味料,以及一整套被褥,让自己的居住舒适度提高一大截。
|
||||
而秋天他有收集干草的缘故,只需要塞入枕套中密封起来,就能作为靠垫用。就这居住条件,比一般人在家过的还要奢侈。趁着壁炉木头变木炭的过程,西姆则开始不紧不慢的处理食物。他取出一块牛排,改好花刀以后,撒上一堆调料腌制起来。接着用锡纸包裹好,放到壁炉中直接炭烤,搭配上自带的红酒,是一个非常好的选择。
|
||||
随着时间来到第二天,外面的积雪融化了不少,西姆简单做顿煎蛋补充体力后,决定制作一个室外篝火堆,用来晚上驱散周边野兽。搭建这玩意没什么技巧,只需要找到一大堆木棍,利用大树的夹缝将其掰弯,然后将其堆积在一起,就是一个简易版的篝火堆。看这外形有点像帐篷,好在西姆没想那么多。
|
||||
等待天色暗淡下来后,他才来到室外将其点燃,顺便处理下多余的废料。只可惜这场景没朋友陪在身边,对西姆来说可能是个遗憾。而哪怕森林只有他一个人,都依旧做了好几个小时。等到里面的篝火彻底燃尽后,西姆还找来雪球,覆盖到上面将火熄灭,这防火意识可谓十分好。最后在室内二十五度的高温下,裹着被子睡觉。
|
||||
</example_text_1>
|
||||
|
||||
<example_text_2>
|
||||
解压助眠的天花板就是荒野建造,沉浸丝滑的搭建过程每一帧都是极致享受,全屋严丝合缝的拼接工艺,能轻松抵御零下二十度气温,居住体验温暖如春。
|
||||
在家闲不住的西姆开启野外建造。他发现倒塌的树,决定加以利用。先挖掘出 2x3 的深坑作为地基,接着收集原木,刮掉表皮防白蚁蛀虫,打孔用木钉固定制作承重柱。搭建墙壁时,每一层都塞入苔藓防寒,很快做好三面墙。
|
||||
为应对森林夜晚低温,西姆制作壁炉,用大树皮当大门,刮下的木屑做引火物。搭建最后一面墙时预留门窗,通过在原木中间开口拼接做出窗户。大门采用榫卯结构安装,严丝合缝。
|
||||
搭建屋顶时,先固定外围原木,再平铺原木形成斜面屋顶,之后用苔藓、黏土密封缝隙,铺上枯叶和泥土。为美观,在木屋覆盖苔藓,移植小树点缀。完工时遇大雨,木屋防水良好。
|
||||
西姆利用墙壁凹槽镶嵌床框,铺上苔藓、床单枕头做成床。劳作一天后,他用壁炉烤牛肉享用。建造一星期后,他开始野外露营。
|
||||
后来西姆回家补给物资,回来时森林大雪纷飞。他劈柴储备,带回食物、调味料和被褥,提高居住舒适度,还用干草做靠垫。他用壁炉烤牛排,搭配红酒。
|
||||
第二天,积雪融化,西姆制作室外篝火堆防野兽。用大树夹缝掰弯木棍堆积而成,晚上点燃处理废料,结束后用雪球灭火,最后在室内二十五度的环境中裹被入睡。
|
||||
</example_text_2>
|
||||
|
||||
<example_text_3>
|
||||
如果战争到来,这个深埋地下十几米的庇护所绝对是 bug 般的存在。即使被敌人发现,还能通过快速通道一秒逃出。里面不仅有竹子、地暖、地下水井,还自制抽水机。在解决用水问题的同时,甚至自研无土栽培技术,过上完全自给自足的生活。
|
||||
阿伟的老婆美如花,但阿伟从来不回家,来到野外他乐哈哈,一言不合就开挖。众所周知当战争来临时,地下堡垒的安全性是最高的。阿伟苦苦研习两载半,只为练就一身挖洞本领。在这双逆天麒麟臂的加持下,如此坚硬的泥土都只能当做炮灰。
|
||||
得到了充足的空间后,他便开始对这些边缘进行打磨。随后阿伟将细线捆在木棍上,以此描绘出圆柱的轮廓。接着再一点点铲掉多余的部分。虽然是由泥土一体式打造,但这样的桌子保准用上千年都不成问题。
|
||||
考虑到十几米的深度进出非常不方便,于是阿伟找来两根长达 66.6 米的木头,打算为庇护所打造一条快速通道。只见他将木桩牢牢地插入地下,并顺着洞口的方向延伸出去,直到贯穿整个山洞。接着在每个木桩的连接处钉入铁钉,确保轨道不能有一毫米的偏差。完成后再制作一个木质框架,从而达到前后滑动的效果。
|
||||
不得不说阿伟这手艺简直就是大钢管子杵青蛙。在上面放上一个木制的车斗,还能加快搬运泥土的速度。没多久庇护所的内部就已经初见雏形。为了住起来更加舒适,还需要为自己打造一张床。虽然深处的泥土同样很坚固,但好处就是不用担心垮塌的风险。
|
||||
阿伟不仅设计了更加符合人体工学的拱形,并且还在一旁雕刻处壁龛。就是这氛围怎么看着有点不太吉利。别看阿伟一身腱子肉,但这身体里的艺术细菌可不少。每个边缘的地方他都做了精雕细琢,瞬间让整个卧室的颜值提升一大截。
|
||||
住在地下的好处就是房子面积全靠挖,每平方消耗两个半馒头。不仅没有了房贷的压力,就连买墓地的钱也省了。阿伟将中间的墙壁挖空,从而得到取暖的壁炉。当然最重要的还有排烟问题,要想从上往下打通十几米的山体是件极其困难的事。好在阿伟年轻时报过忆坤年的古墓派补习班,这打洞技术堪比隔壁学校的土拨鼠专业。虽然深度长达十几米,但排烟效果却一点不受影响,一个字专业!
|
||||
随后阿伟继续对壁炉底部雕刻,打通了底部放柴火的空间,并制作出放锅的灶头。完成后阿伟从侧面将壁炉打通,并制作出一条导热的通道,以此连接到床铺的位置。毕竟住在这么一个风湿宝地,不注意保暖除湿很容易得老寒腿。
|
||||
阿伟在床面上挖出一条条管道,以便于温度能传输到床的每个角落。接下来就可以根据这些通道的长度裁切出同样长短的竹子,根据竹筒的大小凿出相互连接的孔洞,最后再将竹筒内部打通,以达到温度传送的效果。
|
||||
而后阿伟将这些管道安装到凹槽内,在他严谨的制作工艺下,每根竹子刚好都能镶嵌进去。在铺设床面之前还需要用木塞把圆孔堵住,防止泥土掉落进管道。泥土虽然不能隔绝湿气,但却是十分优良的导热材料。等他把床面都压平后就可以小心的将这些木塞拔出来,最后再用黏土把剩余的管道也遮盖起来,直到整个墙面恢复原样。
|
||||
接下来还需要测试一下加热效果,当他把火点起来后,温度很快就传送到了管道内,把火力一点点加大,直到热气流淌到更远的床面。随着小孔里的青烟冒出,也预示着阿伟的地暖可以投入使用。而后阿伟制作了一些竹条,并用细绳将它们喜结连理。
|
||||
千里之行始于足下,美好的家园要靠自己双手打造。明明可以靠才艺吃饭的阿伟偏偏要用八块腹肌征服大家,就问这样的男人哪个野生婆娘不喜欢?完成后阿伟还用自己 35 码的大腚感受了一下,真烫!
|
||||
随后阿伟来到野区找到一根上好的雷击木,他当即就把木头咔嚓成两段,并取下两节较为完整的带了回去,刚好能和圆桌配套。另外一个在里面凿出凹槽,并插入木棍连接,得到一个夯土的木锤。住过农村的小伙伴都知道,这样夯出来的地面堪比水泥地,不仅坚硬耐磨,还不用担心脚底打滑。忙碌了一天的阿伟已经饥渴难耐,拿出野生小烤肠,安安心心住新房,光脚爬上大热炕,一觉能睡到天亮。
|
||||
第二天阿伟打算将房间扩宽,毕竟吃住的地方有了,还要解决个人卫生的问题。阿伟在另一侧增加了一个房间,他打算将这里打造成洗澡的地方。为了防止泥土垮塌,他将顶部做成圆弧形,等挖出足够的空间后,旁边的泥土已经堆成了小山。
|
||||
为了方便清理这些泥土,阿伟在之前的轨道增加了转弯,交接处依然是用铁钉固定,一直延伸到房间的最里面。有了运输车的帮助,这些成吨的泥土也能轻松的运送出去,并且还能体验过山车的感觉。很快他就完成了清理工作。
|
||||
为了更方便的在里面洗澡,他将底部一点点挖空,这么大的浴缸,看来阿伟并不打算一个人住。完成后他将墙面雕刻的凹凸有致,让这里看起来更加豪华。接着用洛阳铲挖出排水口,并用一根相同大小的竹筒作为开关。
|
||||
由于四周都是泥土还不能防水,阿伟特意找了一些白蚁巢,用来制作可以防水的野生水泥。现在就可以将里里外外,能接触到水的地方都涂抹一遍。细心的阿伟还找来这种 500 克一斤的鹅卵石,对池子表面进行装饰。
|
||||
没错,水源问题阿伟早已经考虑在内,他打算直接在旁边挖个水井,毕竟已经挖了这么深,再向下挖一挖,应该就能到达地下水的深度。经过几日的奋战,能看得出阿伟已经消瘦了不少,但一想到马上就能拥有的豪宅,他直接化身为无情的挖土机器,很快就挖到了好几米的深度。
|
||||
考虑到自己的弹跳力有限,阿伟在一旁定入木桩,然后通过绳子爬上爬下。随着深度越来越深,井底已经开始渗出水来,这也预示着打井成功。没多久这里面将渗满泉水,仅凭一次就能挖到水源,看来这里还真是块风湿宝地。
|
||||
随后阿伟在井口四周挖出凹槽,以便于井盖的安置。这一量才知道,井的深度已经达到了足足的 5 米。阿伟把木板组合在一起,再沿着标记切掉多余部分,他甚至还给井盖做了把手。可是如何从这么深的井里打水还是个问题,但从阿伟坚定的眼神来看,他应该想到了解决办法。
|
||||
只见他将树桩锯成两半,然后用凿子把里面一点点掏空,另外一半也是如法炮制。接着还要在底部挖出圆孔,要想成功将水从 5 米深的地方抽上来,那就不得不提到大家熟知的勾股定理。没错,这跟勾股定理没什么关系。
|
||||
阿伟给竹筒做了一个木塞,并在里面打上安装连接轴的孔。为了增加密闭性,阿伟不得不牺牲了自己的 AJ,剪出与木塞相同的大小后,再用木钉固定住。随后他收集了一些树胶,并放到火上加热融化。接下来就可以涂在木塞上增加使用寿命。
|
||||
现在将竹筒组装完成,就可以利用虹吸原理将水抽上来。完成后就可以把井盖盖上去,再用泥土在上面覆盖,现在就不用担心失足掉下去了。
|
||||
接下来阿伟去采集了一些大漆,将它涂抹在木桶接缝处,就能将其二合为一。完了再接入旁边浴缸的入水口,每个连接的地方都要做好密封,不然后面很容易漏水。随后就可以安装上活塞,并用一根木桩作为省力杠杆,根据空气压强的原理将井水抽上来。
|
||||
经过半小时的来回拉扯,硕大的浴缸终于被灌满,阿伟也是忍不住洗了把脸。接下来还需要解决排水的问题,阿伟在地上挖出沟渠,一直贯穿到屋外,然后再用竹筒从出水口连接,每个接口处都要抹上胶水,就连门外的出水口他都做了隐藏。
|
||||
在野外最重要的就是庇护所、水源还有食物。既然已经完成了前二者,那么阿伟还需要拥有可持续发展的食物来源。他先是在地上挖了两排地洞,然后在每根竹筒的表面都打上无数孔洞,这就是他打算用来种植的载体。在此之前,还需要用大火对竹筒进行杀菌消毒。
|
||||
趁着这时候,他去搬了一麻袋的木屑,先用芭蕉叶覆盖在上面,再铺上厚厚的黏土隔绝温度。在火焰的温度下,能让里面的木屑达到生长条件。
|
||||
等到第二天所有材料都晾凉后,阿伟才将竹筒内部掏空,并将木屑一点点地塞入竹筒。一切准备就绪,就可以将竹筒插入提前挖好的地洞。最后再往竹筒里塞入种子,依靠房间内的湿度和温度,就能达到大棚种植的效果。稍加时日,这些种子就会慢慢发芽。
|
||||
虽然暂时还吃不上自己培养的食物,但好在阿伟从表哥贺强那里学到不少钓鱼本领,哪怕只有一根小小的竹竿,也能让他钓上两斤半的大鲶鱼。新鲜的食材,那肯定是少不了高温消毒的过程。趁着鱼没熟,阿伟直接爬进浴缸,冰凉的井水瞬间洗去了身上的疲惫。这一刻的阿伟是无比的享受。
|
||||
不久后鱼也烤得差不多了,阿伟的生活现在可以说是有滋有味。住在十几米的地下,不仅能安全感满满,哪怕遇到危险,还能通过轨道快速逃生。
|
||||
<example_text_3>
|
||||
|
||||
<video_frame_description>
|
||||
%s
|
||||
</video_frame_description>
|
||||
|
||||
我正在尝试做这个内容的解说纪录片视频,我需要你以 <video_frame_description> </video_frame_description> 中的内容为解说目标,根据我刚才提供给你的对标文案 <example_text> 特点,以及你总结的特点,帮我生成一段关于荒野建造的解说文案,文案需要符合平台受欢迎的解说风格,请使用 json 格式进行输出;使用 <output> 中的输出格式:
|
||||
|
||||
<output>
|
||||
{
|
||||
"items": [
|
||||
{
|
||||
"_id": 1, # 唯一递增id
|
||||
"timestamp": "00:00:05,390-00:00:10,430",
|
||||
"picture": "画面描述",
|
||||
"narration": "解说文案",
|
||||
}
|
||||
}
|
||||
</output>
|
||||
|
||||
<restriction>
|
||||
1. 只输出 json 内容,不要输出其他任何说明性的文字
|
||||
2. 解说文案的语言使用 简体中文
|
||||
3. 严禁虚构画面,所有画面只能从 <video_frame_description> 中摘取
|
||||
</restriction>
|
||||
""" % (markdown_content)
|
||||
|
||||
# 使用OpenAI SDK初始化客户端
|
||||
client = OpenAI(
|
||||
api_key=api_key,
|
||||
base_url=base_url
|
||||
)
|
||||
|
||||
# 使用SDK发送请求
|
||||
if model not in ["deepseek-reasoner"]:
|
||||
# deepseek-reasoner 不支持 json 输出
|
||||
response = client.chat.completions.create(
|
||||
model=model,
|
||||
messages=[
|
||||
{"role": "system", "content": "你是一名专业的短视频解说文案撰写专家。"},
|
||||
{"role": "user", "content": prompt}
|
||||
],
|
||||
temperature=1.5,
|
||||
response_format={"type": "json_object"},
|
||||
)
|
||||
# 提取生成的文案
|
||||
if response.choices and len(response.choices) > 0:
|
||||
narration_script = response.choices[0].message.content
|
||||
# 打印消耗的tokens
|
||||
logger.debug(f"消耗的tokens: {response.usage.total_tokens}")
|
||||
return narration_script
|
||||
else:
|
||||
return "生成解说文案失败: 未获取到有效响应"
|
||||
else:
|
||||
# 不支持 json 输出,需要多一步处理 ```json ``` 的步骤
|
||||
response = client.chat.completions.create(
|
||||
model=model,
|
||||
messages=[
|
||||
{"role": "system", "content": "你是一名专业的短视频解说文案撰写专家。"},
|
||||
{"role": "user", "content": prompt}
|
||||
],
|
||||
temperature=1.5,
|
||||
)
|
||||
# 提取生成的文案
|
||||
if response.choices and len(response.choices) > 0:
|
||||
narration_script = response.choices[0].message.content
|
||||
# 打印消耗的tokens
|
||||
logger.debug(f"文案消耗的tokens: {response.usage.total_tokens}")
|
||||
# 清理 narration_script 字符串前后的 ```json ``` 字符串
|
||||
narration_script = narration_script.replace("```json", "").replace("```", "")
|
||||
return narration_script
|
||||
else:
|
||||
return "生成解说文案失败: 未获取到有效响应"
|
||||
|
||||
except Exception as e:
|
||||
return f"调用API生成解说文案时出错: {traceback.format_exc()}"
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
text_provider = 'openai'
|
||||
text_api_key = "sk-xxx"
|
||||
text_model = "deepseek-reasoner"
|
||||
text_base_url = "https://api.deepseek.com"
|
||||
video_frame_description_path = "/Users/apple/Desktop/home/NarratoAI/storage/temp/analysis/frame_analysis_20250508_1139.json"
|
||||
|
||||
# 测试新的JSON文件
|
||||
test_file_path = "/Users/apple/Desktop/home/NarratoAI/storage/temp/analysis/frame_analysis_20250508_1458.json"
|
||||
markdown_output = parse_frame_analysis_to_markdown(test_file_path)
|
||||
# print(markdown_output)
|
||||
|
||||
# 输出到文件以便检查格式
|
||||
output_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/narration_script.md"
|
||||
with open(output_file, 'w', encoding='utf-8') as f:
|
||||
f.write(markdown_output)
|
||||
# print(f"\n已将Markdown输出保存到: {output_file}")
|
||||
|
||||
# 生成解说文案
|
||||
narration = generate_narration(
|
||||
markdown_output,
|
||||
text_api_key,
|
||||
base_url=text_base_url,
|
||||
model=text_model
|
||||
)
|
||||
|
||||
# 保存解说文案
|
||||
print(narration)
|
||||
print(type(narration))
|
||||
narration_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/final_narration_script.json"
|
||||
with open(narration_file, 'w', encoding='utf-8') as f:
|
||||
f.write(narration)
|
||||
print(f"\n已将解说文案保存到: {narration_file}")
|
||||
393
app/services/generate_video.py
Normal file
393
app/services/generate_video.py
Normal file
@ -0,0 +1,393 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: UTF-8 -*-
|
||||
|
||||
'''
|
||||
@Project: NarratoAI
|
||||
@File : generate_video
|
||||
@Author : 小林同学
|
||||
@Date : 2025/5/7 上午11:55
|
||||
'''
|
||||
|
||||
import os
|
||||
import traceback
|
||||
from typing import Optional, Dict, Any
|
||||
from loguru import logger
|
||||
from moviepy import (
|
||||
VideoFileClip,
|
||||
AudioFileClip,
|
||||
CompositeAudioClip,
|
||||
CompositeVideoClip,
|
||||
TextClip,
|
||||
afx
|
||||
)
|
||||
from moviepy.video.tools.subtitles import SubtitlesClip
|
||||
from PIL import ImageFont
|
||||
|
||||
from app.utils import utils
|
||||
|
||||
|
||||
def merge_materials(
|
||||
video_path: str,
|
||||
audio_path: str,
|
||||
output_path: str,
|
||||
subtitle_path: Optional[str] = None,
|
||||
bgm_path: Optional[str] = None,
|
||||
options: Optional[Dict[str, Any]] = None
|
||||
) -> str:
|
||||
"""
|
||||
合并视频、音频、BGM和字幕素材生成最终视频
|
||||
|
||||
参数:
|
||||
video_path: 视频文件路径
|
||||
audio_path: 音频文件路径
|
||||
output_path: 输出文件路径
|
||||
subtitle_path: 字幕文件路径,可选
|
||||
bgm_path: 背景音乐文件路径,可选
|
||||
options: 其他选项配置,可包含以下字段:
|
||||
- voice_volume: 人声音量,默认1.0
|
||||
- bgm_volume: 背景音乐音量,默认0.3
|
||||
- original_audio_volume: 原始音频音量,默认0.0
|
||||
- keep_original_audio: 是否保留原始音频,默认False
|
||||
- subtitle_font: 字幕字体,默认None,系统会使用默认字体
|
||||
- subtitle_font_size: 字幕字体大小,默认40
|
||||
- subtitle_color: 字幕颜色,默认白色
|
||||
- subtitle_bg_color: 字幕背景颜色,默认透明
|
||||
- subtitle_position: 字幕位置,可选值'bottom', 'top', 'center',默认'bottom'
|
||||
- custom_position: 自定义位置
|
||||
- stroke_color: 描边颜色,默认黑色
|
||||
- stroke_width: 描边宽度,默认1
|
||||
- threads: 处理线程数,默认2
|
||||
- fps: 输出帧率,默认30
|
||||
|
||||
返回:
|
||||
输出视频的路径
|
||||
"""
|
||||
# 合并选项默认值
|
||||
if options is None:
|
||||
options = {}
|
||||
|
||||
# 设置默认参数值
|
||||
voice_volume = options.get('voice_volume', 1.0)
|
||||
bgm_volume = options.get('bgm_volume', 0.3)
|
||||
original_audio_volume = options.get('original_audio_volume', 0.0) # 默认为0,即不保留原声
|
||||
keep_original_audio = options.get('keep_original_audio', False) # 是否保留原声
|
||||
subtitle_font = options.get('subtitle_font', '')
|
||||
subtitle_font_size = options.get('subtitle_font_size', 40)
|
||||
subtitle_color = options.get('subtitle_color', '#FFFFFF')
|
||||
subtitle_bg_color = options.get('subtitle_bg_color', 'transparent')
|
||||
subtitle_position = options.get('subtitle_position', 'bottom')
|
||||
custom_position = options.get('custom_position', 70)
|
||||
stroke_color = options.get('stroke_color', '#000000')
|
||||
stroke_width = options.get('stroke_width', 1)
|
||||
threads = options.get('threads', 2)
|
||||
fps = options.get('fps', 30)
|
||||
|
||||
# 处理透明背景色问题 - MoviePy 2.1.1不支持'transparent'值
|
||||
if subtitle_bg_color == 'transparent':
|
||||
subtitle_bg_color = None # None在新版MoviePy中表示透明背景
|
||||
|
||||
# 创建输出目录(如果不存在)
|
||||
output_dir = os.path.dirname(output_path)
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
logger.info(f"开始合并素材...")
|
||||
logger.info(f" ① 视频: {video_path}")
|
||||
logger.info(f" ② 音频: {audio_path}")
|
||||
if subtitle_path:
|
||||
logger.info(f" ③ 字幕: {subtitle_path}")
|
||||
if bgm_path:
|
||||
logger.info(f" ④ 背景音乐: {bgm_path}")
|
||||
logger.info(f" ⑤ 输出: {output_path}")
|
||||
|
||||
# 加载视频
|
||||
try:
|
||||
video_clip = VideoFileClip(video_path)
|
||||
logger.info(f"视频尺寸: {video_clip.size[0]}x{video_clip.size[1]}, 时长: {video_clip.duration}秒")
|
||||
|
||||
# 提取视频原声(如果需要)
|
||||
original_audio = None
|
||||
if keep_original_audio and original_audio_volume > 0:
|
||||
try:
|
||||
original_audio = video_clip.audio
|
||||
if original_audio:
|
||||
original_audio = original_audio.with_effects([afx.MultiplyVolume(original_audio_volume)])
|
||||
logger.info(f"已提取视频原声,音量设置为: {original_audio_volume}")
|
||||
else:
|
||||
logger.warning("视频没有音轨,无法提取原声")
|
||||
except Exception as e:
|
||||
logger.error(f"提取视频原声失败: {str(e)}")
|
||||
original_audio = None
|
||||
|
||||
# 移除原始音轨,稍后会合并新的音频
|
||||
video_clip = video_clip.without_audio()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"加载视频失败: {str(e)}")
|
||||
raise
|
||||
|
||||
# 处理背景音乐和所有音频轨道合成
|
||||
audio_tracks = []
|
||||
|
||||
# 先添加主音频(配音)
|
||||
if audio_path and os.path.exists(audio_path):
|
||||
try:
|
||||
voice_audio = AudioFileClip(audio_path).with_effects([afx.MultiplyVolume(voice_volume)])
|
||||
audio_tracks.append(voice_audio)
|
||||
logger.info(f"已添加配音音频,音量: {voice_volume}")
|
||||
except Exception as e:
|
||||
logger.error(f"加载配音音频失败: {str(e)}")
|
||||
|
||||
# 添加原声(如果需要)
|
||||
if original_audio is not None:
|
||||
audio_tracks.append(original_audio)
|
||||
logger.info(f"已添加视频原声,音量: {original_audio_volume}")
|
||||
|
||||
# 添加背景音乐(如果有)
|
||||
if bgm_path and os.path.exists(bgm_path):
|
||||
try:
|
||||
bgm_clip = AudioFileClip(bgm_path).with_effects([
|
||||
afx.MultiplyVolume(bgm_volume),
|
||||
afx.AudioFadeOut(3),
|
||||
afx.AudioLoop(duration=video_clip.duration),
|
||||
])
|
||||
audio_tracks.append(bgm_clip)
|
||||
logger.info(f"已添加背景音乐,音量: {bgm_volume}")
|
||||
except Exception as e:
|
||||
logger.error(f"添加背景音乐失败: \n{traceback.format_exc()}")
|
||||
|
||||
# 合成最终的音频轨道
|
||||
if audio_tracks:
|
||||
final_audio = CompositeAudioClip(audio_tracks)
|
||||
video_clip = video_clip.with_audio(final_audio)
|
||||
logger.info(f"已合成所有音频轨道,共{len(audio_tracks)}个")
|
||||
else:
|
||||
logger.warning("没有可用的音频轨道,输出视频将没有声音")
|
||||
|
||||
# 处理字体路径
|
||||
font_path = None
|
||||
if subtitle_path and subtitle_font:
|
||||
font_path = os.path.join(utils.font_dir(), subtitle_font)
|
||||
if os.name == "nt":
|
||||
font_path = font_path.replace("\\", "/")
|
||||
logger.info(f"使用字体: {font_path}")
|
||||
|
||||
# 处理视频尺寸
|
||||
video_width, video_height = video_clip.size
|
||||
|
||||
# 字幕处理函数
|
||||
def create_text_clip(subtitle_item):
|
||||
"""创建单个字幕片段"""
|
||||
phrase = subtitle_item[1]
|
||||
max_width = video_width * 0.9
|
||||
|
||||
# 如果有字体路径,进行文本换行处理
|
||||
wrapped_txt = phrase
|
||||
txt_height = 0
|
||||
if font_path:
|
||||
wrapped_txt, txt_height = wrap_text(
|
||||
phrase,
|
||||
max_width=max_width,
|
||||
font=font_path,
|
||||
fontsize=subtitle_font_size
|
||||
)
|
||||
|
||||
# 创建文本片段
|
||||
try:
|
||||
_clip = TextClip(
|
||||
text=wrapped_txt,
|
||||
font=font_path,
|
||||
font_size=subtitle_font_size,
|
||||
color=subtitle_color,
|
||||
bg_color=subtitle_bg_color, # 这里已经在前面处理过,None表示透明
|
||||
stroke_color=stroke_color,
|
||||
stroke_width=stroke_width,
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"创建字幕片段失败: {str(e)}, 使用简化参数重试")
|
||||
# 如果上面的方法失败,尝试使用更简单的参数
|
||||
_clip = TextClip(
|
||||
text=wrapped_txt,
|
||||
font=font_path,
|
||||
font_size=subtitle_font_size,
|
||||
color=subtitle_color,
|
||||
)
|
||||
|
||||
# 设置字幕时间
|
||||
duration = subtitle_item[0][1] - subtitle_item[0][0]
|
||||
_clip = _clip.with_start(subtitle_item[0][0])
|
||||
_clip = _clip.with_end(subtitle_item[0][1])
|
||||
_clip = _clip.with_duration(duration)
|
||||
|
||||
# 设置字幕位置
|
||||
if subtitle_position == "bottom":
|
||||
_clip = _clip.with_position(("center", video_height * 0.95 - _clip.h))
|
||||
elif subtitle_position == "top":
|
||||
_clip = _clip.with_position(("center", video_height * 0.05))
|
||||
elif subtitle_position == "custom":
|
||||
margin = 10
|
||||
max_y = video_height - _clip.h - margin
|
||||
min_y = margin
|
||||
custom_y = (video_height - _clip.h) * (custom_position / 100)
|
||||
custom_y = max(
|
||||
min_y, min(custom_y, max_y)
|
||||
)
|
||||
_clip = _clip.with_position(("center", custom_y))
|
||||
else: # center
|
||||
_clip = _clip.with_position(("center", "center"))
|
||||
|
||||
return _clip
|
||||
|
||||
# 创建TextClip工厂函数
|
||||
def make_textclip(text):
|
||||
return TextClip(
|
||||
text=text,
|
||||
font=font_path,
|
||||
font_size=subtitle_font_size,
|
||||
color=subtitle_color,
|
||||
)
|
||||
|
||||
# 处理字幕
|
||||
if subtitle_path and os.path.exists(subtitle_path):
|
||||
try:
|
||||
# 加载字幕文件
|
||||
sub = SubtitlesClip(
|
||||
subtitles=subtitle_path,
|
||||
encoding="utf-8",
|
||||
make_textclip=make_textclip
|
||||
)
|
||||
|
||||
# 创建每个字幕片段
|
||||
text_clips = []
|
||||
for item in sub.subtitles:
|
||||
clip = create_text_clip(subtitle_item=item)
|
||||
text_clips.append(clip)
|
||||
|
||||
# 合成视频和字幕
|
||||
video_clip = CompositeVideoClip([video_clip, *text_clips])
|
||||
logger.info(f"已添加{len(text_clips)}个字幕片段")
|
||||
except Exception as e:
|
||||
logger.error(f"处理字幕失败: \n{traceback.format_exc()}")
|
||||
|
||||
# 导出最终视频
|
||||
try:
|
||||
video_clip.write_videofile(
|
||||
output_path,
|
||||
audio_codec="aac",
|
||||
temp_audiofile_path=output_dir,
|
||||
threads=threads,
|
||||
fps=fps,
|
||||
)
|
||||
logger.success(f"素材合并完成: {output_path}")
|
||||
except Exception as e:
|
||||
logger.error(f"导出视频失败: {str(e)}")
|
||||
raise
|
||||
finally:
|
||||
# 释放资源
|
||||
video_clip.close()
|
||||
del video_clip
|
||||
|
||||
return output_path
|
||||
|
||||
|
||||
def wrap_text(text, max_width, font="Arial", fontsize=60):
|
||||
"""
|
||||
文本换行函数,使长文本适应指定宽度
|
||||
|
||||
参数:
|
||||
text: 需要换行的文本
|
||||
max_width: 最大宽度(像素)
|
||||
font: 字体路径
|
||||
fontsize: 字体大小
|
||||
|
||||
返回:
|
||||
换行后的文本和文本高度
|
||||
"""
|
||||
# 创建ImageFont对象
|
||||
try:
|
||||
font_obj = ImageFont.truetype(font, fontsize)
|
||||
except:
|
||||
# 如果无法加载指定字体,使用默认字体
|
||||
font_obj = ImageFont.load_default()
|
||||
|
||||
def get_text_size(inner_text):
|
||||
inner_text = inner_text.strip()
|
||||
left, top, right, bottom = font_obj.getbbox(inner_text)
|
||||
return right - left, bottom - top
|
||||
|
||||
width, height = get_text_size(text)
|
||||
if width <= max_width:
|
||||
return text, height
|
||||
|
||||
processed = True
|
||||
|
||||
_wrapped_lines_ = []
|
||||
words = text.split(" ")
|
||||
_txt_ = ""
|
||||
for word in words:
|
||||
_before = _txt_
|
||||
_txt_ += f"{word} "
|
||||
_width, _height = get_text_size(_txt_)
|
||||
if _width <= max_width:
|
||||
continue
|
||||
else:
|
||||
if _txt_.strip() == word.strip():
|
||||
processed = False
|
||||
break
|
||||
_wrapped_lines_.append(_before)
|
||||
_txt_ = f"{word} "
|
||||
_wrapped_lines_.append(_txt_)
|
||||
if processed:
|
||||
_wrapped_lines_ = [line.strip() for line in _wrapped_lines_]
|
||||
result = "\n".join(_wrapped_lines_).strip()
|
||||
height = len(_wrapped_lines_) * height
|
||||
return result, height
|
||||
|
||||
_wrapped_lines_ = []
|
||||
chars = list(text)
|
||||
_txt_ = ""
|
||||
for word in chars:
|
||||
_txt_ += word
|
||||
_width, _height = get_text_size(_txt_)
|
||||
if _width <= max_width:
|
||||
continue
|
||||
else:
|
||||
_wrapped_lines_.append(_txt_)
|
||||
_txt_ = ""
|
||||
_wrapped_lines_.append(_txt_)
|
||||
result = "\n".join(_wrapped_lines_).strip()
|
||||
height = len(_wrapped_lines_) * height
|
||||
return result, height
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
merger_mp4 = '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/merger.mp4'
|
||||
merger_sub = '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/merged_subtitle_00_00_00-00_01_30.srt'
|
||||
merger_audio = '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/merger_audio.mp3'
|
||||
bgm_path = '/Users/apple/Desktop/home/NarratoAI/resource/songs/bgm.mp3'
|
||||
output_video = '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/combined_test.mp4'
|
||||
|
||||
# 调用示例
|
||||
options = {
|
||||
'voice_volume': 1.0, # 配音音量
|
||||
'bgm_volume': 0.1, # 背景音乐音量
|
||||
'original_audio_volume': 1.0, # 视频原声音量,0表示不保留
|
||||
'keep_original_audio': True, # 是否保留原声
|
||||
'subtitle_font': 'MicrosoftYaHeiNormal.ttc', # 这里使用相对字体路径,会自动在 font_dir() 目录下查找
|
||||
'subtitle_font_size': 40,
|
||||
'subtitle_color': '#FFFFFF',
|
||||
'subtitle_bg_color': None, # 直接使用None表示透明背景
|
||||
'subtitle_position': 'bottom',
|
||||
'threads': 2
|
||||
}
|
||||
|
||||
try:
|
||||
merge_materials(
|
||||
video_path=merger_mp4,
|
||||
audio_path=merger_audio,
|
||||
subtitle_path=merger_sub,
|
||||
bgm_path=bgm_path,
|
||||
output_path=output_video,
|
||||
options=options
|
||||
)
|
||||
except Exception as e:
|
||||
logger.error(f"合并素材失败: \n{traceback.format_exc()}")
|
||||
@ -7,7 +7,7 @@ from typing import List
|
||||
from loguru import logger
|
||||
from openai import OpenAI
|
||||
from openai import AzureOpenAI
|
||||
from moviepy.editor import VideoFileClip
|
||||
from moviepy import VideoFileClip
|
||||
from openai.types.chat import ChatCompletion
|
||||
import google.generativeai as gemini
|
||||
from googleapiclient.errors import ResumableUploadError
|
||||
|
||||
@ -4,9 +4,10 @@ import random
|
||||
import traceback
|
||||
from urllib.parse import urlencode
|
||||
from datetime import datetime
|
||||
import json
|
||||
|
||||
import requests
|
||||
from typing import List
|
||||
from typing import List, Optional
|
||||
from loguru import logger
|
||||
from moviepy.video.io.VideoFileClip import VideoFileClip
|
||||
|
||||
@ -306,7 +307,50 @@ def format_timestamp(seconds: float) -> str:
|
||||
return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"
|
||||
|
||||
|
||||
def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> dict:
|
||||
def _detect_hardware_acceleration() -> Optional[str]:
|
||||
"""
|
||||
检测系统可用的硬件加速器
|
||||
|
||||
Returns:
|
||||
Optional[str]: 硬件加速参数,如果不支持则返回None
|
||||
"""
|
||||
# 检查NVIDIA GPU支持
|
||||
try:
|
||||
nvidia_check = subprocess.run(
|
||||
["ffmpeg", "-hwaccel", "cuda", "-i", "/dev/null", "-f", "null", "-"],
|
||||
stderr=subprocess.PIPE, stdout=subprocess.PIPE, text=True, check=False
|
||||
)
|
||||
if nvidia_check.returncode == 0:
|
||||
return "cuda"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 检查MacOS videotoolbox支持
|
||||
try:
|
||||
videotoolbox_check = subprocess.run(
|
||||
["ffmpeg", "-hwaccel", "videotoolbox", "-i", "/dev/null", "-f", "null", "-"],
|
||||
stderr=subprocess.PIPE, stdout=subprocess.PIPE, text=True, check=False
|
||||
)
|
||||
if videotoolbox_check.returncode == 0:
|
||||
return "videotoolbox"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 检查Intel Quick Sync支持
|
||||
try:
|
||||
qsv_check = subprocess.run(
|
||||
["ffmpeg", "-hwaccel", "qsv", "-i", "/dev/null", "-f", "null", "-"],
|
||||
stderr=subprocess.PIPE, stdout=subprocess.PIPE, text=True, check=False
|
||||
)
|
||||
if qsv_check.returncode == 0:
|
||||
return "qsv"
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> str:
|
||||
"""
|
||||
保存剪辑后的视频
|
||||
|
||||
@ -328,29 +372,43 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> di
|
||||
if not os.path.exists(save_dir):
|
||||
os.makedirs(save_dir)
|
||||
|
||||
# 生成更规范的视频文件名
|
||||
video_id = f"vid-{timestamp.replace(':', '-').replace(',', '_')}"
|
||||
video_path = os.path.join(save_dir, f"{video_id}.mp4")
|
||||
# 解析时间戳
|
||||
start_str, end_str = timestamp.split('-')
|
||||
|
||||
# 格式化输出文件名(使用连字符替代冒号和逗号)
|
||||
safe_start_time = start_str.replace(':', '-').replace(',', '-')
|
||||
safe_end_time = end_str.replace(':', '-').replace(',', '-')
|
||||
output_filename = f"vid_{safe_start_time}@{safe_end_time}.mp4"
|
||||
video_path = os.path.join(save_dir, output_filename)
|
||||
|
||||
# 如果视频已存在,直接返回
|
||||
if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
|
||||
logger.info(f"video already exists: {video_path}")
|
||||
return {timestamp: video_path}
|
||||
logger.info(f"视频已存在: {video_path}")
|
||||
return video_path
|
||||
|
||||
try:
|
||||
# 加载视频获取总时长
|
||||
video = VideoFileClip(origin_video)
|
||||
total_duration = video.duration
|
||||
# 检查视频是否存在
|
||||
if not os.path.exists(origin_video):
|
||||
logger.error(f"源视频文件不存在: {origin_video}")
|
||||
return ''
|
||||
|
||||
# 获取视频总时长
|
||||
try:
|
||||
probe_cmd = ["ffprobe", "-v", "error", "-show_entries", "format=duration",
|
||||
"-of", "default=noprint_wrappers=1:nokey=1", origin_video]
|
||||
total_duration = float(subprocess.check_output(probe_cmd).decode('utf-8').strip())
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.error(f"获取视频时长失败: {str(e)}")
|
||||
return ''
|
||||
|
||||
# 解析时间戳
|
||||
start_str, end_str = timestamp.split('-')
|
||||
# 计算时间点
|
||||
start = time_to_seconds(start_str)
|
||||
end = time_to_seconds(end_str)
|
||||
|
||||
# 验证时间段
|
||||
if start >= total_duration:
|
||||
logger.warning(f"起始时间 {format_timestamp(start)} ({start:.3f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.3f}秒)")
|
||||
video.close()
|
||||
return {}
|
||||
return ''
|
||||
|
||||
if end > total_duration:
|
||||
logger.warning(f"结束时间 {format_timestamp(end)} ({end:.3f}秒) 超出视频总时长 {format_timestamp(total_duration)} ({total_duration:.3f}秒),将自动调整为视频结尾")
|
||||
@ -358,55 +416,74 @@ def save_clip_video(timestamp: str, origin_video: str, save_dir: str = "") -> di
|
||||
|
||||
if end <= start:
|
||||
logger.warning(f"结束时间 {format_timestamp(end)} 必须大于起始时间 {format_timestamp(start)}")
|
||||
video.close()
|
||||
return {}
|
||||
return ''
|
||||
|
||||
# 剪辑视频
|
||||
# 计算剪辑时长
|
||||
duration = end - start
|
||||
logger.info(f"开始剪辑视频: {format_timestamp(start)} - {format_timestamp(end)},时长 {format_timestamp(duration)}")
|
||||
# logger.info(f"开始剪辑视频: {format_timestamp(start)} - {format_timestamp(end)},时长 {format_timestamp(duration)}")
|
||||
|
||||
# 剪辑视频
|
||||
subclip = video.subclip(start, end)
|
||||
# 检测可用的硬件加速选项
|
||||
hwaccel = _detect_hardware_acceleration()
|
||||
hwaccel_args = []
|
||||
if hwaccel:
|
||||
hwaccel_args = ["-hwaccel", hwaccel]
|
||||
logger.info(f"使用硬件加速: {hwaccel}")
|
||||
|
||||
try:
|
||||
# 检查视频是否有音频轨道并写入文件
|
||||
subclip.write_videofile(
|
||||
video_path,
|
||||
codec='libx264',
|
||||
audio_codec='aac',
|
||||
temp_audiofile='temp-audio.m4a',
|
||||
remove_temp=True,
|
||||
audio=(subclip.audio is not None),
|
||||
logger=None
|
||||
)
|
||||
|
||||
# 验证生成的视频文件
|
||||
if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
|
||||
with VideoFileClip(video_path) as clip:
|
||||
if clip.duration > 0 and clip.fps > 0:
|
||||
return {timestamp: video_path}
|
||||
|
||||
raise ValueError("视频文件验证失败")
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"视频文件处理失败: {video_path} => {str(e)}")
|
||||
# 转换为FFmpeg兼容的时间格式(逗号替换为点)
|
||||
ffmpeg_start_time = start_str.replace(',', '.')
|
||||
ffmpeg_end_time = end_str.replace(',', '.')
|
||||
|
||||
# 构建FFmpeg命令
|
||||
ffmpeg_cmd = [
|
||||
"ffmpeg", "-y", *hwaccel_args,
|
||||
"-i", origin_video,
|
||||
"-ss", ffmpeg_start_time,
|
||||
"-to", ffmpeg_end_time,
|
||||
"-c:v", "h264_videotoolbox" if hwaccel == "videotoolbox" else "libx264",
|
||||
"-c:a", "aac",
|
||||
"-strict", "experimental",
|
||||
video_path
|
||||
]
|
||||
|
||||
# 执行FFmpeg命令
|
||||
# logger.info(f"裁剪视频片段: {timestamp} -> {ffmpeg_start_time}到{ffmpeg_end_time}")
|
||||
# logger.debug(f"执行命令: {' '.join(ffmpeg_cmd)}")
|
||||
|
||||
process = subprocess.run(
|
||||
ffmpeg_cmd,
|
||||
stdout=subprocess.PIPE,
|
||||
stderr=subprocess.PIPE,
|
||||
text=True,
|
||||
check=False # 不抛出异常,我们会检查返回码
|
||||
)
|
||||
|
||||
# 检查是否成功
|
||||
if process.returncode != 0:
|
||||
logger.error(f"视频剪辑失败: {process.stderr}")
|
||||
if os.path.exists(video_path):
|
||||
os.remove(video_path)
|
||||
return ''
|
||||
|
||||
# 验证生成的视频文件
|
||||
if os.path.exists(video_path) and os.path.getsize(video_path) > 0:
|
||||
# 检查视频是否可播放
|
||||
probe_cmd = ["ffprobe", "-v", "error", video_path]
|
||||
validate_result = subprocess.run(probe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
|
||||
if validate_result.returncode == 0:
|
||||
logger.info(f"视频剪辑成功: {video_path}")
|
||||
return video_path
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"视频剪辑失败: \n{str(traceback.format_exc())}")
|
||||
logger.error("视频文件验证失败")
|
||||
if os.path.exists(video_path):
|
||||
os.remove(video_path)
|
||||
finally:
|
||||
# 确保视频对象被正确关闭
|
||||
try:
|
||||
video.close()
|
||||
if 'subclip' in locals():
|
||||
subclip.close()
|
||||
except:
|
||||
pass
|
||||
|
||||
return {}
|
||||
return ''
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"视频剪辑过程中发生错误: \n{str(traceback.format_exc())}")
|
||||
if os.path.exists(video_path):
|
||||
os.remove(video_path)
|
||||
return ''
|
||||
|
||||
|
||||
def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, progress_callback=None) -> dict:
|
||||
@ -428,8 +505,7 @@ def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, pro
|
||||
try:
|
||||
saved_video_path = save_clip_video(timestamp=item, origin_video=origin_video, save_dir=material_directory)
|
||||
if saved_video_path:
|
||||
logger.info(f"video saved: {saved_video_path}")
|
||||
video_paths.update(saved_video_path)
|
||||
video_paths.update({index+1:saved_video_path})
|
||||
|
||||
# 更新进度
|
||||
if progress_callback:
|
||||
@ -439,6 +515,7 @@ def clip_videos(task_id: str, timestamp_terms: List[str], origin_video: str, pro
|
||||
return {}
|
||||
|
||||
logger.success(f"裁剪 {len(video_paths)} videos")
|
||||
# logger.debug(json.dumps(video_paths, indent=4, ensure_ascii=False))
|
||||
return video_paths
|
||||
|
||||
|
||||
|
||||
555
app/services/merger_video.py
Normal file
555
app/services/merger_video.py
Normal file
@ -0,0 +1,555 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: UTF-8 -*-
|
||||
|
||||
'''
|
||||
@Project: NarratoAI
|
||||
@File : merger_video
|
||||
@Author : 小林同学
|
||||
@Date : 2025/5/6 下午7:38
|
||||
'''
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from enum import Enum
|
||||
from typing import List, Optional, Tuple
|
||||
from loguru import logger
|
||||
|
||||
|
||||
class VideoAspect(Enum):
|
||||
"""视频宽高比枚举"""
|
||||
landscape = "16:9" # 横屏 16:9
|
||||
landscape_2 = "4:3"
|
||||
portrait = "9:16" # 竖屏 9:16
|
||||
portrait_2 = "3:4"
|
||||
square = "1:1" # 方形 1:1
|
||||
|
||||
def to_resolution(self) -> Tuple[int, int]:
|
||||
"""根据宽高比返回标准分辨率"""
|
||||
if self == VideoAspect.portrait:
|
||||
return 1080, 1920 # 竖屏 9:16
|
||||
elif self == VideoAspect.portrait_2:
|
||||
return 720, 1280 # 竖屏 4:3
|
||||
elif self == VideoAspect.landscape:
|
||||
return 1920, 1080 # 横屏 16:9
|
||||
elif self == VideoAspect.landscape_2:
|
||||
return 1280, 720 # 横屏 4:3
|
||||
elif self == VideoAspect.square:
|
||||
return 1080, 1080 # 方形 1:1
|
||||
else:
|
||||
return 1080, 1920 # 默认竖屏
|
||||
|
||||
|
||||
def check_ffmpeg_installation() -> bool:
|
||||
"""
|
||||
检查ffmpeg是否已安装
|
||||
|
||||
Returns:
|
||||
bool: 如果安装则返回True,否则返回False
|
||||
"""
|
||||
try:
|
||||
subprocess.run(['ffmpeg', '-version'], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
|
||||
return True
|
||||
except (subprocess.SubprocessError, FileNotFoundError):
|
||||
logger.error("ffmpeg未安装或不在系统PATH中,请安装ffmpeg")
|
||||
return False
|
||||
|
||||
|
||||
def get_hardware_acceleration_option() -> Optional[str]:
|
||||
"""
|
||||
根据系统环境选择合适的硬件加速选项
|
||||
|
||||
Returns:
|
||||
Optional[str]: 硬件加速参数,如果不支持则返回None
|
||||
"""
|
||||
try:
|
||||
# 检查NVIDIA GPU支持
|
||||
nvidia_check = subprocess.run(
|
||||
['ffmpeg', '-hide_banner', '-hwaccels'],
|
||||
stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
|
||||
)
|
||||
output = nvidia_check.stdout.lower()
|
||||
|
||||
if 'cuda' in output:
|
||||
return 'cuda'
|
||||
elif 'nvenc' in output:
|
||||
return 'nvenc'
|
||||
elif 'qsv' in output: # Intel Quick Sync
|
||||
return 'qsv'
|
||||
elif 'videotoolbox' in output: # macOS
|
||||
return 'videotoolbox'
|
||||
elif 'vaapi' in output: # Linux VA-API
|
||||
return 'vaapi'
|
||||
else:
|
||||
logger.info("没有找到支持的硬件加速器,将使用软件编码")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.warning(f"检测硬件加速器时出错: {str(e)},将使用软件编码")
|
||||
return None
|
||||
|
||||
|
||||
def check_video_has_audio(video_path: str) -> bool:
|
||||
"""
|
||||
检查视频是否包含音频流
|
||||
|
||||
Args:
|
||||
video_path: 视频文件路径
|
||||
|
||||
Returns:
|
||||
bool: 如果视频包含音频流则返回True,否则返回False
|
||||
"""
|
||||
if not os.path.exists(video_path):
|
||||
logger.warning(f"视频文件不存在: {video_path}")
|
||||
return False
|
||||
|
||||
probe_cmd = [
|
||||
'ffprobe', '-v', 'error',
|
||||
'-select_streams', 'a:0',
|
||||
'-show_entries', 'stream=codec_type',
|
||||
'-of', 'csv=p=0',
|
||||
video_path
|
||||
]
|
||||
|
||||
try:
|
||||
result = subprocess.run(probe_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=False)
|
||||
return result.stdout.strip() == 'audio'
|
||||
except Exception as e:
|
||||
logger.warning(f"检测视频音频流时出错: {str(e)}")
|
||||
return False
|
||||
|
||||
|
||||
def create_ffmpeg_concat_file(video_paths: List[str], concat_file_path: str) -> str:
|
||||
"""
|
||||
创建ffmpeg合并所需的concat文件
|
||||
|
||||
Args:
|
||||
video_paths: 需要合并的视频文件路径列表
|
||||
concat_file_path: concat文件的输出路径
|
||||
|
||||
Returns:
|
||||
str: concat文件的路径
|
||||
"""
|
||||
with open(concat_file_path, 'w', encoding='utf-8') as f:
|
||||
for video_path in video_paths:
|
||||
# 获取绝对路径
|
||||
abs_path = os.path.abspath(video_path)
|
||||
# 在Windows上将反斜杠替换为正斜杠
|
||||
if os.name == 'nt': # Windows系统
|
||||
abs_path = abs_path.replace('\\', '/')
|
||||
else: # Unix/Mac系统
|
||||
# 转义特殊字符
|
||||
abs_path = abs_path.replace('\\', '\\\\').replace(':', '\\:')
|
||||
|
||||
# 处理路径中的单引号 (如果有)
|
||||
abs_path = abs_path.replace("'", "\\'")
|
||||
|
||||
f.write(f"file '{abs_path}'\n")
|
||||
return concat_file_path
|
||||
|
||||
|
||||
def process_single_video(
|
||||
input_path: str,
|
||||
output_path: str,
|
||||
target_width: int,
|
||||
target_height: int,
|
||||
keep_audio: bool = True,
|
||||
hwaccel: Optional[str] = None
|
||||
) -> str:
|
||||
"""
|
||||
处理单个视频:调整分辨率、帧率等
|
||||
|
||||
Args:
|
||||
input_path: 输入视频路径
|
||||
output_path: 输出视频路径
|
||||
target_width: 目标宽度
|
||||
target_height: 目标高度
|
||||
keep_audio: 是否保留音频
|
||||
hwaccel: 硬件加速选项
|
||||
|
||||
Returns:
|
||||
str: 处理后的视频路径
|
||||
"""
|
||||
if not os.path.exists(input_path):
|
||||
raise FileNotFoundError(f"找不到视频文件: {input_path}")
|
||||
|
||||
# 构建基本命令
|
||||
command = ['ffmpeg', '-y']
|
||||
|
||||
# 添加硬件加速参数
|
||||
if hwaccel:
|
||||
if hwaccel == 'cuda' or hwaccel == 'nvenc':
|
||||
command.extend(['-hwaccel', 'cuda'])
|
||||
elif hwaccel == 'qsv':
|
||||
command.extend(['-hwaccel', 'qsv'])
|
||||
elif hwaccel == 'videotoolbox':
|
||||
command.extend(['-hwaccel', 'videotoolbox'])
|
||||
elif hwaccel == 'vaapi':
|
||||
command.extend(['-hwaccel', 'vaapi', '-vaapi_device', '/dev/dri/renderD128'])
|
||||
|
||||
# 输入文件
|
||||
command.extend(['-i', input_path])
|
||||
|
||||
# 处理音频
|
||||
if not keep_audio:
|
||||
command.extend(['-an']) # 移除音频
|
||||
else:
|
||||
# 检查输入视频是否有音频流
|
||||
has_audio = check_video_has_audio(input_path)
|
||||
if has_audio:
|
||||
command.extend(['-c:a', 'aac', '-b:a', '128k']) # 音频编码为AAC
|
||||
else:
|
||||
logger.warning(f"视频 {input_path} 没有音频流,将会忽略音频设置")
|
||||
command.extend(['-an']) # 没有音频流时移除音频设置
|
||||
|
||||
# 视频处理参数:缩放并添加填充以保持比例
|
||||
scale_filter = f"scale={target_width}:{target_height}:force_original_aspect_ratio=decrease"
|
||||
pad_filter = f"pad={target_width}:{target_height}:(ow-iw)/2:(oh-ih)/2"
|
||||
command.extend([
|
||||
'-vf', f"{scale_filter},{pad_filter}",
|
||||
'-r', '30', # 设置帧率为30fps
|
||||
])
|
||||
|
||||
# 选择编码器
|
||||
if hwaccel == 'cuda' or hwaccel == 'nvenc':
|
||||
command.extend(['-c:v', 'h264_nvenc', '-preset', 'p4', '-profile:v', 'high'])
|
||||
elif hwaccel == 'qsv':
|
||||
command.extend(['-c:v', 'h264_qsv', '-preset', 'medium'])
|
||||
elif hwaccel == 'videotoolbox':
|
||||
command.extend(['-c:v', 'h264_videotoolbox', '-profile:v', 'high'])
|
||||
elif hwaccel == 'vaapi':
|
||||
command.extend(['-c:v', 'h264_vaapi', '-profile', '100'])
|
||||
else:
|
||||
command.extend(['-c:v', 'libx264', '-preset', 'medium', '-profile:v', 'high'])
|
||||
|
||||
# 设置视频比特率和其他参数
|
||||
command.extend([
|
||||
'-b:v', '5M',
|
||||
'-maxrate', '8M',
|
||||
'-bufsize', '10M',
|
||||
'-pix_fmt', 'yuv420p', # 兼容性更好的颜色格式
|
||||
])
|
||||
|
||||
# 输出文件
|
||||
command.append(output_path)
|
||||
|
||||
# 执行命令
|
||||
try:
|
||||
subprocess.run(command, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
return output_path
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.error(f"处理视频失败: {e.stderr.decode() if e.stderr else str(e)}")
|
||||
raise RuntimeError(f"处理视频失败: {str(e)}")
|
||||
|
||||
|
||||
def combine_clip_videos(
    output_video_path: str,
    video_paths: List[str],
    video_ost_list: List[int],
    video_aspect: VideoAspect = VideoAspect.portrait,
    threads: int = 4,
) -> str:
    """
    Merge sub-clips into a single video via ffmpeg.

    Pipeline: (1) normalize each clip to the target resolution (stripping or
    keeping its original audio per `video_ost_list`), (2) concat all video
    streams without audio, (3) extract kept audio tracks, delay each to its
    clip's start offset, mix them over a silent base track, and (4) mux the
    mixed audio back onto the concatenated video. Falls back to an
    audio-less concat if the audio pipeline fails.

    Args:
        output_video_path: path for the merged output file.
        video_paths: list of sub-clip paths.
        video_ost_list: per-clip original-sound flag
            (0: drop original audio, 1: keep only original audio,
             2: keep original audio alongside narration).
        video_aspect: target aspect ratio (resolved via VideoAspect.to_resolution()).
        threads: ffmpeg thread count for the concat step.

    Returns:
        str: path of the merged video (== output_video_path).

    Raises:
        RuntimeError: if ffmpeg is missing or both merge strategies fail.
        ValueError: if no input clip could be processed.
    """
    # ffmpeg is a hard requirement for every step below.
    if not check_ffmpeg_installation():
        raise RuntimeError("未找到ffmpeg,请先安装")

    # Ensure the output directory exists.
    output_dir = os.path.dirname(output_video_path)
    os.makedirs(output_dir, exist_ok=True)

    # Resolve target resolution from the aspect ratio enum.
    aspect = VideoAspect(video_aspect)
    video_width, video_height = aspect.to_resolution()

    # Probe for an available hardware-acceleration backend (may be None/empty).
    hwaccel = get_hardware_acceleration_option()
    if hwaccel:
        logger.info(f"将使用 {hwaccel} 硬件加速")

    # Per-clip configuration records built below.
    video_segments = []

    # Guard against mismatched input lists: truncate both to the shorter one.
    if len(video_paths) != len(video_ost_list):
        logger.warning(f"视频路径列表({len(video_paths)})和原声设置列表({len(video_ost_list)})长度不匹配")
        min_length = min(len(video_paths), len(video_ost_list))
        video_paths = video_paths[:min_length]
        video_ost_list = video_ost_list[:min_length]

    # Build one config dict per clip; skip missing files.
    for i, (video_path, video_ost) in enumerate(zip(video_paths, video_ost_list)):
        if not os.path.exists(video_path):
            logger.warning(f"视频不存在,跳过: {video_path}")
            continue

        # NOTE(review): assumes check_video_has_audio() probes for an audio
        # stream and returns bool — defined elsewhere in this module.
        has_audio = check_video_has_audio(video_path)

        segment = {
            "index": i,
            "path": video_path,
            "ost": video_ost,
            "has_audio": has_audio,
            # Keep audio only when requested AND an audio stream actually exists.
            "keep_audio": video_ost > 0 and has_audio
        }

        # Warn when the caller asked for original sound but the clip has none.
        if video_ost > 0 and not has_audio:
            logger.warning(f"视频 {video_path} 设置为保留原声(ost={video_ost}),但该视频没有音频流")

        video_segments.append(segment)

    # Intermediate files live in a temp dir that is removed in `finally`.
    processed_videos = []
    temp_dir = os.path.join(output_dir, "temp_videos")
    os.makedirs(temp_dir, exist_ok=True)

    try:
        # Stage 1: normalize every clip to an intermediate file
        # (scaled/padded to target resolution, audio kept or stripped).
        for segment in video_segments:
            temp_output = os.path.join(temp_dir, f"processed_{segment['index']}.mp4")
            try:
                process_single_video(
                    input_path=segment['path'],
                    output_path=temp_output,
                    target_width=video_width,
                    target_height=video_height,
                    keep_audio=segment['keep_audio'],
                    hwaccel=hwaccel
                )
                processed_videos.append({
                    "index": segment["index"],
                    "path": temp_output,
                    "keep_audio": segment["keep_audio"]
                })
                logger.info(f"视频 {segment['index'] + 1}/{len(video_segments)} 处理完成")
            except Exception as e:
                # Best-effort: a failed clip is skipped, not fatal.
                logger.error(f"处理视频 {segment['path']} 时出错: {str(e)}")
                continue

        if not processed_videos:
            raise ValueError("没有有效的视频片段可以合并")

        # Restore original clip order after any skips.
        processed_videos.sort(key=lambda x: x["index"])

        # Stage 2: stepwise merge — avoids one giant filter_complex graph.
        try:
            # 2a. Concat all video streams into one file, audio dropped (-an).
            video_paths_only = [video["path"] for video in processed_videos]
            video_concat_path = os.path.join(temp_dir, "video_concat.mp4")

            # Concat-demuxer list file (one "file '...'" line per clip).
            concat_file = os.path.join(temp_dir, "concat_list.txt")
            create_ffmpeg_concat_file(video_paths_only, concat_file)

            concat_cmd = [
                'ffmpeg', '-y',
                '-f', 'concat',
                '-safe', '0',
                '-i', concat_file,
                '-c:v', 'libx264',
                '-preset', 'medium',
                '-profile:v', 'high',
                '-an',  # drop audio; it is mixed separately below
                '-threads', str(threads),
                video_concat_path
            ]

            subprocess.run(concat_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            logger.info("视频流合并完成")

            # 2b. Collect the clips whose original audio must be preserved.
            audio_segments = [video for video in processed_videos if video["keep_audio"]]

            if not audio_segments:
                # No audio wanted anywhere: the silent concat IS the result.
                shutil.copy(video_concat_path, output_video_path)
                logger.info("无音频视频合并完成")
                return output_video_path

            # Extract each kept audio track to a standalone AAC file.
            audio_files = []
            for i, segment in enumerate(audio_segments):
                audio_file = os.path.join(temp_dir, f"audio_{i}.aac")
                extract_audio_cmd = [
                    'ffmpeg', '-y',
                    '-i', segment["path"],
                    '-vn',  # audio only
                    '-c:a', 'aac',
                    '-b:a', '128k',
                    audio_file
                ]
                subprocess.run(extract_audio_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                audio_files.append({
                    "index": segment["index"],
                    "path": audio_file
                })
                logger.info(f"提取音频 {i+1}/{len(audio_segments)} 完成")

            # 2c. Walk the timeline: accumulate clip durations to find the
            # start offset of every kept audio track within the final video.
            audio_timings = []
            current_time = 0.0

            for i, video in enumerate(processed_videos):
                duration_cmd = [
                    'ffprobe', '-v', 'error',
                    '-show_entries', 'format=duration',
                    '-of', 'csv=p=0',
                    video["path"]
                ]
                result = subprocess.run(duration_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
                duration = float(result.stdout.strip())

                if video["keep_audio"]:
                    # Match extracted audio back to this clip by original index.
                    for audio in audio_files:
                        if audio["index"] == video["index"]:
                            audio_timings.append({
                                "file": audio["path"],
                                "start": current_time,
                                "index": video["index"]
                            })
                            break

                current_time += duration

            # 2d. Silent stereo base track spanning the whole timeline;
            # the delayed clip audios are mixed on top of it.
            silence_audio = os.path.join(temp_dir, "silence.aac")
            create_silence_cmd = [
                'ffmpeg', '-y',
                '-f', 'lavfi',
                '-i', f'anullsrc=r=44100:cl=stereo',
                '-t', str(current_time),  # total timeline duration
                '-c:a', 'aac',
                '-b:a', '128k',
                silence_audio
            ]
            subprocess.run(create_silence_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)

            # 2e. Write the audio filter graph to a script file:
            # mute the base, adelay each track to its offset, amix everything.
            filter_script = os.path.join(temp_dir, "filter_script.txt")
            with open(filter_script, 'w') as f:
                f.write(f"[0:a]volume=0.0[silence];\n")  # mute the base track

                # adelay takes per-channel delays in ms ("L|R"), hence twice.
                for i, timing in enumerate(audio_timings):
                    f.write(f"[{i+1}:a]adelay={int(timing['start']*1000)}|{int(timing['start']*1000)}[a{i}];\n")

                # Mix the silent base plus every delayed track into [aout].
                mix_str = "[silence]"
                for i in range(len(audio_timings)):
                    mix_str += f"[a{i}]"
                mix_str += f"amix=inputs={len(audio_timings)+1}:duration=longest[aout]"
                f.write(mix_str)

            # 2f. Run the mix: input 0 is the silence, inputs 1..N the tracks.
            audio_inputs = ['-i', silence_audio]
            for timing in audio_timings:
                audio_inputs.extend(['-i', timing["file"]])

            mixed_audio = os.path.join(temp_dir, "mixed_audio.aac")
            audio_mix_cmd = [
                'ffmpeg', '-y'
            ] + audio_inputs + [
                '-filter_complex_script', filter_script,
                '-map', '[aout]',
                '-c:a', 'aac',
                '-b:a', '128k',
                mixed_audio
            ]

            subprocess.run(audio_mix_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            logger.info("音频混合完成")

            # 2g. Mux: copy the concatenated video stream, attach mixed audio.
            final_cmd = [
                'ffmpeg', '-y',
                '-i', video_concat_path,
                '-i', mixed_audio,
                '-c:v', 'copy',
                '-c:a', 'aac',
                '-map', '0:v:0',
                '-map', '1:a:0',
                '-shortest',
                output_video_path
            ]

            subprocess.run(final_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            logger.info("视频最终合并完成")

            return output_video_path

        except subprocess.CalledProcessError as e:
            logger.error(f"合并视频过程中出错: {e.stderr.decode() if e.stderr else str(e)}")

            # Fallback: plain stream-copy concat with no audio at all.
            logger.info("尝试备用合并方法 - 无音频合并")
            try:
                concat_file = os.path.join(temp_dir, "concat_list.txt")
                video_paths_only = [video["path"] for video in processed_videos]
                create_ffmpeg_concat_file(video_paths_only, concat_file)

                backup_cmd = [
                    'ffmpeg', '-y',
                    '-f', 'concat',
                    '-safe', '0',
                    '-i', concat_file,
                    '-c:v', 'copy',
                    '-an',  # no audio in the fallback
                    output_video_path
                ]

                subprocess.run(backup_cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                logger.warning("使用备用方法(无音频)成功合并视频")
                return output_video_path
            except Exception as backup_error:
                logger.error(f"备用合并方法也失败: {str(backup_error)}")
                raise RuntimeError(f"无法合并视频: {str(backup_error)}")

    except Exception as e:
        logger.error(f"合并视频时出错: {str(e)}")
        raise
    finally:
        # Always remove the intermediates, even on success/early return.
        try:
            if os.path.exists(temp_dir):
                shutil.rmtree(temp_dir)
                logger.info("已清理临时文件")
        except Exception as e:
            logger.warning(f"清理临时文件时出错: {str(e)}")
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: merge a handful of local sub-clips with mixed
    # original-sound settings (absolute paths from the author's machine).
    video_paths = [
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/0ac14d474144b54d614c26a5c87cffe7/vid-00-00-00-00-00-26.mp4',
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/0ac14d474144b54d614c26a5c87cffe7/vid-00-01-15-00-01-29.mp4',
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-04-41-00-04-58.mp4',
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/0ac14d474144b54d614c26a5c87cffe7/vid-00-04-58-00-05-20.mp4',
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/0ac14d474144b54d614c26a5c87cffe7/vid-00-05-45-00-05-53.mp4',
        '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-06-00-00-06-03.mp4'
    ]

    # video_ost_list: 1 = keep original audio, 0 = drop it (see combine_clip_videos).
    combine_clip_videos(
        output_video_path="/Users/apple/Desktop/home/NarratoAI/storage/temp/merge/merged_123.mp4",
        video_paths=video_paths,
        video_ost_list=[1, 0, 1, 0, 0, 1],
        video_aspect=VideoAspect.portrait
    )
|
||||
@ -3,10 +3,11 @@ import json
|
||||
import time
|
||||
import asyncio
|
||||
import requests
|
||||
from app.utils import video_processor
|
||||
from loguru import logger
|
||||
from typing import List, Dict, Any, Callable
|
||||
|
||||
from app.utils import utils, gemini_analyzer, video_processor, video_processor_v2
|
||||
from app.utils import utils, gemini_analyzer, video_processor
|
||||
from app.utils.script_generator import ScriptProcessor
|
||||
from app.config import config
|
||||
|
||||
@ -21,6 +22,7 @@ class ScriptGenerator:
|
||||
video_path: str,
|
||||
video_theme: str = "",
|
||||
custom_prompt: str = "",
|
||||
frame_interval_input: int = 5,
|
||||
skip_seconds: int = 0,
|
||||
threshold: int = 30,
|
||||
vision_batch_size: int = 5,
|
||||
@ -105,20 +107,13 @@ class ScriptGenerator:
|
||||
os.makedirs(video_keyframes_dir, exist_ok=True)
|
||||
|
||||
try:
|
||||
if config.frames.get("version") == "v2":
|
||||
processor = video_processor_v2.VideoProcessor(video_path)
|
||||
processor.process_video_pipeline(
|
||||
output_dir=video_keyframes_dir,
|
||||
skip_seconds=skip_seconds,
|
||||
threshold=threshold
|
||||
)
|
||||
else:
|
||||
processor = video_processor.VideoProcessor(video_path)
|
||||
processor.process_video(
|
||||
output_dir=video_keyframes_dir,
|
||||
skip_seconds=skip_seconds
|
||||
)
|
||||
|
||||
processor = video_processor.VideoProcessor(video_path)
|
||||
processor.process_video_pipeline(
|
||||
output_dir=video_keyframes_dir,
|
||||
skip_seconds=skip_seconds,
|
||||
threshold=threshold
|
||||
)
|
||||
|
||||
for filename in sorted(os.listdir(video_keyframes_dir)):
|
||||
if filename.endswith('.jpg'):
|
||||
keyframe_files.append(os.path.join(video_keyframes_dir, filename))
|
||||
|
||||
@ -4,11 +4,11 @@ import re
|
||||
import traceback
|
||||
from typing import Optional
|
||||
|
||||
from faster_whisper import WhisperModel
|
||||
# from faster_whisper import WhisperModel
|
||||
from timeit import default_timer as timer
|
||||
from loguru import logger
|
||||
import google.generativeai as genai
|
||||
from moviepy.editor import VideoFileClip
|
||||
from moviepy import VideoFileClip
|
||||
import os
|
||||
|
||||
from app.config import config
|
||||
@ -33,7 +33,7 @@ def create(audio_file, subtitle_file: str = ""):
|
||||
"""
|
||||
global model, device, compute_type
|
||||
if not model:
|
||||
model_path = f"{utils.root_dir()}/app/models/faster-whisper-large-v2"
|
||||
model_path = f"{utils.root_dir()}/app/models/faster-whisper-large-v3"
|
||||
model_bin_file = f"{model_path}/model.bin"
|
||||
if not os.path.isdir(model_path) or not os.path.isfile(model_bin_file):
|
||||
logger.error(
|
||||
@ -45,12 +45,25 @@ def create(audio_file, subtitle_file: str = ""):
|
||||
)
|
||||
return None
|
||||
|
||||
# 尝试使用 CUDA,如果失败则回退到 CPU
|
||||
# 首先使用CPU模式,不触发CUDA检查
|
||||
use_cuda = False
|
||||
try:
|
||||
import torch
|
||||
if torch.cuda.is_available():
|
||||
# 在函数中延迟导入torch,而不是在全局范围内
|
||||
# 使用安全的方式检查CUDA可用性
|
||||
def check_cuda_available():
|
||||
try:
|
||||
import torch
|
||||
return torch.cuda.is_available()
|
||||
except (ImportError, RuntimeError) as e:
|
||||
logger.warning(f"检查CUDA可用性时出错: {e}")
|
||||
return False
|
||||
|
||||
# 仅当明确需要时才检查CUDA
|
||||
use_cuda = check_cuda_available()
|
||||
|
||||
if use_cuda:
|
||||
logger.info(f"尝试使用 CUDA 加载模型: {model_path}")
|
||||
try:
|
||||
logger.info(f"尝试使用 CUDA 加载模型: {model_path}")
|
||||
model = WhisperModel(
|
||||
model_size_or_path=model_path,
|
||||
device="cuda",
|
||||
@ -63,18 +76,18 @@ def create(audio_file, subtitle_file: str = ""):
|
||||
except Exception as e:
|
||||
logger.warning(f"CUDA 加载失败,错误信息: {str(e)}")
|
||||
logger.warning("回退到 CPU 模式")
|
||||
device = "cpu"
|
||||
compute_type = "int8"
|
||||
use_cuda = False
|
||||
else:
|
||||
logger.info("未检测到 CUDA,使用 CPU 模式")
|
||||
device = "cpu"
|
||||
compute_type = "int8"
|
||||
except ImportError:
|
||||
logger.warning("未安装 torch,使用 CPU 模式")
|
||||
logger.info("使用 CPU 模式")
|
||||
except Exception as e:
|
||||
logger.warning(f"CUDA检查过程出错: {e}")
|
||||
logger.warning("默认使用CPU模式")
|
||||
use_cuda = False
|
||||
|
||||
# 如果CUDA不可用或加载失败,使用CPU
|
||||
if not use_cuda:
|
||||
device = "cpu"
|
||||
compute_type = "int8"
|
||||
|
||||
if device == "cpu":
|
||||
logger.info(f"使用 CPU 加载模型: {model_path}")
|
||||
model = WhisperModel(
|
||||
model_size_or_path=model_path,
|
||||
@ -403,7 +416,7 @@ def extract_audio_and_create_subtitle(video_file: str, subtitle_file: str = "")
|
||||
logger.info("音频提取完成,开始生成字幕")
|
||||
|
||||
# 使用create函数生成字幕
|
||||
create(audio_file, subtitle_file)
|
||||
create("/Users/apple/Desktop/WhisperX-zhuanlu/1_qyn2-2_Vocals.wav", subtitle_file)
|
||||
|
||||
# 删除临时音频文件
|
||||
if os.path.exists(audio_file):
|
||||
@ -422,8 +435,8 @@ if __name__ == "__main__":
|
||||
task_id = "123456"
|
||||
task_dir = utils.task_dir(task_id)
|
||||
subtitle_file = f"{task_dir}/subtitle_123456.srt"
|
||||
audio_file = f"{task_dir}/audio.wav"
|
||||
video_file = "/Users/apple/Desktop/home/NarratoAI/resource/videos/merged_video_1702.mp4"
|
||||
audio_file = "/Users/apple/Desktop/WhisperX-zhuanlu/1_qyn2-2_Vocals.wav"
|
||||
video_file = "/Users/apple/Desktop/home/NarratoAI/storage/temp/merge/qyn2-2-720p.mp4"
|
||||
|
||||
extract_audio_and_create_subtitle(video_file, subtitle_file)
|
||||
|
||||
|
||||
202
app/services/subtitle_merger.py
Normal file
202
app/services/subtitle_merger.py
Normal file
@ -0,0 +1,202 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: UTF-8 -*-
|
||||
|
||||
'''
|
||||
@Project: NarratoAI
|
||||
@File : subtitle_merger
|
||||
@Author : viccy
|
||||
@Date : 2025/5/6 下午4:00
|
||||
'''
|
||||
|
||||
import re
|
||||
import os
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
|
||||
def parse_time(time_str):
    """Parse an SRT timestamp string ("HH:MM:SS,mmm") into a timedelta."""
    clock_part, _, ms_part = time_str.partition(',')
    hrs, mins, secs = clock_part.split(':')
    return timedelta(
        hours=int(hrs),
        minutes=int(mins),
        seconds=int(secs),
        milliseconds=int(ms_part),
    )
|
||||
|
||||
|
||||
def format_time(td):
    """Format a timedelta as an SRT timestamp string ("HH:MM:SS,mmm")."""
    whole_seconds = int(td.total_seconds())
    mins, secs = divmod(whole_seconds, 60)
    hrs, mins = divmod(mins, 60)
    # timedelta stores the sub-second part in microseconds; truncate to ms.
    millis = td.microseconds // 1000
    return f"{hrs:02d}:{mins:02d}:{secs:02d},{millis:03d}"
|
||||
|
||||
|
||||
def parse_edited_time_range(time_range_str):
    """Extract a (start, end) pair of timedeltas from an "HH:MM:SS-HH:MM:SS" string.

    Args:
        time_range_str: range string such as "00:01:15-00:04:40"; may be
            empty or malformed.

    Returns:
        tuple[timedelta, timedelta] on success, (None, None) when the input
        is empty or malformed — callers use the None sentinel to skip bad
        entries, so this function must not raise on bad input.
    """
    if not time_range_str:
        return None, None

    parts = time_range_str.split('-')
    if len(parts) != 2:
        return None, None

    start_time_str, end_time_str = parts

    # Fix: the original raised ValueError on non-numeric components or the
    # wrong number of ':' fields; treat those as malformed instead.
    try:
        start_h, start_m, start_s = map(int, start_time_str.split(':'))
        end_h, end_m, end_s = map(int, end_time_str.split(':'))
    except ValueError:
        return None, None

    start_time = timedelta(hours=start_h, minutes=start_m, seconds=start_s)
    end_time = timedelta(hours=end_h, minutes=end_m, seconds=end_s)

    return start_time, end_time
|
||||
|
||||
|
||||
def merge_subtitle_files(subtitle_items, output_file=None):
    """
    Merge multiple SRT subtitle files into one, shifting each file's cues
    by the start of its item's `editedTimeRange` so all timestamps are on
    the merged-video timeline.

    Args:
        subtitle_items: list of dicts, each carrying at least a `subtitle`
            file path and an `editedTimeRange` ("HH:MM:SS-HH:MM:SS") giving
            the item's position in the edited output.
        output_file: output path; if None a name is derived from the first
            start / last end of the sorted items, next to the first subtitle.

    Returns:
        Path of the merged subtitle file.
    """
    # Sort items by their edited start time; items with a missing/broken
    # range sort as timedelta(0).
    sorted_items = sorted(subtitle_items,
                          key=lambda x: parse_edited_time_range(x.get('editedTimeRange', ''))[0] or timedelta())

    merged_subtitles = []
    subtitle_index = 1  # cue numbers are renumbered sequentially

    for item in sorted_items:
        # Skip items without a subtitle file on disk.
        if not item.get('subtitle') or not os.path.exists(item.get('subtitle')):
            continue

        # Offset every cue in this file by the item's edited start time.
        offset_time, _ = parse_edited_time_range(item.get('editedTimeRange', ''))

        if offset_time is None:
            print(f"警告: 无法从项目 {item.get('_id')} 的editedTimeRange中提取时间范围,跳过该项")
            continue

        with open(item['subtitle'], 'r', encoding='utf-8') as file:
            content = file.read()

        # SRT cues are separated by blank lines.
        subtitle_blocks = re.split(r'\n\s*\n', content.strip())

        for block in subtitle_blocks:
            lines = block.strip().split('\n')
            if len(lines) < 3:  # need index, timing line, and at least one text line
                continue

            # Line 1 of a cue is "start --> end".
            time_line = lines[1]
            time_parts = time_line.split(' --> ')
            if len(time_parts) != 2:
                continue

            start_time = parse_time(time_parts[0])
            end_time = parse_time(time_parts[1])

            # Shift this cue onto the merged timeline.
            adjusted_start_time = start_time + offset_time
            adjusted_end_time = end_time + offset_time

            # Rebuild the cue with a fresh sequential index.
            adjusted_time_line = f"{format_time(adjusted_start_time)} --> {format_time(adjusted_end_time)}"
            text_lines = lines[2:]

            new_block = [
                str(subtitle_index),
                adjusted_time_line,
                *text_lines
            ]

            merged_subtitles.append('\n'.join(new_block))
            subtitle_index += 1

    # Derive an output path from the first/last edited time if none given.
    # NOTE(review): assumes sorted_items is non-empty and its first/last
    # entries have valid editedTimeRange values; also `.seconds` wraps for
    # ranges >= 24h (use total_seconds() if that ever matters) — confirm.
    if output_file is None:
        dir_path = os.path.dirname(sorted_items[0]['subtitle'])
        first_start = parse_edited_time_range(sorted_items[0]['editedTimeRange'])[0]
        last_end = parse_edited_time_range(sorted_items[-1]['editedTimeRange'])[1]

        first_start_h, first_start_m, first_start_s = int(first_start.seconds // 3600), int((first_start.seconds % 3600) // 60), int(first_start.seconds % 60)
        last_end_h, last_end_m, last_end_s = int(last_end.seconds // 3600), int((last_end.seconds % 3600) // 60), int(last_end.seconds % 60)

        first_start_str = f"{first_start_h:02d}_{first_start_m:02d}_{first_start_s:02d}"
        last_end_str = f"{last_end_h:02d}_{last_end_m:02d}_{last_end_s:02d}"

        output_file = os.path.join(dir_path, f"merged_subtitle_{first_start_str}-{last_end_str}.srt")

    # Join all cues back into a single SRT document.
    merged_content = '\n\n'.join(merged_subtitles)

    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(merged_content)

    return output_file
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: merge four narration subtitle files produced for a
    # demo task, using each item's editedTimeRange as the cue offset.
    # (Absolute paths from the author's machine; runtime strings unchanged.)
    test_data = [
        {'picture': '【解说】好的,各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!',
         'timestamp': '00:00:00-00:01:15',
         'narration': '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!',
         'OST': 0,
         '_id': 1,
         'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3',
         'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt',
         'sourceTimeRange': '00:00:00-00:00:26',
         'duration': 26,
         'editedTimeRange': '00:00:00-00:00:26'
         },
        {'picture': '【解说】上一集我们看到,范闲在北齐遭遇了惊天变故,生死不明!',
         'timestamp': '00:01:15-00:04:40',
         'narration': '但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…',
         'OST': 0,
         '_id': 2,
         'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3',
         'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt',
         'sourceTimeRange': '00:01:15-00:01:29',
         'duration': 14,
         'editedTimeRange': '00:00:26-00:00:40'
         },
        {'picture': '【解说】"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。',
         'timestamp': '00:04:58-00:05:45',
         'narration': '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!',
         'OST': 0,
         '_id': 4,
         'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3',
         'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt',
         'sourceTimeRange': '00:04:58-00:05:20',
         'duration': 22,
         'editedTimeRange': '00:00:57-00:01:19'
         },
        {'picture': '【解说】但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
         'timestamp': '00:05:45-00:06:00',
         'narration': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
         'OST': 0,
         '_id': 5,
         'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3',
         'subtitle': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt',
         'sourceTimeRange': '00:05:45-00:05:53',
         'duration': 8,
         'editedTimeRange': '00:01:19-00:01:27'
         }
    ]

    output_file = merge_subtitle_files(test_data)
    print(f"字幕文件已合并至: {output_file}")
|
||||
@ -9,167 +9,177 @@ from loguru import logger
|
||||
from app.config import config
|
||||
from app.models import const
|
||||
from app.models.schema import VideoConcatMode, VideoParams, VideoClipParams
|
||||
from app.services import llm, material, subtitle, video, voice, audio_merger
|
||||
from app.services import (llm, material, subtitle, video, voice, audio_merger,
|
||||
subtitle_merger, clip_video, merger_video, update_script, generate_video)
|
||||
from app.services import state as sm
|
||||
from app.utils import utils
|
||||
|
||||
|
||||
def generate_script(task_id, params):
|
||||
logger.info("\n\n## generating video script")
|
||||
video_script = params.video_script.strip()
|
||||
if not video_script:
|
||||
video_script = llm.generate_script(
|
||||
video_subject=params.video_subject,
|
||||
language=params.video_language,
|
||||
paragraph_number=params.paragraph_number,
|
||||
)
|
||||
else:
|
||||
logger.debug(f"video script: \n{video_script}")
|
||||
# def generate_script(task_id, params):
|
||||
# logger.info("\n\n## generating video script")
|
||||
# video_script = params.video_script.strip()
|
||||
# if not video_script:
|
||||
# video_script = llm.generate_script(
|
||||
# video_subject=params.video_subject,
|
||||
# language=params.video_language,
|
||||
# paragraph_number=params.paragraph_number,
|
||||
# )
|
||||
# else:
|
||||
# logger.debug(f"video script: \n{video_script}")
|
||||
|
||||
if not video_script:
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
logger.error("failed to generate video script.")
|
||||
return None
|
||||
# if not video_script:
|
||||
# sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
# logger.error("failed to generate video script.")
|
||||
# return None
|
||||
|
||||
return video_script
|
||||
# return video_script
|
||||
|
||||
|
||||
def generate_terms(task_id, params, video_script):
|
||||
logger.info("\n\n## generating video terms")
|
||||
video_terms = params.video_terms
|
||||
if not video_terms:
|
||||
video_terms = llm.generate_terms(
|
||||
video_subject=params.video_subject, video_script=video_script, amount=5
|
||||
)
|
||||
else:
|
||||
if isinstance(video_terms, str):
|
||||
video_terms = [term.strip() for term in re.split(r"[,,]", video_terms)]
|
||||
elif isinstance(video_terms, list):
|
||||
video_terms = [term.strip() for term in video_terms]
|
||||
else:
|
||||
raise ValueError("video_terms must be a string or a list of strings.")
|
||||
# def generate_terms(task_id, params, video_script):
|
||||
# logger.info("\n\n## generating video terms")
|
||||
# video_terms = params.video_terms
|
||||
# if not video_terms:
|
||||
# video_terms = llm.generate_terms(
|
||||
# video_subject=params.video_subject, video_script=video_script, amount=5
|
||||
# )
|
||||
# else:
|
||||
# if isinstance(video_terms, str):
|
||||
# video_terms = [term.strip() for term in re.split(r"[,,]", video_terms)]
|
||||
# elif isinstance(video_terms, list):
|
||||
# video_terms = [term.strip() for term in video_terms]
|
||||
# else:
|
||||
# raise ValueError("video_terms must be a string or a list of strings.")
|
||||
|
||||
logger.debug(f"video terms: {utils.to_json(video_terms)}")
|
||||
# logger.debug(f"video terms: {utils.to_json(video_terms)}")
|
||||
|
||||
if not video_terms:
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
logger.error("failed to generate video terms.")
|
||||
return None
|
||||
# if not video_terms:
|
||||
# sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
# logger.error("failed to generate video terms.")
|
||||
# return None
|
||||
|
||||
return video_terms
|
||||
# return video_terms
|
||||
|
||||
|
||||
def save_script_data(task_id, video_script, video_terms, params):
|
||||
script_file = path.join(utils.task_dir(task_id), "script.json")
|
||||
script_data = {
|
||||
"script": video_script,
|
||||
"search_terms": video_terms,
|
||||
"params": params,
|
||||
}
|
||||
# def save_script_data(task_id, video_script, video_terms, params):
|
||||
# script_file = path.join(utils.task_dir(task_id), "script.json")
|
||||
# script_data = {
|
||||
# "script": video_script,
|
||||
# "search_terms": video_terms,
|
||||
# "params": params,
|
||||
# }
|
||||
|
||||
with open(script_file, "w", encoding="utf-8") as f:
|
||||
f.write(utils.to_json(script_data))
|
||||
# with open(script_file, "w", encoding="utf-8") as f:
|
||||
# f.write(utils.to_json(script_data))
|
||||
|
||||
|
||||
def generate_audio(task_id, params, video_script):
|
||||
logger.info("\n\n## generating audio")
|
||||
audio_file = path.join(utils.task_dir(task_id), "audio.mp3")
|
||||
sub_maker = voice.tts(
|
||||
text=video_script,
|
||||
voice_name=voice.parse_voice_name(params.voice_name),
|
||||
voice_rate=params.voice_rate,
|
||||
voice_file=audio_file,
|
||||
)
|
||||
if sub_maker is None:
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
logger.error(
|
||||
"""failed to generate audio:
|
||||
1. check if the language of the voice matches the language of the video script.
|
||||
2. check if the network is available. If you are in China, it is recommended to use a VPN and enable the global traffic mode.
|
||||
""".strip()
|
||||
)
|
||||
return None, None, None
|
||||
# def generate_audio(task_id, params, video_script):
|
||||
# logger.info("\n\n## generating audio")
|
||||
# audio_file = path.join(utils.task_dir(task_id), "audio.mp3")
|
||||
# sub_maker = voice.tts(
|
||||
# text=video_script,
|
||||
# voice_name=voice.parse_voice_name(params.voice_name),
|
||||
# voice_rate=params.voice_rate,
|
||||
# voice_file=audio_file,
|
||||
# )
|
||||
# if sub_maker is None:
|
||||
# sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
# logger.error(
|
||||
# """failed to generate audio:
|
||||
# 1. check if the language of the voice matches the language of the video script.
|
||||
# 2. check if the network is available. If you are in China, it is recommended to use a VPN and enable the global traffic mode.
|
||||
# """.strip()
|
||||
# )
|
||||
# return None, None, None
|
||||
|
||||
audio_duration = math.ceil(voice.get_audio_duration(sub_maker))
|
||||
return audio_file, audio_duration, sub_maker
|
||||
# audio_duration = math.ceil(voice.get_audio_duration(sub_maker))
|
||||
# return audio_file, audio_duration, sub_maker
|
||||
|
||||
|
||||
def generate_subtitle(task_id, params, video_script, sub_maker, audio_file):
|
||||
if not params.subtitle_enabled:
|
||||
return ""
|
||||
# def generate_subtitle(task_id, params, video_script, sub_maker, audio_file):
|
||||
# if not params.subtitle_enabled:
|
||||
# return ""
|
||||
|
||||
subtitle_path = path.join(utils.task_dir(task_id), "subtitle111.srt")
|
||||
subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
|
||||
logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")
|
||||
# subtitle_path = path.join(utils.task_dir(task_id), "subtitle111.srt")
|
||||
# subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
|
||||
# logger.info(f"\n\n## generating subtitle, provider: {subtitle_provider}")
|
||||
|
||||
subtitle_fallback = False
|
||||
if subtitle_provider == "edge":
|
||||
voice.create_subtitle(
|
||||
text=video_script, sub_maker=sub_maker, subtitle_file=subtitle_path
|
||||
)
|
||||
if not os.path.exists(subtitle_path):
|
||||
subtitle_fallback = True
|
||||
logger.warning("subtitle file not found, fallback to whisper")
|
||||
# subtitle_fallback = False
|
||||
# if subtitle_provider == "edge":
|
||||
# voice.create_subtitle(
|
||||
# text=video_script, sub_maker=sub_maker, subtitle_file=subtitle_path
|
||||
# )
|
||||
# if not os.path.exists(subtitle_path):
|
||||
# subtitle_fallback = True
|
||||
# logger.warning("subtitle file not found, fallback to whisper")
|
||||
|
||||
if subtitle_provider == "whisper" or subtitle_fallback:
|
||||
subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
|
||||
logger.info("\n\n## correcting subtitle")
|
||||
subtitle.correct(subtitle_file=subtitle_path, video_script=video_script)
|
||||
# if subtitle_provider == "whisper" or subtitle_fallback:
|
||||
# subtitle.create(audio_file=audio_file, subtitle_file=subtitle_path)
|
||||
# logger.info("\n\n## correcting subtitle")
|
||||
# subtitle.correct(subtitle_file=subtitle_path, video_script=video_script)
|
||||
|
||||
subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
|
||||
if not subtitle_lines:
|
||||
logger.warning(f"subtitle file is invalid: {subtitle_path}")
|
||||
return ""
|
||||
# subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
|
||||
# if not subtitle_lines:
|
||||
# logger.warning(f"subtitle file is invalid: {subtitle_path}")
|
||||
# return ""
|
||||
|
||||
return subtitle_path
|
||||
# return subtitle_path
|
||||
|
||||
|
||||
def get_video_materials(task_id, params, video_terms, audio_duration):
    """Collect the video materials a task will be cut from.

    For a local source the user-supplied files are validated/pre-processed;
    for a remote source clips are downloaded to cover the narration length.
    On failure the task is marked FAILED and ``None`` is returned.

    Args:
        task_id: identifier of the running task (used for state updates).
        params: task parameters (source, aspect, concat mode, durations, ...).
        video_terms: search terms used when downloading remote videos.
        audio_duration: narration duration in seconds; multiplied by
            ``params.video_count`` to size the remote download.

    Returns:
        A list of file paths / URLs, or ``None`` on failure.
    """
    # Local source: only validate and pre-trim the user-provided files.
    if params.video_source == "local":
        logger.info("\n\n## preprocess local materials")
        processed = video.preprocess_video(
            materials=params.video_materials, clip_duration=params.video_clip_duration
        )
        if not processed:
            sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
            logger.error(
                "no valid materials found, please check the materials and try again."
            )
            return None
        return [item.url for item in processed]

    # Remote source: fetch enough footage to cover the whole narration.
    logger.info(f"\n\n## downloading videos from {params.video_source}")
    fetched = material.download_videos(
        task_id=task_id,
        search_terms=video_terms,
        source=params.video_source,
        video_aspect=params.video_aspect,
        video_contact_mode=params.video_concat_mode,
        audio_duration=audio_duration * params.video_count,
        max_clip_duration=params.video_clip_duration,
    )
    if not fetched:
        sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
        logger.error(
            "failed to download videos, maybe the network is not available. if you are in China, please use a VPN."
        )
        return None
    return fetched
|
||||
# def get_video_materials(task_id, params, video_terms, audio_duration):
|
||||
# if params.video_source == "local":
|
||||
# logger.info("\n\n## preprocess local materials")
|
||||
# materials = video.preprocess_video(
|
||||
# materials=params.video_materials, clip_duration=params.video_clip_duration
|
||||
# )
|
||||
# if not materials:
|
||||
# sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
# logger.error(
|
||||
# "no valid materials found, please check the materials and try again."
|
||||
# )
|
||||
# return None
|
||||
# return [material_info.url for material_info in materials]
|
||||
# else:
|
||||
# logger.info(f"\n\n## downloading videos from {params.video_source}")
|
||||
# downloaded_videos = material.download_videos(
|
||||
# task_id=task_id,
|
||||
# search_terms=video_terms,
|
||||
# source=params.video_source,
|
||||
# video_aspect=params.video_aspect,
|
||||
# video_contact_mode=params.video_concat_mode,
|
||||
# audio_duration=audio_duration * params.video_count,
|
||||
# max_clip_duration=params.video_clip_duration,
|
||||
# )
|
||||
# if not downloaded_videos:
|
||||
# sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
# logger.error(
|
||||
# "failed to download videos, maybe the network is not available. if you are in China, please use a VPN."
|
||||
# )
|
||||
# return None
|
||||
# return downloaded_videos
|
||||
|
||||
|
||||
def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: dict):
|
||||
"""后台任务(自动剪辑视频进行剪辑)"""
|
||||
"""
|
||||
后台任务(自动剪辑视频进行剪辑)
|
||||
Args:
|
||||
task_id: 任务ID
|
||||
params: 视频参数
|
||||
subclip_path_videos: 视频片段路径
|
||||
"""
|
||||
global merged_audio_path, merged_subtitle_path
|
||||
|
||||
logger.info(f"\n\n## 开始任务: {task_id}")
|
||||
|
||||
# 初始化 ImageMagick
|
||||
if not utils.init_imagemagick():
|
||||
logger.warning("ImageMagick 初始化失败,字幕可能无法正常显示")
|
||||
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=5)
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=0)
|
||||
|
||||
# tts 角色名称
|
||||
voice_name = voice.parse_voice_name(params.voice_name)
|
||||
# # 初始化 ImageMagick
|
||||
# if not utils.init_imagemagick():
|
||||
# logger.warning("ImageMagick 初始化失败,字幕可能无法正常显示")
|
||||
|
||||
# # tts 角色名称
|
||||
# voice_name = voice.parse_voice_name(params.voice_name)
|
||||
"""
|
||||
1. 加载剪辑脚本
|
||||
"""
|
||||
logger.info("\n\n## 1. 加载视频脚本")
|
||||
video_script_path = path.join(params.video_clip_json_path)
|
||||
|
||||
@ -185,174 +195,144 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
|
||||
logger.debug(f"解说完整脚本: \n{video_script}")
|
||||
logger.debug(f"解说 OST 列表: \n{video_ost}")
|
||||
logger.debug(f"解说时间戳列表: \n{time_list}")
|
||||
|
||||
# 获取视频总时长(单位 s)
|
||||
last_timestamp = list_script[-1]['new_timestamp']
|
||||
end_time = last_timestamp.split("-")[1]
|
||||
total_duration = utils.time_to_seconds(end_time)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"无法读取视频json脚本,请检查配置是否正确。{e}")
|
||||
raise ValueError("无法读取视频json脚本,请检查配置是否正确")
|
||||
logger.error(f"无法读取视频json脚本,请检查脚本格式是否正确")
|
||||
raise ValueError("无法读取视频json脚本,请检查脚本格式是否正确")
|
||||
else:
|
||||
logger.error(f"video_script_path: {video_script_path} \n\n", traceback.format_exc())
|
||||
raise ValueError("解说脚本不存在!请检查配置是否正确。")
|
||||
|
||||
"""
|
||||
2. 使用 TTS 生成音频素材
|
||||
"""
|
||||
logger.info("\n\n## 2. 根据OST设置生成音频列表")
|
||||
# 只为OST=0或2的片段生成TTS音频
|
||||
# 只为OST=0 or 2的判断生成音频, OST=0 仅保留解说 OST=2 保留解说和原声
|
||||
tts_segments = [
|
||||
segment for segment in list_script
|
||||
if segment['OST'] in [0, 2]
|
||||
]
|
||||
logger.debug(f"需要生成TTS的片段数: {len(tts_segments)}")
|
||||
|
||||
# 初始化音频文件路径
|
||||
audio_files = []
|
||||
final_audio = ""
|
||||
|
||||
|
||||
tts_results = voice.tts_multiple(
|
||||
task_id=task_id,
|
||||
list_script=tts_segments, # 只传入需要TTS的片段
|
||||
voice_name=params.voice_name,
|
||||
voice_rate=params.voice_rate,
|
||||
voice_pitch=params.voice_pitch,
|
||||
)
|
||||
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)
|
||||
|
||||
# """
|
||||
# 3. (可选) 使用 whisper 生成字幕
|
||||
# """
|
||||
# if merged_subtitle_path is None:
|
||||
# if audio_files:
|
||||
# merged_subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
|
||||
# subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
|
||||
# logger.info(f"\n\n使用 {subtitle_provider} 生成字幕")
|
||||
#
|
||||
# subtitle.create(
|
||||
# audio_file=merged_audio_path,
|
||||
# subtitle_file=merged_subtitle_path,
|
||||
# )
|
||||
# subtitle_lines = subtitle.file_to_subtitles(merged_subtitle_path)
|
||||
# if not subtitle_lines:
|
||||
# logger.warning(f"字幕文件无效: {merged_subtitle_path}")
|
||||
#
|
||||
# sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)
|
||||
|
||||
"""
|
||||
3. 裁剪视频 - 将超出音频长度的视频进行裁剪
|
||||
"""
|
||||
logger.info("\n\n## 3. 裁剪视频")
|
||||
video_clip_result = clip_video.clip_video(params.video_origin_path, tts_results)
|
||||
# 更新 list_script 中的时间戳
|
||||
tts_clip_result = {tts_result['_id']: tts_result['audio_file'] for tts_result in tts_results}
|
||||
subclip_clip_result = {
|
||||
tts_result['_id']: tts_result['subtitle_file'] for tts_result in tts_results
|
||||
}
|
||||
new_script_list = update_script.update_script_timestamps(list_script, video_clip_result, tts_clip_result, subclip_clip_result)
|
||||
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60)
|
||||
|
||||
"""
|
||||
4. 合并音频和字幕
|
||||
"""
|
||||
logger.info("\n\n## 4. 合并音频和字幕")
|
||||
total_duration = sum([script["duration"] for script in new_script_list])
|
||||
if tts_segments:
|
||||
audio_files, sub_maker_list = voice.tts_multiple(
|
||||
task_id=task_id,
|
||||
list_script=tts_segments, # 只传入需要TTS的片段
|
||||
voice_name=voice_name,
|
||||
voice_rate=params.voice_rate,
|
||||
voice_pitch=params.voice_pitch,
|
||||
force_regenerate=True
|
||||
)
|
||||
if audio_files is None:
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
logger.error("TTS转换音频失败, 可能是网络不可用! 如果您在中国, 请使用VPN.")
|
||||
return
|
||||
|
||||
if audio_files:
|
||||
logger.info(f"合并音频文件: {audio_files}")
|
||||
try:
|
||||
# 传入OST信息以便正确处理音频
|
||||
final_audio = audio_merger.merge_audio_files(
|
||||
task_id=task_id,
|
||||
audio_files=audio_files,
|
||||
total_duration=total_duration,
|
||||
list_script=list_script # 传入完整脚本以便处理OST
|
||||
)
|
||||
logger.info("音频文件合并成功")
|
||||
except Exception as e:
|
||||
logger.error(f"合并音频文件失败: {str(e)}")
|
||||
final_audio = ""
|
||||
else:
|
||||
# 如果没有需要生成TTS的片段,创建一个空白音频文件
|
||||
# 这样可以确保后续的音频处理能正确进行
|
||||
logger.info("没有需要生成TTS的片段,将保留原声和背景音乐")
|
||||
final_audio = path.join(utils.task_dir(task_id), "empty.mp3")
|
||||
try:
|
||||
from moviepy.editor import AudioClip
|
||||
# 创建一个与视频等长的空白音频
|
||||
empty_audio = AudioClip(make_frame=lambda t: 0, duration=total_duration)
|
||||
empty_audio.write_audiofile(final_audio, fps=44100)
|
||||
logger.info(f"已创建空白音频文件: {final_audio}")
|
||||
except Exception as e:
|
||||
logger.error(f"创建空白音频文件失败: {str(e)}")
|
||||
final_audio = ""
|
||||
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=30)
|
||||
|
||||
subtitle_path = ""
|
||||
if params.subtitle_enabled:
|
||||
if audio_files:
|
||||
subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
|
||||
subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
|
||||
logger.info(f"\n\n## 3. 生成字幕、提供程序是: {subtitle_provider}")
|
||||
|
||||
subtitle.create(
|
||||
audio_file=final_audio,
|
||||
subtitle_file=subtitle_path,
|
||||
# 合并音频文件
|
||||
merged_audio_path = audio_merger.merge_audio_files(
|
||||
task_id=task_id,
|
||||
total_duration=total_duration,
|
||||
list_script=new_script_list
|
||||
)
|
||||
logger.info(f"音频文件合并成功->{merged_audio_path}")
|
||||
# 合并字幕文件
|
||||
merged_subtitle_path = subtitle_merger.merge_subtitle_files(new_script_list)
|
||||
logger.info(f"字幕文件合并成功->{merged_subtitle_path}")
|
||||
except Exception as e:
|
||||
logger.error(f"合并音频文件失败: {str(e)}")
|
||||
else:
|
||||
logger.warning("没有需要合并的音频/字幕")
|
||||
merged_audio_path = ""
|
||||
merged_subtitle_path = ""
|
||||
|
||||
subtitle_lines = subtitle.file_to_subtitles(subtitle_path)
|
||||
if not subtitle_lines:
|
||||
logger.warning(f"字幕文件无效: {subtitle_path}")
|
||||
subtitle_path = ""
|
||||
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)
|
||||
|
||||
logger.info("\n\n## 4. 裁剪视频")
|
||||
subclip_videos = [x for x in subclip_path_videos.values()]
|
||||
# logger.debug(f"\n\n## 裁剪后的视频文件列表: \n{subclip_videos}")
|
||||
|
||||
if not subclip_videos:
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_FAILED)
|
||||
logger.error(
|
||||
"裁剪视频失败,可能是 ImageMagick 不可用")
|
||||
return
|
||||
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=50)
|
||||
|
||||
"""
|
||||
5. 合并视频
|
||||
"""
|
||||
final_video_paths = []
|
||||
combined_video_paths = []
|
||||
|
||||
_progress = 50
|
||||
index = 1
|
||||
combined_video_path = path.join(utils.task_dir(task_id), f"combined.mp4")
|
||||
combined_video_path = path.join(utils.task_dir(task_id), f"merger.mp4")
|
||||
logger.info(f"\n\n## 5. 合并视频: => {combined_video_path}")
|
||||
# 如果 new_script_list 中没有 video,则使用 subclip_path_videos 中的视频
|
||||
video_clips = [new_script['video'] if new_script.get('video') else subclip_path_videos.get(new_script.get('_id', '')) for new_script in new_script_list]
|
||||
|
||||
video.combine_clip_videos(
|
||||
combined_video_path=combined_video_path,
|
||||
video_paths=subclip_videos,
|
||||
merger_video.combine_clip_videos(
|
||||
output_video_path=combined_video_path,
|
||||
video_paths=video_clips,
|
||||
video_ost_list=video_ost,
|
||||
list_script=list_script,
|
||||
video_aspect=params.video_aspect,
|
||||
threads=params.n_threads # 多线程
|
||||
threads=params.n_threads
|
||||
)
|
||||
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=80)
|
||||
|
||||
_progress += 50 / 2
|
||||
sm.state.update_task(task_id, progress=_progress)
|
||||
"""
|
||||
6. 合并字幕/BGM/配音/视频
|
||||
"""
|
||||
output_video_path = path.join(utils.task_dir(task_id), f"combined.mp4")
|
||||
logger.info(f"\n\n## 6. 最后一步: 合并字幕/BGM/配音/视频 -> {output_video_path}")
|
||||
|
||||
final_video_path = path.join(utils.task_dir(task_id), f"final-{index}.mp4")
|
||||
# bgm_path = '/Users/apple/Desktop/home/NarratoAI/resource/songs/bgm.mp3'
|
||||
bgm_path = utils.get_bgm_file()
|
||||
|
||||
logger.info(f"\n\n## 6. 最后合成: {index} => {final_video_path}")
|
||||
|
||||
# 获取背景音乐
|
||||
bgm_path = None
|
||||
if params.bgm_type or params.bgm_file:
|
||||
try:
|
||||
bgm_path = utils.get_bgm_file(bgm_type=params.bgm_type, bgm_file=params.bgm_file)
|
||||
if bgm_path:
|
||||
logger.info(f"使用背景音乐: {bgm_path}")
|
||||
except Exception as e:
|
||||
logger.error(f"获取背景音乐失败: {str(e)}")
|
||||
|
||||
# 示例:自定义字幕样式
|
||||
subtitle_style = {
|
||||
'fontsize': params.font_size, # 字体大小
|
||||
'color': params.text_fore_color, # 字体颜色
|
||||
'stroke_color': params.stroke_color, # 描边颜色
|
||||
'stroke_width': params.stroke_width, # 描边宽度, 范围0-10
|
||||
'bg_color': params.text_back_color, # 半透明黑色背景
|
||||
'position': (params.subtitle_position, 0.2), # 距离顶部60%的位置
|
||||
'method': 'caption' # 渲染方法
|
||||
# 调用示例
|
||||
options = {
|
||||
'voice_volume': params.tts_volume, # 配音音量
|
||||
'bgm_volume': params.bgm_volume, # 背景音乐音量
|
||||
'original_audio_volume': params.original_volume, # 视频原声音量,0表示不保留
|
||||
'keep_original_audio': True, # 是否保留原声
|
||||
'subtitle_font': params.font_name, # 这里使用相对字体路径,会自动在 font_dir() 目录下查找
|
||||
'subtitle_font_size': params.font_size,
|
||||
'subtitle_color': params.text_fore_color,
|
||||
'subtitle_bg_color': None, # 直接使用None表示透明背景
|
||||
'subtitle_position': params.subtitle_position,
|
||||
'custom_position': params.custom_position,
|
||||
'threads': params.n_threads
|
||||
}
|
||||
|
||||
# 示例:自定义音量配置
|
||||
volume_config = {
|
||||
'original': params.original_volume, # 原声音量80%
|
||||
'bgm': params.bgm_volume, # BGM音量20%
|
||||
'narration': params.tts_volume or params.voice_volume, # 解说音量100%
|
||||
}
|
||||
font_path = utils.font_dir(params.font_name)
|
||||
video.generate_video_v3(
|
||||
generate_video.merge_materials(
|
||||
video_path=combined_video_path,
|
||||
subtitle_path=subtitle_path,
|
||||
audio_path=merged_audio_path,
|
||||
subtitle_path=merged_subtitle_path,
|
||||
bgm_path=bgm_path,
|
||||
narration_path=final_audio,
|
||||
output_path=final_video_path,
|
||||
volume_config=volume_config, # 添加音量配置
|
||||
subtitle_style=subtitle_style,
|
||||
font_path=font_path
|
||||
output_path=output_video_path,
|
||||
options=options
|
||||
)
|
||||
|
||||
_progress += 50 / 2
|
||||
sm.state.update_task(task_id, progress=_progress)
|
||||
|
||||
final_video_paths.append(final_video_path)
|
||||
final_video_paths.append(output_video_path)
|
||||
combined_video_paths.append(combined_video_path)
|
||||
|
||||
logger.success(f"任务 {task_id} 已完成, 生成 {len(final_video_paths)} 个视频.")
|
||||
@ -400,35 +380,19 @@ def validate_params(video_path, audio_path, output_file, params):
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# task_id = "test123"
|
||||
# subclip_path_videos = {'00:41-01:58': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_41-01_58.mp4',
|
||||
# '00:06-00:15': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_06-00_15.mp4',
|
||||
# '01:10-01:17': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_10-01_17.mp4',
|
||||
# '00:47-01:03': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-00_47-01_03.mp4',
|
||||
# '01:03-01:10': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-01_03-01_10.mp4',
|
||||
# '02:40-03:08': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-02_40-03_08.mp4',
|
||||
# '03:02-03:20': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-03_02-03_20.mp4',
|
||||
# '03:18-03:20': 'E:\\projects\\NarratoAI\\storage\\cache_videos/vid-03_18-03_20.mp4'}
|
||||
#
|
||||
# params = VideoClipParams(
|
||||
# video_clip_json_path="E:\\projects\\NarratoAI\\resource/scripts/test003.json",
|
||||
# video_origin_path="E:\\projects\\NarratoAI\\resource/videos/1.mp4",
|
||||
# )
|
||||
# start_subclip(task_id, params, subclip_path_videos=subclip_path_videos)
|
||||
task_id = "demo"
|
||||
|
||||
task_id = "test456"
|
||||
subclip_path_videos = {'01:10-01:17': './storage/cache_videos/vid-01_10-01_17.mp4',
|
||||
'01:58-02:04': './storage/cache_videos/vid-01_58-02_04.mp4',
|
||||
'02:25-02:31': './storage/cache_videos/vid-02_25-02_31.mp4',
|
||||
'01:28-01:33': './storage/cache_videos/vid-01_28-01_33.mp4',
|
||||
'03:14-03:18': './storage/cache_videos/vid-03_14-03_18.mp4',
|
||||
'00:24-00:28': './storage/cache_videos/vid-00_24-00_28.mp4',
|
||||
'03:02-03:08': './storage/cache_videos/vid-03_02-03_08.mp4',
|
||||
'00:41-00:44': './storage/cache_videos/vid-00_41-00_44.mp4',
|
||||
'02:12-02:25': './storage/cache_videos/vid-02_12-02_25.mp4'}
|
||||
# 提前裁剪是为了方便检查视频
|
||||
subclip_path_videos = {
|
||||
1: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/vid_00-00-05-390@00-00-57-980.mp4',
|
||||
2: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/vid_00-00-28-900@00-00-43-700.mp4',
|
||||
3: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/vid_00-01-17-840@00-01-27-600.mp4',
|
||||
4: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/vid_00-02-35-460@00-02-52-380.mp4',
|
||||
5: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/113343d127b5a09d0bf84b68bd1b3b97/vid_00-06-59-520@00-07-29-500.mp4',
|
||||
}
|
||||
|
||||
params = VideoClipParams(
|
||||
video_clip_json_path="/Users/apple/Desktop/home/NarratoAI/resource/scripts/test004.json",
|
||||
video_origin_path="/Users/apple/Desktop/home/NarratoAI/resource/videos/1.mp4",
|
||||
video_clip_json_path="/Users/apple/Desktop/home/NarratoAI/resource/scripts/2025-0507-223311.json",
|
||||
video_origin_path="/Users/apple/Desktop/home/NarratoAI/resource/videos/merged_video_4938.mp4",
|
||||
)
|
||||
start_subclip(task_id, params, subclip_path_videos=subclip_path_videos)
|
||||
start_subclip(task_id, params, subclip_path_videos)
|
||||
|
||||
266
app/services/update_script.py
Normal file
266
app/services/update_script.py
Normal file
@ -0,0 +1,266 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: UTF-8 -*-
|
||||
|
||||
'''
|
||||
@Project: NarratoAI
|
||||
@File : update_script
|
||||
@Author : 小林同学
|
||||
@Date : 2025/5/6 下午11:00
|
||||
'''
|
||||
|
||||
import re
|
||||
import os
|
||||
from typing import Dict, List, Any, Tuple, Union
|
||||
|
||||
|
||||
def extract_timestamp_from_video_path(video_path: str) -> str:
    """Extract the time range encoded in a clipped video's file name.

    Two naming schemes are recognised:

    * new: ``vid_HH-MM-SS-mmm@HH-MM-SS-mmm.mp4`` -> ``HH:MM:SS,mmm-HH:MM:SS,mmm``
    * old: ``vid-HH-MM-SS-HH-MM-SS.mp4``         -> ``HH:MM:SS-HH:MM:SS``

    Args:
        video_path: path (or bare name) of the clip file.

    Returns:
        The timestamp range string, or ``""`` when the file name matches
        neither scheme.
    """
    name = os.path.basename(video_path)

    # New scheme carries millisecond precision, start/end separated by '@'.
    m = re.search(
        r'vid_(\d{2})-(\d{2})-(\d{2})-(\d{3})@(\d{2})-(\d{2})-(\d{2})-(\d{3})\.mp4',
        name,
    )
    if m:
        begin = f"{m.group(1)}:{m.group(2)}:{m.group(3)},{m.group(4)}"
        finish = f"{m.group(5)}:{m.group(6)}:{m.group(7)},{m.group(8)}"
        return f"{begin}-{finish}"

    # Legacy scheme: second precision only, dashes instead of colons.
    m = re.search(r'vid-(\d{2}-\d{2}-\d{2})-(\d{2}-\d{2}-\d{2})\.mp4', name)
    if m:
        return f"{m.group(1).replace('-', ':')}-{m.group(2).replace('-', ':')}"

    return ""
|
||||
|
||||
|
||||
def calculate_duration(timestamp: str) -> float:
    """Return the length in seconds of a ``start-end`` timestamp range.

    Accepts ``HH:MM:SS-HH:MM:SS`` as well as the millisecond form
    ``HH:MM:SS,mmm-HH:MM:SS,mmm``.

    Args:
        timestamp: the range string.

    Returns:
        Duration in seconds rounded to two decimals; ``0.0`` for any
        malformed input.
    """

    def _point_to_seconds(point: str) -> float:
        # Split off an optional ',mmm' fraction, then parse H:M:S.
        pieces = point.split(',')
        h, m, s = map(int, pieces[0].split(':'))
        frac = float('0.' + pieces[1]) if len(pieces) > 1 else 0
        return h * 3600 + m * 60 + s + frac

    try:
        begin, finish = timestamp.split('-')
        return round(_point_to_seconds(finish) - _point_to_seconds(begin), 2)
    except (ValueError, AttributeError):
        # Covers wrong number of '-' parts, non-numeric fields, or None input.
        return 0.0
|
||||
|
||||
|
||||
def update_script_timestamps(
        script_list: List[Dict[str, Any]],
        video_result: Dict[Union[str, int], str],
        audio_result: Dict[Union[str, int], str] = None,
        subtitle_result: Dict[Union[str, int], str] = None,
        calculate_edited_timerange: bool = True
) -> List[Dict[str, Any]]:
    """Enrich a clip script with real clip timestamps and media file paths.

    For every script entry this attaches the matching video / audio /
    subtitle file (looked up by ``_id`` first, then by the original
    ``timestamp`` key), replaces the planned timestamp with the one encoded
    in the produced clip's file name, computes the clip duration, and —
    optionally — the entry's position on the final edited timeline.

    Args:
        script_list: original script entries (not mutated; copies returned).
        video_result: key -> produced clip path.
        audio_result: key -> generated narration audio path (optional).
        subtitle_result: key -> generated subtitle path (optional).
        calculate_edited_timerange: when True, add ``editedTimeRange``
            (cumulative HH:MM:SS range in the output video).

    Returns:
        A new list of updated script entries.
    """
    # Map each video_result key to the clip's real time range, recovered
    # from the clip's file name.  Keys whose file name carries no
    # recognisable timestamp are left out of this map.
    range_by_key = {}
    for key, clip_path in video_result.items():
        parsed = extract_timestamp_from_video_path(clip_path)
        if parsed:
            range_by_key[key] = {
                'new_timestamp': parsed,
                'video_path': clip_path
            }

    def _lookup(mapping, entry_id, stamp):
        # Prefer the entry's _id, fall back to its original timestamp key.
        if mapping:
            if entry_id and entry_id in mapping:
                return mapping[entry_id]
            if stamp in mapping:
                return mapping[stamp]
        return ""

    result = []
    elapsed = 0.0  # running total used to place clips on the output timeline

    for entry in script_list:
        updated = entry.copy()
        entry_id = updated.get('_id')
        stamp = updated.get('timestamp', '')

        # Attach media paths ("" when nothing was produced for this entry).
        updated['audio'] = _lookup(audio_result, entry_id, stamp)
        updated['subtitle'] = _lookup(subtitle_result, entry_id, stamp)
        updated['video'] = _lookup(video_result, entry_id, stamp)

        # Resolve the real source time range and its duration.
        seg_len = 0.0
        mapped = None
        if entry_id and entry_id in range_by_key:
            mapped = range_by_key[entry_id]
        elif stamp in range_by_key:
            mapped = range_by_key[stamp]

        if mapped is not None:
            updated['sourceTimeRange'] = mapped['new_timestamp']
            seg_len = calculate_duration(updated['sourceTimeRange'])
            updated['duration'] = seg_len
        elif stamp:
            # No produced clip for this entry: keep the planned range.
            updated['sourceTimeRange'] = stamp
            seg_len = calculate_duration(stamp)
            updated['duration'] = seg_len

        # Position on the edited (output) timeline, second precision.
        if calculate_edited_timerange and seg_len > 0:
            begin = elapsed
            end = elapsed + seg_len
            updated['editedTimeRange'] = (
                f"{int(begin // 3600):02d}:{int((begin % 3600) // 60):02d}:{int(begin % 60):02d}"
                f"-{int(end // 3600):02d}:{int((end % 3600) // 60):02d}:{int(end % 60):02d}"
            )
            elapsed = end

        result.append(updated)

    return result
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test for update_script_timestamps(): a six-entry demo
    # script where entries with OST in (0, 2) got narration generated, while
    # entries 3 and 6 (OST=1, original audio kept) intentionally have no
    # produced clip/audio/subtitle and should fall back to their planned
    # timestamps with empty media paths.
    list_script = [
        {
            'picture': '【解说】好的,各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!',
            'timestamp': '00:00:00,001-00:01:15,001',
            'narration': '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!',
            'OST': 0,
            '_id': 1
        },
        {
            'picture': '【解说】上一集我们看到,范闲在北齐遭遇了惊天变故,生死不明!',
            'timestamp': '00:01:15,001-00:04:40,001',
            'narration': '但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…',
            'OST': 0,
            '_id': 2
        },
        {
            'picture': '画面切到王启年小心翼翼地向范闲汇报。',
            'timestamp': '00:04:41,001-00:04:58,001',
            'narration': '我发现大人的死讯不光是在民间,在官场上也它传开了,所以呢,所以啊,可不是什么好事,将来您跟陛下怎么交代,这可是欺君之罪',
            'OST': 1,
            '_id': 3
        },
        {
            'picture': '【解说】"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。',
            'timestamp': '00:04:58,001-00:05:45,001',
            'narration': '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!',
            'OST': 0,
            '_id': 4
        },
        {
            'picture': '【解说】但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
            'timestamp': '00:05:45,001-00:06:00,001',
            'narration': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
            'OST': 0,
            '_id': 5
        },
        {
            'picture': '画面切换到范闲蒙面闯入皇宫,被侍卫包围的场景。',
            'timestamp': '00:06:00,001-00:06:03,001',
            'narration': '抓刺客',
            'OST': 1,
            '_id': 6
        }]
    # Produced clips, keyed by script _id; file names encode the real
    # source time range that update_script_timestamps() extracts.
    video_res = {
        1: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/vid_00-00-00-000@00-00-20-250.mp4',
        2: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/vid_00-00-30-000@00-00-48-950.mp4',
        4: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/vid_00-01-00-000@00-01-15-688.mp4',
        5: '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/fc3db5844d1ba7d7d838be52c0dac1bd/vid_00-01-30-000@00-01-49-512.mp4'}
    # Generated narration audio, keyed by script _id.
    audio_res = {
        1: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3',
        2: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3',
        4: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3',
        5: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3'}
    # Generated subtitle files, keyed by script _id.
    sub_res = {
        1: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt',
        2: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt',
        4: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt',
        5: '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt'}

    # Update the script with the real clip/audio/subtitle info and print
    # one summary line per entry for visual inspection.
    updated_list_script = update_script_timestamps(list_script, video_res, audio_res, sub_res)
    for item in updated_list_script:
        print(
            f"ID: {item['_id']} | Picture: {item['picture'][:20]}... | Timestamp: {item['timestamp']} | " +
            f"SourceTimeRange: {item['sourceTimeRange']} | EditedTimeRange: {item.get('editedTimeRange', '')} | " +
            f"Duration: {item['duration']} 秒 | Audio: {item['audio']} | Video: {item['video']} | Subtitle: {item['subtitle']}")
|
||||
@ -1,13 +1,13 @@
|
||||
import traceback
|
||||
|
||||
import pysrt
|
||||
# import pysrt
|
||||
from typing import Optional
|
||||
from typing import List
|
||||
from loguru import logger
|
||||
from moviepy.editor import *
|
||||
from moviepy import *
|
||||
from PIL import ImageFont
|
||||
from contextlib import contextmanager
|
||||
from moviepy.editor import (
|
||||
from moviepy import (
|
||||
VideoFileClip,
|
||||
AudioFileClip,
|
||||
TextClip,
|
||||
@ -105,86 +105,6 @@ def manage_clip(clip):
|
||||
del clip
|
||||
|
||||
|
||||
def combine_clip_videos(combined_video_path: str,
                        video_paths: List[str],
                        video_ost_list: List[int],
                        list_script: list,
                        video_aspect: VideoAspect = VideoAspect.portrait,
                        threads: int = 2,
                        ) -> str:
    """Concatenate sub-clips into one video.

    Args:
        combined_video_path: output path of the merged video.
        video_paths: paths of the sub-clips, in playback order.
        video_ost_list: per-clip original-sound flag
            (0: drop original audio, 1: keep original only,
             2: keep original alongside narration).
        list_script: editing script, used only to log the total duration.
        video_aspect: target aspect ratio for the merged video.
        threads: number of encoder threads.

    Returns:
        str: ``combined_video_path``.

    Raises:
        ValueError: when none of the input clips could be opened.
    """
    from app.utils.utils import calculate_total_duration
    audio_duration = calculate_total_duration(list_script)
    logger.info(f"音频的最大持续时间: {audio_duration} s")

    output_dir = os.path.dirname(combined_video_path)
    aspect = VideoAspect(video_aspect)
    video_width, video_height = aspect.to_resolution()

    clips = []
    for video_path, video_ost in zip(video_paths, video_ost_list):
        try:
            clip = VideoFileClip(video_path)

            if video_ost == 0:  # drop the original audio track
                clip = clip.without_audio()
            # video_ost 1 or 2 keeps the original audio; nothing to do.
            # NOTE(review): set_fps/without_audio are moviepy 1.x names; if the
            # project moves to moviepy 2.x imports, set_fps becomes with_fps.

            clip = clip.set_fps(30)

            # Letterbox/resize any clip that doesn't match the target frame.
            clip_w, clip_h = clip.size
            if clip_w != video_width or clip_h != video_height:
                clip = resize_video_with_padding(
                    clip,
                    target_width=video_width,
                    target_height=video_height
                )
                logger.info(f"视频 {video_path} 已调整尺寸为 {video_width} x {video_height}")

            clips.append(clip)

        except Exception as e:
            # Best-effort: skip unreadable clips instead of failing the merge.
            logger.error(f"处理视频 {video_path} 时出错: {str(e)}")
            continue

    if not clips:
        raise ValueError("没有有效的视频片段可以合并")

    # Fix: initialise before the try so the finally block cannot hit a
    # NameError (masking the real exception) if concatenate_videoclips fails.
    video_clip = None
    try:
        video_clip = concatenate_videoclips(clips)
        video_clip = video_clip.set_fps(30)

        logger.info("开始合并视频... (过程中出现 UserWarning: 不必理会)")
        video_clip.write_videofile(
            filename=combined_video_path,
            threads=threads,
            audio_codec="aac",
            fps=30,
            temp_audiofile=os.path.join(output_dir, "temp-audio.m4a")
        )
    finally:
        # Ensure every clip handle is released even when writing fails.
        if video_clip is not None:
            video_clip.close()
        for clip in clips:
            clip.close()

    logger.success("视频合并完成")
    return combined_video_path
|
||||
|
||||
|
||||
def resize_video_with_padding(clip, target_width: int, target_height: int):
|
||||
"""
|
||||
调整视频尺寸并添加黑边
|
||||
@ -443,4 +363,3 @@ def generate_video_v3(
|
||||
bgm.close()
|
||||
if narration_path:
|
||||
narration.close()
|
||||
|
||||
|
||||
@ -4,8 +4,6 @@ from loguru import logger
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
|
||||
from app.services import material
|
||||
from app.models.schema import VideoClipParams
|
||||
from app.utils import utils
|
||||
|
||||
|
||||
class VideoService:
|
||||
|
||||
@ -5,10 +5,11 @@ import traceback
|
||||
import edge_tts
|
||||
import asyncio
|
||||
from loguru import logger
|
||||
from typing import List
|
||||
from typing import List, Union
|
||||
from datetime import datetime
|
||||
from xml.sax.saxutils import unescape
|
||||
from edge_tts import submaker, SubMaker
|
||||
from edge_tts.submaker import mktimestamp
|
||||
from moviepy.video.tools import subtitles
|
||||
import time
|
||||
|
||||
@ -1036,7 +1037,7 @@ def is_azure_v2_voice(voice_name: str):
|
||||
|
||||
def tts(
|
||||
text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str
|
||||
) -> [SubMaker, None]:
|
||||
) -> Union[SubMaker, None]:
|
||||
if is_azure_v2_voice(voice_name):
|
||||
return azure_tts_v2(text, voice_name, voice_file)
|
||||
return azure_tts_v1(text, voice_name, voice_rate, voice_pitch, voice_file)
|
||||
@ -1064,7 +1065,7 @@ def convert_pitch_to_percent(rate: float) -> str:
|
||||
|
||||
def azure_tts_v1(
|
||||
text: str, voice_name: str, voice_rate: float, voice_pitch: float, voice_file: str
|
||||
) -> [SubMaker, None]:
|
||||
) -> Union[SubMaker, None]:
|
||||
voice_name = parse_voice_name(voice_name)
|
||||
text = text.strip()
|
||||
rate_str = convert_rate_to_percent(voice_rate)
|
||||
@ -1087,11 +1088,6 @@ def azure_tts_v1(
|
||||
)
|
||||
return sub_maker, audio_data
|
||||
|
||||
# 判断音频文件是否已存在
|
||||
if os.path.exists(voice_file):
|
||||
logger.info(f"voice file exists, skip tts: {voice_file}")
|
||||
continue
|
||||
|
||||
# 获取音频数据和字幕信息
|
||||
sub_maker, audio_data = asyncio.run(_do())
|
||||
|
||||
@ -1105,8 +1101,6 @@ def azure_tts_v1(
|
||||
# 数据有效,写入文件
|
||||
with open(voice_file, "wb") as file:
|
||||
file.write(audio_data)
|
||||
|
||||
logger.info(f"completed, output file: {voice_file}")
|
||||
return sub_maker
|
||||
except Exception as e:
|
||||
logger.error(f"生成音频文件时出错: {str(e)}")
|
||||
@ -1115,7 +1109,7 @@ def azure_tts_v1(
|
||||
return None
|
||||
|
||||
|
||||
def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None]:
|
||||
def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> Union[SubMaker, None]:
|
||||
voice_name = is_azure_v2_voice(voice_name)
|
||||
if not voice_name:
|
||||
logger.error(f"invalid voice name: {voice_name}")
|
||||
@ -1203,11 +1197,14 @@ def azure_tts_v2(text: str, voice_name: str, voice_file: str) -> [SubMaker, None
|
||||
|
||||
|
||||
def _format_text(text: str) -> str:
|
||||
# text = text.replace("\n", " ")
|
||||
text = text.replace("\n", " ")
|
||||
text = text.replace("\"", " ")
|
||||
text = text.replace("[", " ")
|
||||
text = text.replace("]", " ")
|
||||
text = text.replace("(", " ")
|
||||
text = text.replace(")", " ")
|
||||
text = text.replace(")", " ")
|
||||
text = text.replace("(", " ")
|
||||
text = text.replace("{", " ")
|
||||
text = text.replace("}", " ")
|
||||
text = text.strip()
|
||||
@ -1240,7 +1237,7 @@ def create_subtitle_from_multiple(text: str, sub_maker_list: List[SubMaker], lis
|
||||
if script_item['OST']:
|
||||
continue
|
||||
|
||||
start_time, end_time = script_item['new_timestamp'].split('-')
|
||||
start_time, end_time = script_item['timestamp'].split('-')
|
||||
if sub_maker_index >= len(sub_maker_list):
|
||||
logger.error(f"Sub maker list index out of range: {sub_maker_index}")
|
||||
break
|
||||
@ -1317,6 +1314,99 @@ def create_subtitle_from_multiple(text: str, sub_maker_list: List[SubMaker], lis
|
||||
traceback.print_exc()
|
||||
|
||||
|
||||
def create_subtitle(sub_maker: submaker.SubMaker, text: str, subtitle_file: str):
|
||||
"""
|
||||
优化字幕文件
|
||||
1. 将字幕文件按照标点符号分割成多行
|
||||
2. 逐行匹配字幕文件中的文本
|
||||
3. 生成新的字幕文件
|
||||
"""
|
||||
|
||||
text = _format_text(text)
|
||||
|
||||
def formatter(idx: int, start_time: float, end_time: float, sub_text: str) -> str:
|
||||
"""
|
||||
1
|
||||
00:00:00,000 --> 00:00:02,360
|
||||
跑步是一项简单易行的运动
|
||||
"""
|
||||
start_t = mktimestamp(start_time).replace(".", ",")
|
||||
end_t = mktimestamp(end_time).replace(".", ",")
|
||||
return f"{idx}\n" f"{start_t} --> {end_t}\n" f"{sub_text}\n"
|
||||
|
||||
start_time = -1.0
|
||||
sub_items = []
|
||||
sub_index = 0
|
||||
|
||||
script_lines = utils.split_string_by_punctuations(text)
|
||||
|
||||
def match_line(_sub_line: str, _sub_index: int):
|
||||
if len(script_lines) <= _sub_index:
|
||||
return ""
|
||||
|
||||
_line = script_lines[_sub_index]
|
||||
if _sub_line == _line:
|
||||
return script_lines[_sub_index].strip()
|
||||
|
||||
_sub_line_ = re.sub(r"[^\w\s]", "", _sub_line)
|
||||
_line_ = re.sub(r"[^\w\s]", "", _line)
|
||||
if _sub_line_ == _line_:
|
||||
return _line_.strip()
|
||||
|
||||
_sub_line_ = re.sub(r"\W+", "", _sub_line)
|
||||
_line_ = re.sub(r"\W+", "", _line)
|
||||
if _sub_line_ == _line_:
|
||||
return _line.strip()
|
||||
|
||||
return ""
|
||||
|
||||
sub_line = ""
|
||||
|
||||
try:
|
||||
for _, (offset, sub) in enumerate(zip(sub_maker.offset, sub_maker.subs)):
|
||||
_start_time, end_time = offset
|
||||
if start_time < 0:
|
||||
start_time = _start_time
|
||||
|
||||
sub = unescape(sub)
|
||||
sub_line += sub
|
||||
sub_text = match_line(sub_line, sub_index)
|
||||
if sub_text:
|
||||
sub_index += 1
|
||||
line = formatter(
|
||||
idx=sub_index,
|
||||
start_time=start_time,
|
||||
end_time=end_time,
|
||||
sub_text=sub_text,
|
||||
)
|
||||
sub_items.append(line)
|
||||
start_time = -1.0
|
||||
sub_line = ""
|
||||
|
||||
if len(sub_items) == len(script_lines):
|
||||
with open(subtitle_file, "w", encoding="utf-8") as file:
|
||||
file.write("\n".join(sub_items) + "\n")
|
||||
try:
|
||||
sbs = subtitles.file_to_subtitles(subtitle_file, encoding="utf-8")
|
||||
duration = max([tb for ((ta, tb), txt) in sbs])
|
||||
logger.info(
|
||||
f"已创建字幕文件: {subtitle_file}, duration: {duration}"
|
||||
)
|
||||
return subtitle_file, duration
|
||||
except Exception as e:
|
||||
logger.error(f"failed, error: {str(e)}")
|
||||
os.remove(subtitle_file)
|
||||
else:
|
||||
logger.error(
|
||||
f"字幕创建失败, 字幕长度: {len(sub_items)}, script_lines len: {len(script_lines)}"
|
||||
f"\nsub_items:{json.dumps(sub_items, indent=4, ensure_ascii=False)}"
|
||||
f"\nscript_lines:{json.dumps(script_lines, indent=4, ensure_ascii=False)}"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"failed, error: {str(e)}")
|
||||
|
||||
|
||||
def get_audio_duration(sub_maker: submaker.SubMaker):
|
||||
"""
|
||||
获取音频时长
|
||||
@ -1326,7 +1416,7 @@ def get_audio_duration(sub_maker: submaker.SubMaker):
|
||||
return sub_maker.offset[-1][1] / 10000000
|
||||
|
||||
|
||||
def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: float, voice_pitch: float, force_regenerate: bool = True):
|
||||
def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: float, voice_pitch: float):
|
||||
"""
|
||||
根据JSON文件中的多段文本进行TTS转换
|
||||
|
||||
@ -1334,25 +1424,18 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
|
||||
:param list_script: 脚本列表
|
||||
:param voice_name: 语音名称
|
||||
:param voice_rate: 语音速率
|
||||
:param force_regenerate: 是否强制重新生成已存在的音频文件
|
||||
:return: 生成的音频文件列表
|
||||
"""
|
||||
voice_name = parse_voice_name(voice_name)
|
||||
output_dir = utils.task_dir(task_id)
|
||||
audio_files = []
|
||||
sub_maker_list = []
|
||||
tts_results = []
|
||||
|
||||
for item in list_script:
|
||||
if item['OST'] != 1:
|
||||
# 将时间戳中的冒号替换为下划线
|
||||
timestamp = item['new_timestamp'].replace(':', '_')
|
||||
timestamp = item['timestamp'].replace(':', '_')
|
||||
audio_file = os.path.join(output_dir, f"audio_{timestamp}.mp3")
|
||||
|
||||
# 检查文件是否已存在,如存在且不强制重新生成,则跳过
|
||||
if os.path.exists(audio_file) and not force_regenerate:
|
||||
logger.info(f"音频文件已存在,跳过生成: {audio_file}")
|
||||
audio_files.append(audio_file)
|
||||
continue
|
||||
subtitle_file = os.path.join(output_dir, f"subtitle_{timestamp}.srt")
|
||||
|
||||
text = item['narration']
|
||||
|
||||
@ -1369,9 +1452,18 @@ def tts_multiple(task_id: str, list_script: list, voice_name: str, voice_rate: f
|
||||
f"如果您在中国,请使用VPN; "
|
||||
f"或者使用其他 tts 引擎")
|
||||
continue
|
||||
else:
|
||||
# 为当前片段生成字幕文件
|
||||
_, duration = create_subtitle(sub_maker=sub_maker, text=text, subtitle_file=subtitle_file)
|
||||
|
||||
audio_files.append(audio_file)
|
||||
sub_maker_list.append(sub_maker)
|
||||
tts_results.append({
|
||||
"_id": item['_id'],
|
||||
"timestamp": item['timestamp'],
|
||||
"audio_file": audio_file,
|
||||
"subtitle_file": subtitle_file,
|
||||
"duration": duration,
|
||||
"text": text,
|
||||
})
|
||||
logger.info(f"已生成音频文件: {audio_file}")
|
||||
|
||||
return audio_files, sub_maker_list
|
||||
return tts_results
|
||||
|
||||
@ -61,7 +61,6 @@ class VisionAnalyzer:
|
||||
try:
|
||||
# 加载图片
|
||||
if isinstance(images[0], str):
|
||||
logger.info("正在加载图片...")
|
||||
images = self.load_images(images)
|
||||
|
||||
# 验证图片列表
|
||||
@ -81,11 +80,14 @@ class VisionAnalyzer:
|
||||
|
||||
images = valid_images
|
||||
results = []
|
||||
total_batches = (len(images) + batch_size - 1) // batch_size
|
||||
# 视频帧总数除以批量处理大小,如果有小数则+1
|
||||
batches_needed = len(images) // batch_size
|
||||
if len(images) % batch_size > 0:
|
||||
batches_needed += 1
|
||||
|
||||
logger.debug(f"视频帧总数:{len(images)}, 每批处理 {batch_size} 帧, 需要访问 VLM {batches_needed} 次")
|
||||
|
||||
logger.debug(f"共 {total_batches} 个批次,每批次 {batch_size} 张图片")
|
||||
|
||||
with tqdm(total=total_batches, desc="分析进度") as pbar:
|
||||
with tqdm(total=batches_needed, desc="分析进度") as pbar:
|
||||
for i in range(0, len(images), batch_size):
|
||||
batch = images[i:i + batch_size]
|
||||
retry_count = 0
|
||||
@ -93,8 +95,8 @@ class VisionAnalyzer:
|
||||
while retry_count < 3:
|
||||
try:
|
||||
# 在每个批次处理前添加小延迟
|
||||
if i > 0:
|
||||
await asyncio.sleep(2)
|
||||
# if i > 0:
|
||||
# await asyncio.sleep(2)
|
||||
|
||||
# 确保每个批次的图片都是有效的
|
||||
valid_batch = [img for img in batch if isinstance(img, PIL.Image.Image)]
|
||||
|
||||
@ -30,7 +30,7 @@ class QwenAnalyzer:
|
||||
|
||||
self.model_name = model_name
|
||||
self.api_key = api_key
|
||||
self.base_url = base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
||||
self.base_url = base_url
|
||||
|
||||
# 配置API客户端
|
||||
self._configure_client()
|
||||
@ -80,7 +80,7 @@ class QwenAnalyzer:
|
||||
# 添加文本提示
|
||||
content.append({
|
||||
"type": "text",
|
||||
"text": prompt
|
||||
"text": prompt % (len(content), len(content), len(content))
|
||||
})
|
||||
|
||||
# 调用API
|
||||
@ -102,7 +102,7 @@ class QwenAnalyzer:
|
||||
async def analyze_images(self,
|
||||
images: Union[List[str], List[PIL.Image.Image]],
|
||||
prompt: str,
|
||||
batch_size: int = 5) -> List[Dict]:
|
||||
batch_size: int) -> List[Dict]:
|
||||
"""
|
||||
批量分析多张图片
|
||||
Args:
|
||||
@ -118,7 +118,6 @@ class QwenAnalyzer:
|
||||
|
||||
# 加载图片
|
||||
if isinstance(images[0], str):
|
||||
logger.info("正在加载图片...")
|
||||
images = self.load_images(images)
|
||||
|
||||
# 验证图片列表
|
||||
@ -141,9 +140,14 @@ class QwenAnalyzer:
|
||||
|
||||
images = valid_images
|
||||
results = []
|
||||
total_batches = (len(images) + batch_size - 1) // batch_size
|
||||
# 视频帧总数除以批量处理大小,如果有小数则+1
|
||||
batches_needed = len(images) // batch_size
|
||||
if len(images) % batch_size > 0:
|
||||
batches_needed += 1
|
||||
|
||||
logger.debug(f"视频帧总数:{len(images)}, 每批处理 {batch_size} 帧, 需要访问 VLM {batches_needed} 次")
|
||||
|
||||
with tqdm(total=total_batches, desc="分析进度") as pbar:
|
||||
with tqdm(total=batches_needed, desc="分析进度") as pbar:
|
||||
for i in range(0, len(images), batch_size):
|
||||
batch = images[i:i + batch_size]
|
||||
batch_paths = valid_paths[i:i + batch_size] if valid_paths else None
|
||||
@ -151,9 +155,9 @@ class QwenAnalyzer:
|
||||
|
||||
while retry_count < 3:
|
||||
try:
|
||||
# 在每个批次处理前<EFBFBD><EFBFBD>加小延迟
|
||||
if i > 0:
|
||||
await asyncio.sleep(2)
|
||||
# 在每个批次处理前添加小延迟
|
||||
# if i > 0:
|
||||
# await asyncio.sleep(0.5)
|
||||
|
||||
# 确保每个批次的图片都是有效的
|
||||
valid_batch = [img for img in batch if isinstance(img, PIL.Image.Image)]
|
||||
@ -209,7 +213,7 @@ class QwenAnalyzer:
|
||||
for i, result in enumerate(results):
|
||||
response_text = result['response']
|
||||
|
||||
# 如果有图片路径信息,<EFBFBD><EFBFBD><EFBFBD>用它来生成文件名
|
||||
# 如果有图片路径信息,用它来生成文件名
|
||||
if result.get('image_paths'):
|
||||
image_paths = result['image_paths']
|
||||
img_name_start = Path(image_paths[0]).stem.split('_')[-1]
|
||||
|
||||
@ -2,7 +2,7 @@ import os
|
||||
import json
|
||||
import traceback
|
||||
from loguru import logger
|
||||
import tiktoken
|
||||
# import tiktoken
|
||||
from typing import List, Dict
|
||||
from datetime import datetime
|
||||
from openai import OpenAI
|
||||
@ -94,12 +94,12 @@ class OpenAIGenerator(BaseGenerator):
|
||||
"user": "script_generator"
|
||||
}
|
||||
|
||||
# 初始化token计数器
|
||||
try:
|
||||
self.encoding = tiktoken.encoding_for_model(self.model_name)
|
||||
except KeyError:
|
||||
logger.warning(f"未找到模型 {self.model_name} 的专用编码器,使用默认编码器")
|
||||
self.encoding = tiktoken.get_encoding("cl100k_base")
|
||||
# # 初始化token计数器
|
||||
# try:
|
||||
# self.encoding = tiktoken.encoding_for_model(self.model_name)
|
||||
# except KeyError:
|
||||
# logger.warning(f"未找到模型 {self.model_name} 的专用编码器,使用默认编码器")
|
||||
# self.encoding = tiktoken.get_encoding("cl100k_base")
|
||||
|
||||
def _generate(self, messages: list, params: dict) -> any:
|
||||
"""实现OpenAI特定的生成逻辑"""
|
||||
|
||||
@ -197,6 +197,28 @@ def time_convert_seconds_to_hmsm(seconds) -> str:
|
||||
return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, minutes, seconds, milliseconds)
|
||||
|
||||
|
||||
def format_time(seconds: float) -> str:
|
||||
"""
|
||||
将秒数转换为格式化的时间字符串 (HH:MM:SS,mmm)
|
||||
|
||||
参数:
|
||||
seconds: 需要转换的秒数,可以是整数或浮点数
|
||||
|
||||
返回:
|
||||
格式化的时间字符串,格式为 HH:MM:SS,mmm
|
||||
"""
|
||||
# 计算小时、分钟、秒和毫秒
|
||||
hours = int(seconds // 3600)
|
||||
remaining_seconds = seconds % 3600
|
||||
minutes = int(remaining_seconds // 60)
|
||||
remaining_seconds = remaining_seconds % 60
|
||||
secs = int(remaining_seconds)
|
||||
milliseconds = int((remaining_seconds - secs) * 1000)
|
||||
|
||||
# 格式化为时间字符串
|
||||
return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, minutes, secs, milliseconds)
|
||||
|
||||
|
||||
def text_to_srt(idx: int, msg: str, start_time: float, end_time: float) -> str:
|
||||
start_time = time_convert_seconds_to_hmsm(start_time)
|
||||
end_time = time_convert_seconds_to_hmsm(end_time)
|
||||
@ -506,7 +528,7 @@ def cut_video(params, progress_callback=None):
|
||||
st.session_state['subclip_videos'] = subclip_videos
|
||||
for i, video_script in enumerate(video_script_list):
|
||||
try:
|
||||
video_script['path'] = subclip_videos[video_script['timestamp']]
|
||||
video_script['path'] = subclip_videos[i+1]
|
||||
except KeyError as err:
|
||||
logger.error(f"裁剪视频失败: {err}")
|
||||
|
||||
|
||||
@ -1,237 +1,339 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
from sklearn.cluster import MiniBatchKMeans
|
||||
"""
|
||||
视频帧提取工具
|
||||
|
||||
这个模块提供了简单高效的视频帧提取功能。主要特点:
|
||||
1. 使用ffmpeg进行视频处理,支持硬件加速
|
||||
2. 按指定时间间隔提取视频关键帧
|
||||
3. 支持多种视频格式
|
||||
4. 支持高清视频帧输出
|
||||
5. 直接从原视频提取高质量关键帧
|
||||
|
||||
不依赖OpenCV和sklearn等库,只使用ffmpeg作为外部依赖,降低了安装和使用的复杂度。
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
from typing import List, Tuple, Generator
|
||||
import time
|
||||
import subprocess
|
||||
from typing import List, Dict
|
||||
from loguru import logger
|
||||
import gc
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
class VideoProcessor:
|
||||
def __init__(self, video_path: str, batch_size: int = 100):
|
||||
def __init__(self, video_path: str):
|
||||
"""
|
||||
初始化视频处理器
|
||||
|
||||
|
||||
Args:
|
||||
video_path: 视频文件路径
|
||||
batch_size: 批处理大小,控制内存使用
|
||||
"""
|
||||
if not os.path.exists(video_path):
|
||||
raise FileNotFoundError(f"视频文件不存在: {video_path}")
|
||||
|
||||
|
||||
self.video_path = video_path
|
||||
self.batch_size = batch_size
|
||||
self.cap = cv2.VideoCapture(video_path)
|
||||
|
||||
if not self.cap.isOpened():
|
||||
raise RuntimeError(f"无法打开视频文件: {video_path}")
|
||||
|
||||
self.total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
self.fps = int(self.cap.get(cv2.CAP_PROP_FPS))
|
||||
self.video_info = self._get_video_info()
|
||||
self.fps = float(self.video_info.get('fps', 25))
|
||||
self.duration = float(self.video_info.get('duration', 0))
|
||||
self.width = int(self.video_info.get('width', 0))
|
||||
self.height = int(self.video_info.get('height', 0))
|
||||
self.total_frames = int(self.fps * self.duration)
|
||||
|
||||
def __del__(self):
|
||||
"""析构函数,确保视频资源被释放"""
|
||||
if hasattr(self, 'cap'):
|
||||
self.cap.release()
|
||||
gc.collect()
|
||||
def _get_video_info(self) -> Dict[str, str]:
|
||||
"""
|
||||
使用ffprobe获取视频信息
|
||||
|
||||
def preprocess_video(self) -> Generator[Tuple[int, np.ndarray], None, None]:
|
||||
"""
|
||||
使用生成器方式分批读取视频帧
|
||||
|
||||
Yields:
|
||||
Tuple[int, np.ndarray]: (帧索引, 视频帧)
|
||||
"""
|
||||
self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
|
||||
frame_idx = 0
|
||||
|
||||
while self.cap.isOpened():
|
||||
ret, frame = self.cap.read()
|
||||
if not ret:
|
||||
break
|
||||
|
||||
# 降低分辨率以减少内存使用
|
||||
frame = cv2.resize(frame, (0, 0), fx=0.5, fy=0.5)
|
||||
yield frame_idx, frame
|
||||
|
||||
frame_idx += 1
|
||||
|
||||
# 定期进行垃圾回收
|
||||
if frame_idx % 1000 == 0:
|
||||
gc.collect()
|
||||
|
||||
def detect_shot_boundaries(self, threshold: int = 70) -> List[int]:
|
||||
"""
|
||||
使用批处理方式检测镜头边界
|
||||
|
||||
Args:
|
||||
threshold: 差异阈值
|
||||
|
||||
Returns:
|
||||
List[int]: 镜头边界帧的索引列表
|
||||
Dict[str, str]: 包含视频基本信息的字典
|
||||
"""
|
||||
shot_boundaries = []
|
||||
prev_frame = None
|
||||
prev_idx = -1
|
||||
|
||||
pbar = tqdm(self.preprocess_video(),
|
||||
total=self.total_frames,
|
||||
desc="检测镜头边界",
|
||||
unit="帧")
|
||||
|
||||
for frame_idx, curr_frame in pbar:
|
||||
if prev_frame is not None:
|
||||
prev_gray = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY)
|
||||
curr_gray = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY)
|
||||
|
||||
diff = np.mean(np.abs(curr_gray.astype(float) - prev_gray.astype(float)))
|
||||
if diff > threshold:
|
||||
shot_boundaries.append(frame_idx)
|
||||
pbar.set_postfix({"检测到边界": len(shot_boundaries)})
|
||||
|
||||
prev_frame = curr_frame.copy()
|
||||
prev_idx = frame_idx
|
||||
|
||||
del curr_frame
|
||||
if frame_idx % 100 == 0:
|
||||
gc.collect()
|
||||
|
||||
return shot_boundaries
|
||||
cmd = [
|
||||
"ffprobe",
|
||||
"-v", "error",
|
||||
"-select_streams", "v:0",
|
||||
"-show_entries", "stream=width,height,r_frame_rate,duration",
|
||||
"-of", "default=noprint_wrappers=1:nokey=0",
|
||||
self.video_path
|
||||
]
|
||||
|
||||
def process_shot(self, shot_frames: List[Tuple[int, np.ndarray]]) -> Tuple[np.ndarray, int]:
|
||||
"""
|
||||
处理单个镜头的帧
|
||||
|
||||
Args:
|
||||
shot_frames: 镜头中的帧列表
|
||||
try:
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, check=True)
|
||||
lines = result.stdout.strip().split('\n')
|
||||
info = {}
|
||||
for line in lines:
|
||||
if '=' in line:
|
||||
key, value = line.split('=', 1)
|
||||
info[key] = value
|
||||
|
||||
# 处理帧率(可能是分数形式)
|
||||
if 'r_frame_rate' in info:
|
||||
try:
|
||||
num, den = map(int, info['r_frame_rate'].split('/'))
|
||||
info['fps'] = str(num / den)
|
||||
except ValueError:
|
||||
info['fps'] = info.get('r_frame_rate', '25')
|
||||
|
||||
return info
|
||||
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.error(f"获取视频信息失败: {e.stderr}")
|
||||
return {
|
||||
'width': '1280',
|
||||
'height': '720',
|
||||
'fps': '25',
|
||||
'duration': '0'
|
||||
}
|
||||
|
||||
def extract_frames_by_interval(self, output_dir: str, interval_seconds: float = 5.0,
|
||||
use_hw_accel: bool = True) -> List[int]:
|
||||
"""
|
||||
按指定时间间隔提取视频帧
|
||||
|
||||
Args:
|
||||
output_dir: 输出目录
|
||||
interval_seconds: 帧提取间隔(秒)
|
||||
use_hw_accel: 是否使用硬件加速
|
||||
|
||||
Returns:
|
||||
Tuple[np.ndarray, int]: (关键帧, 帧索引)
|
||||
List[int]: 提取的帧号列表
|
||||
"""
|
||||
if not shot_frames:
|
||||
return None, -1
|
||||
|
||||
frame_features = []
|
||||
frame_indices = []
|
||||
if not os.path.exists(output_dir):
|
||||
os.makedirs(output_dir)
|
||||
|
||||
for idx, frame in tqdm(shot_frames,
|
||||
desc="处理镜头帧",
|
||||
unit="帧",
|
||||
leave=False):
|
||||
gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
|
||||
resized_gray = cv2.resize(gray, (32, 32))
|
||||
frame_features.append(resized_gray.flatten())
|
||||
frame_indices.append(idx)
|
||||
|
||||
frame_features = np.array(frame_features)
|
||||
# 计算起始时间和帧提取点
|
||||
start_time = 0
|
||||
end_time = self.duration
|
||||
extraction_times = []
|
||||
|
||||
kmeans = MiniBatchKMeans(n_clusters=1, batch_size=min(len(frame_features), 100),
|
||||
random_state=0).fit(frame_features)
|
||||
current_time = start_time
|
||||
while current_time < end_time:
|
||||
extraction_times.append(current_time)
|
||||
current_time += interval_seconds
|
||||
|
||||
center_idx = np.argmin(np.sum((frame_features - kmeans.cluster_centers_[0]) ** 2, axis=1))
|
||||
|
||||
return shot_frames[center_idx][1], frame_indices[center_idx]
|
||||
if not extraction_times:
|
||||
logger.warning("未找到需要提取的帧")
|
||||
return []
|
||||
|
||||
def extract_keyframes(self, shot_boundaries: List[int]) -> Generator[Tuple[np.ndarray, int], None, None]:
|
||||
"""
|
||||
使用生成器方式提取关键帧
|
||||
# 确定硬件加速器选项
|
||||
hw_accel = []
|
||||
if use_hw_accel:
|
||||
# 尝试检测可用的硬件加速器
|
||||
hw_accel_options = self._detect_hw_accelerator()
|
||||
if hw_accel_options:
|
||||
hw_accel = hw_accel_options
|
||||
logger.info(f"使用硬件加速: {' '.join(hw_accel)}")
|
||||
else:
|
||||
logger.warning("未检测到可用的硬件加速器,使用软件解码")
|
||||
|
||||
Args:
|
||||
shot_boundaries: 镜头边界列表
|
||||
# 提取帧
|
||||
frame_numbers = []
|
||||
for i, timestamp in enumerate(tqdm(extraction_times, desc="提取视频帧")):
|
||||
frame_number = int(timestamp * self.fps)
|
||||
frame_numbers.append(frame_number)
|
||||
|
||||
Yields:
|
||||
Tuple[np.ndarray, int]: (关键帧, 帧索引)
|
||||
"""
|
||||
shot_frames = []
|
||||
current_shot_start = 0
|
||||
# 格式化时间戳字符串 (HHMMSSmmm)
|
||||
hours = int(timestamp // 3600)
|
||||
minutes = int((timestamp % 3600) // 60)
|
||||
seconds = int(timestamp % 60)
|
||||
milliseconds = int((timestamp % 1) * 1000)
|
||||
time_str = f"{hours:02d}{minutes:02d}{seconds:02d}{milliseconds:03d}"
|
||||
|
||||
output_path = os.path.join(output_dir, f"keyframe_{frame_number:06d}_{time_str}.jpg")
|
||||
|
||||
# 使用ffmpeg提取单帧
|
||||
cmd = [
|
||||
"ffmpeg",
|
||||
"-hide_banner",
|
||||
"-loglevel", "error",
|
||||
]
|
||||
|
||||
# 添加硬件加速参数
|
||||
cmd.extend(hw_accel)
|
||||
|
||||
cmd.extend([
|
||||
"-ss", str(timestamp),
|
||||
"-i", self.video_path,
|
||||
"-vframes", "1",
|
||||
"-q:v", "1", # 最高质量
|
||||
"-y",
|
||||
output_path
|
||||
])
|
||||
|
||||
try:
|
||||
subprocess.run(cmd, check=True, capture_output=True)
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.warning(f"提取帧 {frame_number} 失败: {e.stderr}")
|
||||
|
||||
for frame_idx, frame in self.preprocess_video():
|
||||
if frame_idx in shot_boundaries:
|
||||
if shot_frames:
|
||||
keyframe, keyframe_idx = self.process_shot(shot_frames)
|
||||
if keyframe is not None:
|
||||
yield keyframe, keyframe_idx
|
||||
|
||||
# 清理内存
|
||||
shot_frames.clear()
|
||||
gc.collect()
|
||||
logger.info(f"成功提取了 {len(frame_numbers)} 个视频帧")
|
||||
return frame_numbers
|
||||
|
||||
def _detect_hw_accelerator(self) -> List[str]:
|
||||
"""
|
||||
检测系统可用的硬件加速器
|
||||
|
||||
Returns:
|
||||
List[str]: 硬件加速器ffmpeg命令参数
|
||||
"""
|
||||
# 检测操作系统
|
||||
import platform
|
||||
system = platform.system().lower()
|
||||
|
||||
# 测试不同的硬件加速器
|
||||
accelerators = []
|
||||
|
||||
if system == 'darwin': # macOS
|
||||
# 测试 videotoolbox (Apple 硬件加速)
|
||||
test_cmd = [
|
||||
"ffmpeg",
|
||||
"-hide_banner",
|
||||
"-loglevel", "error",
|
||||
"-hwaccel", "videotoolbox",
|
||||
"-i", self.video_path,
|
||||
"-t", "0.1",
|
||||
"-f", "null",
|
||||
"-"
|
||||
]
|
||||
try:
|
||||
subprocess.run(test_cmd, capture_output=True, check=True)
|
||||
return ["-hwaccel", "videotoolbox"]
|
||||
except subprocess.CalledProcessError:
|
||||
pass
|
||||
|
||||
current_shot_start = frame_idx
|
||||
elif system == 'linux':
|
||||
# 测试 VAAPI
|
||||
test_cmd = [
|
||||
"ffmpeg",
|
||||
"-hide_banner",
|
||||
"-loglevel", "error",
|
||||
"-hwaccel", "vaapi",
|
||||
"-i", self.video_path,
|
||||
"-t", "0.1",
|
||||
"-f", "null",
|
||||
"-"
|
||||
]
|
||||
try:
|
||||
subprocess.run(test_cmd, capture_output=True, check=True)
|
||||
return ["-hwaccel", "vaapi"]
|
||||
except subprocess.CalledProcessError:
|
||||
pass
|
||||
|
||||
shot_frames.append((frame_idx, frame))
|
||||
|
||||
# 控制单个镜头的最大帧数
|
||||
if len(shot_frames) > self.batch_size:
|
||||
keyframe, keyframe_idx = self.process_shot(shot_frames)
|
||||
if keyframe is not None:
|
||||
yield keyframe, keyframe_idx
|
||||
shot_frames.clear()
|
||||
gc.collect()
|
||||
# 尝试 CUDA
|
||||
test_cmd = [
|
||||
"ffmpeg",
|
||||
"-hide_banner",
|
||||
"-loglevel", "error",
|
||||
"-hwaccel", "cuda",
|
||||
"-i", self.video_path,
|
||||
"-t", "0.1",
|
||||
"-f", "null",
|
||||
"-"
|
||||
]
|
||||
try:
|
||||
subprocess.run(test_cmd, capture_output=True, check=True)
|
||||
return ["-hwaccel", "cuda"]
|
||||
except subprocess.CalledProcessError:
|
||||
pass
|
||||
|
||||
elif system == 'windows':
|
||||
# 测试 CUDA
|
||||
test_cmd = [
|
||||
"ffmpeg",
|
||||
"-hide_banner",
|
||||
"-loglevel", "error",
|
||||
"-hwaccel", "cuda",
|
||||
"-i", self.video_path,
|
||||
"-t", "0.1",
|
||||
"-f", "null",
|
||||
"-"
|
||||
]
|
||||
try:
|
||||
subprocess.run(test_cmd, capture_output=True, check=True)
|
||||
return ["-hwaccel", "cuda"]
|
||||
except subprocess.CalledProcessError:
|
||||
pass
|
||||
|
||||
# 测试 D3D11VA
|
||||
test_cmd = [
|
||||
"ffmpeg",
|
||||
"-hide_banner",
|
||||
"-loglevel", "error",
|
||||
"-hwaccel", "d3d11va",
|
||||
"-i", self.video_path,
|
||||
"-t", "0.1",
|
||||
"-f", "null",
|
||||
"-"
|
||||
]
|
||||
try:
|
||||
subprocess.run(test_cmd, capture_output=True, check=True)
|
||||
return ["-hwaccel", "d3d11va"]
|
||||
except subprocess.CalledProcessError:
|
||||
pass
|
||||
|
||||
# 测试 DXVA2
|
||||
test_cmd = [
|
||||
"ffmpeg",
|
||||
"-hide_banner",
|
||||
"-loglevel", "error",
|
||||
"-hwaccel", "dxva2",
|
||||
"-i", self.video_path,
|
||||
"-t", "0.1",
|
||||
"-f", "null",
|
||||
"-"
|
||||
]
|
||||
try:
|
||||
subprocess.run(test_cmd, capture_output=True, check=True)
|
||||
return ["-hwaccel", "dxva2"]
|
||||
except subprocess.CalledProcessError:
|
||||
pass
|
||||
|
||||
# 处理最后一个镜头
|
||||
if shot_frames:
|
||||
keyframe, keyframe_idx = self.process_shot(shot_frames)
|
||||
if keyframe is not None:
|
||||
yield keyframe, keyframe_idx
|
||||
# 如果没有找到可用的硬件加速器
|
||||
return []
|
||||
|
||||
def process_video(self, output_dir: str, skip_seconds: float = 0) -> None:
|
||||
def process_video_pipeline(self,
|
||||
output_dir: str,
|
||||
interval_seconds: float = 5.0, # 帧提取间隔(秒)
|
||||
use_hw_accel: bool = True) -> None:
|
||||
"""
|
||||
处理视频并提取关键帧,使用分批处理方式
|
||||
执行简化的视频处理流程,直接从原视频按固定时间间隔提取帧
|
||||
|
||||
Args:
|
||||
output_dir: 输出目录
|
||||
skip_seconds: 跳过视频开头的秒数
|
||||
interval_seconds: 帧提取间隔(秒)
|
||||
use_hw_accel: 是否使用硬件加速
|
||||
"""
|
||||
# 创建输出目录
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
try:
|
||||
# 创建输出目录
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
# 计算要跳过的帧数
|
||||
skip_frames = int(skip_seconds * self.fps)
|
||||
self.cap.set(cv2.CAP_PROP_POS_FRAMES, skip_frames)
|
||||
|
||||
# 检测镜头边界
|
||||
logger.info("开始检测镜头边界...")
|
||||
shot_boundaries = self.detect_shot_boundaries()
|
||||
|
||||
# 提取关键帧
|
||||
logger.info("开始提取关键帧...")
|
||||
frame_count = 0
|
||||
|
||||
pbar = tqdm(self.extract_keyframes(shot_boundaries),
|
||||
desc="提取关键帧",
|
||||
unit="帧")
|
||||
|
||||
for keyframe, frame_idx in pbar:
|
||||
if frame_idx < skip_frames:
|
||||
continue
|
||||
|
||||
# 计算时间戳
|
||||
timestamp = frame_idx / self.fps
|
||||
hours = int(timestamp // 3600)
|
||||
minutes = int((timestamp % 3600) // 60)
|
||||
seconds = int(timestamp % 60)
|
||||
time_str = f"{hours:02d}{minutes:02d}{seconds:02d}"
|
||||
|
||||
# 保存关键帧
|
||||
output_path = os.path.join(output_dir,
|
||||
f'keyframe_{frame_idx:06d}_{time_str}.jpg')
|
||||
cv2.imwrite(output_path, keyframe)
|
||||
frame_count += 1
|
||||
|
||||
pbar.set_postfix({"已保存": frame_count})
|
||||
|
||||
if frame_count % 10 == 0:
|
||||
gc.collect()
|
||||
|
||||
logger.info(f"关键帧提取完成,共保存 {frame_count} 帧到 {output_dir}")
|
||||
# 直接从原视频提取关键帧
|
||||
logger.info(f"从视频间隔 {interval_seconds} 秒提取关键帧...")
|
||||
self.extract_frames_by_interval(
|
||||
output_dir,
|
||||
interval_seconds=interval_seconds,
|
||||
use_hw_accel=use_hw_accel
|
||||
)
|
||||
|
||||
logger.info(f"处理完成!视频帧已保存在: {output_dir}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"视频处理失败: {str(e)}")
|
||||
import traceback
|
||||
logger.error(f"视频处理失败: \n{traceback.format_exc()}")
|
||||
raise
|
||||
finally:
|
||||
# 确保资源被释放
|
||||
self.cap.release()
|
||||
gc.collect()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
import time
|
||||
|
||||
start_time = time.time()
|
||||
|
||||
# 使用示例
|
||||
processor = VideoProcessor("./resource/videos/test.mp4")
|
||||
|
||||
# 设置间隔为3秒提取帧
|
||||
processor.process_video_pipeline(
|
||||
output_dir="output",
|
||||
interval_seconds=3.0,
|
||||
use_hw_accel=True
|
||||
)
|
||||
|
||||
end_time = time.time()
|
||||
print(f"处理完成!总耗时: {end_time - start_time:.2f} 秒")
|
||||
|
||||
@ -1,382 +0,0 @@
|
||||
import cv2
|
||||
import numpy as np
|
||||
from sklearn.cluster import KMeans
|
||||
import os
|
||||
import re
|
||||
from typing import List, Tuple, Generator
|
||||
from loguru import logger
|
||||
import subprocess
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
class VideoProcessor:
|
||||
def __init__(self, video_path: str):
|
||||
"""
|
||||
初始化视频处理器
|
||||
|
||||
Args:
|
||||
video_path: 视频文件路径
|
||||
"""
|
||||
if not os.path.exists(video_path):
|
||||
raise FileNotFoundError(f"视频文件不存在: {video_path}")
|
||||
|
||||
self.video_path = video_path
|
||||
self.cap = cv2.VideoCapture(video_path)
|
||||
|
||||
if not self.cap.isOpened():
|
||||
raise RuntimeError(f"无法打开视频文件: {video_path}")
|
||||
|
||||
self.total_frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
||||
self.fps = int(self.cap.get(cv2.CAP_PROP_FPS))
|
||||
|
||||
def __del__(self):
|
||||
"""析构函数,确保视频资源被释放"""
|
||||
if hasattr(self, 'cap'):
|
||||
self.cap.release()
|
||||
|
||||
def preprocess_video(self) -> Generator[np.ndarray, None, None]:
|
||||
"""
|
||||
使用生成器方式读取视频帧
|
||||
|
||||
Yields:
|
||||
np.ndarray: 视频帧
|
||||
"""
|
||||
self.cap.set(cv2.CAP_PROP_POS_FRAMES, 0) # 重置到视频开始
|
||||
while self.cap.isOpened():
|
||||
ret, frame = self.cap.read()
|
||||
if not ret:
|
||||
break
|
||||
yield frame
|
||||
|
||||
def detect_shot_boundaries(self, frames: List[np.ndarray], threshold: int = 30) -> List[int]:
|
||||
"""
|
||||
使用帧差法检测镜头边界
|
||||
|
||||
Args:
|
||||
frames: 视频帧列表
|
||||
threshold: 差异阈值,默认值调低为30
|
||||
|
||||
Returns:
|
||||
List[int]: 镜头边界帧的索引列表
|
||||
"""
|
||||
shot_boundaries = []
|
||||
if len(frames) < 2: # 添加帧数检查
|
||||
logger.warning("视频帧数过少,无法检测场景边界")
|
||||
return [len(frames) - 1] # 返回最后一帧作为边界
|
||||
|
||||
for i in range(1, len(frames)):
|
||||
prev_frame = cv2.cvtColor(frames[i - 1], cv2.COLOR_BGR2GRAY)
|
||||
curr_frame = cv2.cvtColor(frames[i], cv2.COLOR_BGR2GRAY)
|
||||
|
||||
# 计算帧差
|
||||
diff = np.mean(np.abs(curr_frame.astype(float) - prev_frame.astype(float)))
|
||||
|
||||
if diff > threshold:
|
||||
shot_boundaries.append(i)
|
||||
|
||||
# 如果没有检测到任何边界,至少返回最后一帧
|
||||
if not shot_boundaries:
|
||||
logger.warning("未检测到场景边界,将视频作为单个场景处理")
|
||||
shot_boundaries.append(len(frames) - 1)
|
||||
|
||||
return shot_boundaries
|
||||
|
||||
def extract_keyframes(self, frames: List[np.ndarray], shot_boundaries: List[int]) -> Tuple[
|
||||
List[np.ndarray], List[int]]:
|
||||
"""
|
||||
从每个镜头中提取关键帧
|
||||
|
||||
Args:
|
||||
frames: 视频帧列表
|
||||
shot_boundaries: 镜头边界列表
|
||||
|
||||
Returns:
|
||||
Tuple[List[np.ndarray], List[int]]: 关键帧列表和对应的帧索引
|
||||
"""
|
||||
keyframes = []
|
||||
keyframe_indices = []
|
||||
|
||||
for i in tqdm(range(len(shot_boundaries)), desc="提取关键帧"):
|
||||
start = shot_boundaries[i - 1] if i > 0 else 0
|
||||
end = shot_boundaries[i]
|
||||
shot_frames = frames[start:end]
|
||||
|
||||
if not shot_frames:
|
||||
continue
|
||||
|
||||
# 将每一帧转换为灰度图并展平为一维数组
|
||||
frame_features = np.array([cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY).flatten()
|
||||
for frame in shot_frames])
|
||||
|
||||
try:
|
||||
# 尝试使用 KMeans
|
||||
kmeans = KMeans(n_clusters=1, random_state=0).fit(frame_features)
|
||||
center_idx = np.argmin(np.sum((frame_features - kmeans.cluster_centers_[0]) ** 2, axis=1))
|
||||
except Exception as e:
|
||||
logger.warning(f"KMeans 聚类失败,使用备选方案: {str(e)}")
|
||||
# 备选方案:选择镜头中间的帧作为关键帧
|
||||
center_idx = len(shot_frames) // 2
|
||||
|
||||
keyframes.append(shot_frames[center_idx])
|
||||
keyframe_indices.append(start + center_idx)
|
||||
|
||||
return keyframes, keyframe_indices
|
||||
|
||||
def save_keyframes(self, keyframes: List[np.ndarray], keyframe_indices: List[int],
|
||||
output_dir: str, desc: str = "保存关键帧") -> None:
|
||||
"""
|
||||
保存关键帧到指定目录,文件名格式为:keyframe_帧序号_时间戳.jpg
|
||||
时间戳精确到毫秒,格式为:HHMMSSmmm
|
||||
"""
|
||||
if not os.path.exists(output_dir):
|
||||
os.makedirs(output_dir)
|
||||
|
||||
for keyframe, frame_idx in tqdm(zip(keyframes, keyframe_indices),
|
||||
total=len(keyframes),
|
||||
desc=desc):
|
||||
# 计算精确到毫秒的时间戳
|
||||
timestamp = frame_idx / self.fps
|
||||
hours = int(timestamp // 3600)
|
||||
minutes = int((timestamp % 3600) // 60)
|
||||
seconds = int(timestamp % 60)
|
||||
milliseconds = int((timestamp % 1) * 1000) # 计算毫秒部分
|
||||
time_str = f"{hours:02d}{minutes:02d}{seconds:02d}{milliseconds:03d}"
|
||||
|
||||
output_path = os.path.join(output_dir,
|
||||
f'keyframe_{frame_idx:06d}_{time_str}.jpg')
|
||||
cv2.imwrite(output_path, keyframe)
|
||||
|
||||
    def extract_frames_by_numbers(self, frame_numbers: List[int], output_folder: str) -> None:
        """
        Extract full-resolution frames by frame number; when several requested
        frames fall in the same millisecond, only the first is kept.

        Args:
            frame_numbers: frame indices to extract (0-based).
            output_folder: destination directory; created if missing.

        Raises:
            ValueError: if the list is empty or contains an out-of-range index.
        """
        if not frame_numbers:
            raise ValueError("未提供帧号列表")

        if any(fn >= self.total_frames or fn < 0 for fn in frame_numbers):
            raise ValueError("存在无效的帧号")

        if not os.path.exists(output_folder):
            os.makedirs(output_folder)

        # Millisecond timestamps already written — used to de-duplicate
        # multiple frame numbers that map to the same millisecond.
        processed_timestamps = set()

        for frame_number in tqdm(frame_numbers, desc="提取高清帧"):
            # Millisecond-precision timestamp of this frame.
            timestamp = frame_number / self.fps
            timestamp_ms = int(timestamp * 1000)

            # Skip duplicates within the same millisecond.
            if timestamp_ms in processed_timestamps:
                continue

            # Random-access seek to the requested frame.
            # NOTE(review): per-frame seeking can be slow on long-GOP video;
            # presumably frame_numbers is small — confirm for large inputs.
            self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_number)
            ret, frame = self.cap.read()

            if ret:
                # Mark this millisecond as handled only on a successful read.
                processed_timestamps.add(timestamp_ms)

                # Build the HHMMSSmmm timestamp string for the file name.
                hours = int(timestamp // 3600)
                minutes = int((timestamp % 3600) // 60)
                seconds = int(timestamp % 60)
                milliseconds = int((timestamp % 1) * 1000)  # millisecond part
                time_str = f"{hours:02d}{minutes:02d}{seconds:02d}{milliseconds:03d}"

                output_path = os.path.join(output_folder,
                                           f"keyframe_{frame_number:06d}_{time_str}.jpg")
                cv2.imwrite(output_path, frame)
            else:
                logger.info(f"无法读取帧 {frame_number}")

        logger.info(f"共提取了 {len(processed_timestamps)} 个不同时间戳的帧")
@staticmethod
|
||||
def extract_numbers_from_folder(folder_path: str) -> List[int]:
|
||||
"""
|
||||
从文件夹中提取帧号
|
||||
|
||||
Args:
|
||||
folder_path: 关键帧文件夹路径
|
||||
|
||||
Returns:
|
||||
List[int]: 排序后的帧号列表
|
||||
"""
|
||||
files = [f for f in os.listdir(folder_path) if f.endswith('.jpg')]
|
||||
# 更新正则表达式以匹配新的文件名格式:keyframe_000123_010534123.jpg
|
||||
pattern = re.compile(r'keyframe_(\d+)_\d{9}\.jpg$')
|
||||
numbers = []
|
||||
|
||||
for f in files:
|
||||
match = pattern.search(f)
|
||||
if match:
|
||||
numbers.append(int(match.group(1)))
|
||||
else:
|
||||
logger.warning(f"文件名格式不匹配: {f}")
|
||||
|
||||
if not numbers:
|
||||
logger.error(f"在目录 {folder_path} 中未找到有效的关键帧文件")
|
||||
|
||||
return sorted(numbers)
|
||||
|
||||
def process_video(self, output_dir: str, skip_seconds: float = 0, threshold: int = 30) -> None:
|
||||
"""
|
||||
处理视频并提取关键帧
|
||||
|
||||
Args:
|
||||
output_dir: 输出目录
|
||||
skip_seconds: 跳过视频开头的秒数
|
||||
"""
|
||||
skip_frames = int(skip_seconds * self.fps)
|
||||
|
||||
logger.info("读取视频帧...")
|
||||
frames = []
|
||||
for frame in tqdm(self.preprocess_video(),
|
||||
total=self.total_frames,
|
||||
desc="读取视频"):
|
||||
frames.append(frame)
|
||||
|
||||
frames = frames[skip_frames:]
|
||||
|
||||
if not frames:
|
||||
raise ValueError(f"跳过 {skip_seconds} 秒后没有剩余帧可以处理")
|
||||
|
||||
logger.info("检测场景边界...")
|
||||
shot_boundaries = self.detect_shot_boundaries(frames, threshold)
|
||||
logger.info(f"检测到 {len(shot_boundaries)} 个场景边界")
|
||||
|
||||
keyframes, keyframe_indices = self.extract_keyframes(frames, shot_boundaries)
|
||||
|
||||
adjusted_indices = [idx + skip_frames for idx in keyframe_indices]
|
||||
self.save_keyframes(keyframes, adjusted_indices, output_dir, desc="保存压缩关键帧")
|
||||
|
||||
    def process_video_pipeline(self,
                               output_dir: str,
                               skip_seconds: float = 0,
                               threshold: int = 20,  # lower default: more sensitive scene detection
                               compressed_width: int = 320,
                               keep_temp: bool = False) -> None:
        """
        Run the full pipeline: compress the video with ffmpeg, detect
        keyframes on the small copy, then re-extract those frames from the
        original video at full resolution.

        Args:
            output_dir: destination for the final HD keyframes.
            skip_seconds: seconds to skip at the start of the video.
            threshold: frame-difference threshold (default lowered to 20 so
                scene detection is more sensitive).
            compressed_width: target size of the short side of the compressed
                working copy, in pixels.
            keep_temp: if True, the temp directory (compressed video and
                low-res keyframes) is kept for inspection.

        NOTE(review): the finally block releases self.cap, so this instance
        cannot be reused after the pipeline runs — confirm that is intended.
        """
        os.makedirs(output_dir, exist_ok=True)
        temp_dir = os.path.join(output_dir, 'temp')
        compressed_dir = os.path.join(temp_dir, 'compressed')
        mini_frames_dir = os.path.join(temp_dir, 'mini_frames')
        hd_frames_dir = output_dir

        os.makedirs(temp_dir, exist_ok=True)
        os.makedirs(compressed_dir, exist_ok=True)
        os.makedirs(mini_frames_dir, exist_ok=True)
        os.makedirs(hd_frames_dir, exist_ok=True)

        mini_processor = None
        compressed_video = None

        try:
            # 1. Compress the video to a small working copy.
            video_name = os.path.splitext(os.path.basename(self.video_path))[0]
            compressed_video = os.path.join(compressed_dir, f"{video_name}_compressed.mp4")

            # Original dimensions decide which axis gets scaled.
            original_width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            original_height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

            logger.info("步骤1: 压缩视频...")
            if original_width > original_height:
                # Landscape video: fix the width, let ffmpeg derive the height.
                scale_filter = f'scale={compressed_width}:-1'
            else:
                # Portrait video: fix the height instead.
                scale_filter = f'scale=-1:{compressed_width}'

            ffmpeg_cmd = [
                'ffmpeg', '-i', self.video_path,
                '-vf', scale_filter,
                '-y',
                compressed_video
            ]

            try:
                subprocess.run(ffmpeg_cmd, check=True, capture_output=True, text=True)
            except subprocess.CalledProcessError as e:
                # Surface ffmpeg's stderr before propagating the failure.
                logger.error(f"FFmpeg 错误输出: {e.stderr}")
                raise

            # 2. Detect keyframes on the compressed copy (fast).
            logger.info("\n步骤2: 从压缩视频提取关键帧...")
            mini_processor = VideoProcessor(compressed_video)
            mini_processor.process_video(mini_frames_dir, skip_seconds, threshold)

            # 3. Re-extract the same frame numbers from the original (HD).
            logger.info("\n步骤3: 提取高清关键帧...")
            frame_numbers = self.extract_numbers_from_folder(mini_frames_dir)

            if not frame_numbers:
                raise ValueError("未能从压缩视频中提取到有效的关键帧")

            self.extract_frames_by_numbers(frame_numbers, hd_frames_dir)

            logger.info(f"处理完成!高清关键帧保存在: {hd_frames_dir}")

        except Exception as e:
            import traceback
            logger.error(f"视频处理失败: \n{traceback.format_exc()}")
            raise

        finally:
            # Release capture handles so the files can be deleted below
            # (Windows refuses to remove files that are still open).
            if mini_processor:
                mini_processor.cap.release()
                del mini_processor

            # Ensure this instance's own handle is released too.
            if hasattr(self, 'cap'):
                self.cap.release()

            # Give the OS a moment to actually release the file handles.
            import time
            time.sleep(0.5)

            if not keep_temp:
                try:
                    # Delete the compressed video file first.
                    if compressed_video and os.path.exists(compressed_video):
                        try:
                            os.remove(compressed_video)
                        except Exception as e:
                            logger.warning(f"删除压缩视频失败: {e}")

                    # Then remove the whole temp directory, retrying because
                    # handle release may lag (notably on Windows).
                    import shutil
                    if os.path.exists(temp_dir):
                        max_retries = 3
                        for i in range(max_retries):
                            try:
                                shutil.rmtree(temp_dir)
                                break
                            except Exception as e:
                                if i == max_retries - 1:
                                    logger.warning(f"清理临时文件失败: {e}")
                                else:
                                    time.sleep(1)  # wait one second, then retry
                                    continue

                    logger.info("临时文件已清理")
                except Exception as e:
                    logger.warning(f"清理临时文件时出错: {e}")
if __name__ == "__main__":
    import sys
    import time

    # Generalization: accept the video path and output dir on the command
    # line; fall back to the original hard-coded sample values so existing
    # invocations keep working.
    default_video = "E:\\projects\\NarratoAI\\resource\\videos\\test.mp4"
    video_path = sys.argv[1] if len(sys.argv) > 1 else default_video
    output_dir = sys.argv[2] if len(sys.argv) > 2 else "output"

    start_time = time.time()
    processor = VideoProcessor(video_path)
    processor.process_video_pipeline(output_dir=output_dir)
    end_time = time.time()
    print(f"处理完成!总耗时: {end_time - start_time:.2f} 秒")
@ -1,10 +1,9 @@
|
||||
[app]
|
||||
project_version="0.5.3"
|
||||
project_version="0.6.0"
|
||||
# 支持视频理解的大模型提供商
|
||||
# gemini
|
||||
# qwenvl
|
||||
vision_llm_provider="qwenvl"
|
||||
vision_analysis_prompt = "你是资深视频内容分析专家,擅长分析视频画面信息,分析下面视频画面内容,只输出客观的画面描述不要给任何总结或评价"
|
||||
|
||||
########## Vision Gemini API Key
|
||||
vision_gemini_api_key = ""
|
||||
@ -173,12 +172,7 @@
|
||||
speech_region=""
|
||||
|
||||
[frames]
|
||||
skip_seconds = 0
|
||||
# threshold(差异阈值)用于判断两个连续帧之间是否发生了场景切换
|
||||
# 较小的阈值(如 20):更敏感,能捕捉到细微的场景变化,但可能会误判,关键帧图片更多
|
||||
# 较大的阈值(如 40):更保守,只捕捉明显的场景切换,但可能会漏掉渐变场景,关键帧图片更少
|
||||
# 默认值 30:在实践中是一个比较平衡的选择
|
||||
threshold = 30
|
||||
version = "v2"
|
||||
# 提取关键帧的间隔时间
|
||||
frame_interval_input = 3
|
||||
# 大模型单次处理的关键帧数量
|
||||
vision_batch_size = 5
|
||||
vision_batch_size = 10
|
||||
|
||||
@ -1,38 +1,46 @@
|
||||
requests~=2.31.0
|
||||
moviepy==2.0.0.dev2
|
||||
faster-whisper~=1.0.1
|
||||
uvicorn~=0.27.1
|
||||
fastapi~=0.115.4
|
||||
tomli~=2.0.1
|
||||
streamlit~=1.40.0
|
||||
loguru~=0.7.2
|
||||
aiohttp~=3.10.10
|
||||
urllib3~=2.2.1
|
||||
pydantic~=2.6.3
|
||||
g4f~=0.3.0.4
|
||||
dashscope~=1.15.0
|
||||
google.generativeai>=0.8.3
|
||||
python-multipart~=0.0.9
|
||||
redis==5.0.3
|
||||
opencv-python~=4.10.0.84
|
||||
# for azure speech
|
||||
# https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/9-more-realistic-ai-voices-for-conversations-now-generally/ba-p/4099471
|
||||
azure-cognitiveservices-speech~=1.37.0
|
||||
git-changelog~=2.5.2
|
||||
watchdog==5.0.2
|
||||
pydub==0.25.1
|
||||
psutil>=5.9.0
|
||||
opencv-python~=4.10.0.84
|
||||
scikit-learn~=1.5.2
|
||||
google-generativeai~=0.8.3
|
||||
pillow==10.3.0
|
||||
python-dotenv~=1.0.1
|
||||
openai~=1.53.0
|
||||
tqdm>=4.66.6
|
||||
tenacity>=9.0.0
|
||||
tiktoken==0.8.0
|
||||
yt-dlp==2024.11.18
|
||||
pysrt==1.1.2
|
||||
httpx==0.27.2
|
||||
transformers==4.47.0
|
||||
# 必须项
|
||||
requests~=2.32.0
|
||||
moviepy==2.1.1
|
||||
edge-tts==6.1.19
|
||||
streamlit~=1.45.0
|
||||
watchdog==6.0.0
|
||||
loguru~=0.7.3
|
||||
tomli~=2.2.1
|
||||
pydub==0.25.1
|
||||
|
||||
openai~=1.77.0
|
||||
google-generativeai>=0.8.5
|
||||
|
||||
# 待优化项
|
||||
# opencv-python==4.11.0.86
|
||||
# scikit-learn==1.6.1
|
||||
|
||||
# fastapi~=0.115.4
|
||||
# uvicorn~=0.27.1
|
||||
# pydantic~=2.11.4
|
||||
|
||||
# faster-whisper~=1.0.1
|
||||
# tomli~=2.0.1
|
||||
# aiohttp~=3.10.10
|
||||
# httpx==0.27.2
|
||||
# urllib3~=2.2.1
|
||||
|
||||
# python-multipart~=0.0.9
|
||||
# redis==5.0.3
|
||||
# opencv-python~=4.10.0.84
|
||||
# azure-cognitiveservices-speech~=1.37.0
|
||||
# git-changelog~=2.5.2
|
||||
# watchdog==5.0.2
|
||||
# pydub==0.25.1
|
||||
# psutil>=5.9.0
|
||||
# scikit-learn~=1.5.2
|
||||
# pillow==10.3.0
|
||||
# python-dotenv~=1.0.1
|
||||
|
||||
# tqdm>=4.66.6
|
||||
# tenacity>=9.0.0
|
||||
# tiktoken==0.8.0
|
||||
# pysrt==1.1.2
|
||||
# transformers==4.50.0
|
||||
|
||||
# yt-dlp==2025.4.30
|
||||
232
webui.py
232
webui.py
@ -1,13 +1,14 @@
|
||||
import streamlit as st
|
||||
import os
|
||||
import sys
|
||||
from uuid import uuid4
|
||||
from loguru import logger
|
||||
from app.config import config
|
||||
from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, review_settings, merge_settings, system_settings
|
||||
from webui.components import basic_settings, video_settings, audio_settings, subtitle_settings, script_settings, \
|
||||
review_settings, merge_settings, system_settings
|
||||
from webui.utils import cache, file_utils
|
||||
from app.utils import utils
|
||||
from app.models.schema import VideoClipParams, VideoAspect
|
||||
from webui.utils.performance import PerformanceMonitor
|
||||
|
||||
|
||||
# 初始化配置 - 必须是第一个 Streamlit 命令
|
||||
st.set_page_config(
|
||||
@ -17,7 +18,7 @@ st.set_page_config(
|
||||
initial_sidebar_state="auto",
|
||||
menu_items={
|
||||
"Report a bug": "https://github.com/linyqh/NarratoAI/issues",
|
||||
'About': f"# NarratoAI:sunglasses: 📽️ \n #### Version: v{config.project_version} \n "
|
||||
'About': f"# Narrato:blue[AI] :sunglasses: 📽️ \n #### Version: v{config.project_version} \n "
|
||||
f"自动化影视解说视频详情请移步:https://github.com/linyqh/NarratoAI"
|
||||
},
|
||||
)
|
||||
@ -28,6 +29,7 @@ hide_streamlit_style = """
|
||||
"""
|
||||
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
|
||||
|
||||
|
||||
def init_log():
|
||||
"""初始化日志配置"""
|
||||
from loguru import logger
|
||||
@ -35,17 +37,7 @@ def init_log():
|
||||
_lvl = "DEBUG"
|
||||
|
||||
def format_record(record):
|
||||
# 增加更多需要过滤的警告消息
|
||||
ignore_messages = [
|
||||
"Examining the path of torch.classes raised",
|
||||
"torch.cuda.is_available()",
|
||||
"CUDA initialization"
|
||||
]
|
||||
|
||||
for msg in ignore_messages:
|
||||
if msg in record["message"]:
|
||||
return ""
|
||||
|
||||
# 简化日志格式化处理,不尝试按特定字符串过滤torch相关内容
|
||||
file_path = record["file"].path
|
||||
relative_path = os.path.relpath(file_path, config.root_dir)
|
||||
record["file"].path = f"./{relative_path}"
|
||||
@ -57,23 +49,54 @@ def init_log():
|
||||
'- <level>{message}</>' + "\n"
|
||||
return _format
|
||||
|
||||
# 优化日志过滤器
|
||||
def log_filter(record):
|
||||
ignore_messages = [
|
||||
"Examining the path of torch.classes raised",
|
||||
"torch.cuda.is_available()",
|
||||
"CUDA initialization"
|
||||
]
|
||||
return not any(msg in record["message"] for msg in ignore_messages)
|
||||
|
||||
# 替换为更简单的过滤方式,避免在过滤时访问message内容
|
||||
# 此处先不设置复杂的过滤器,等应用启动后再动态添加
|
||||
logger.add(
|
||||
sys.stdout,
|
||||
level=_lvl,
|
||||
format=format_record,
|
||||
colorize=True,
|
||||
filter=log_filter
|
||||
colorize=True
|
||||
)
|
||||
|
||||
# 应用启动后,可以再添加更复杂的过滤器
|
||||
def setup_advanced_filters():
|
||||
"""在应用完全启动后设置高级过滤器"""
|
||||
try:
|
||||
for handler_id in logger._core.handlers:
|
||||
logger.remove(handler_id)
|
||||
|
||||
# 重新添加带有高级过滤的处理器
|
||||
def advanced_filter(record):
|
||||
"""更复杂的过滤器,在应用启动后安全使用"""
|
||||
ignore_messages = [
|
||||
"Examining the path of torch.classes raised",
|
||||
"torch.cuda.is_available()",
|
||||
"CUDA initialization"
|
||||
]
|
||||
return not any(msg in record["message"] for msg in ignore_messages)
|
||||
|
||||
logger.add(
|
||||
sys.stdout,
|
||||
level=_lvl,
|
||||
format=format_record,
|
||||
colorize=True,
|
||||
filter=advanced_filter
|
||||
)
|
||||
except Exception as e:
|
||||
# 如果过滤器设置失败,确保日志仍然可用
|
||||
logger.add(
|
||||
sys.stdout,
|
||||
level=_lvl,
|
||||
format=format_record,
|
||||
colorize=True
|
||||
)
|
||||
logger.error(f"设置高级日志过滤器失败: {e}")
|
||||
|
||||
# 将高级过滤器设置放到启动主逻辑后
|
||||
import threading
|
||||
threading.Timer(5.0, setup_advanced_filters).start()
|
||||
|
||||
|
||||
def init_global_state():
|
||||
"""初始化全局状态"""
|
||||
if 'video_clip_json' not in st.session_state:
|
||||
@ -85,6 +108,7 @@ def init_global_state():
|
||||
if 'subclip_videos' not in st.session_state:
|
||||
st.session_state['subclip_videos'] = {}
|
||||
|
||||
|
||||
def tr(key):
|
||||
"""翻译函数"""
|
||||
i18n_dir = os.path.join(os.path.dirname(__file__), "webui", "i18n")
|
||||
@ -92,90 +116,94 @@ def tr(key):
|
||||
loc = locales.get(st.session_state['ui_language'], {})
|
||||
return loc.get("Translation", {}).get(key, key)
|
||||
|
||||
|
||||
def render_generate_button():
|
||||
"""渲染生成按钮和处理逻辑"""
|
||||
if st.button(tr("Generate Video"), use_container_width=True, type="primary"):
|
||||
from app.services import task as tm
|
||||
|
||||
# 重置日志容器和记录
|
||||
log_container = st.empty()
|
||||
log_records = []
|
||||
|
||||
def log_received(msg):
|
||||
with log_container:
|
||||
log_records.append(msg)
|
||||
st.code("\n".join(log_records))
|
||||
|
||||
from loguru import logger
|
||||
logger.add(log_received)
|
||||
|
||||
config.save_config()
|
||||
task_id = st.session_state.get('task_id')
|
||||
|
||||
if not task_id:
|
||||
st.error(tr("请先裁剪视频"))
|
||||
return
|
||||
if not st.session_state.get('video_clip_json_path'):
|
||||
st.error(tr("脚本文件不能为空"))
|
||||
return
|
||||
if not st.session_state.get('video_origin_path'):
|
||||
st.error(tr("视频文件不能为空"))
|
||||
return
|
||||
|
||||
st.toast(tr("生成视频"))
|
||||
logger.info(tr("开始生成视频"))
|
||||
|
||||
# 获取所有参数
|
||||
script_params = script_settings.get_script_params()
|
||||
video_params = video_settings.get_video_params()
|
||||
audio_params = audio_settings.get_audio_params()
|
||||
subtitle_params = subtitle_settings.get_subtitle_params()
|
||||
|
||||
# 合并所有参数
|
||||
all_params = {
|
||||
**script_params,
|
||||
**video_params,
|
||||
**audio_params,
|
||||
**subtitle_params
|
||||
}
|
||||
|
||||
# 创建参数对象
|
||||
params = VideoClipParams(**all_params)
|
||||
|
||||
result = tm.start_subclip(
|
||||
task_id=task_id,
|
||||
params=params,
|
||||
subclip_path_videos=st.session_state['subclip_videos']
|
||||
)
|
||||
|
||||
video_files = result.get("videos", [])
|
||||
st.success(tr("视生成完成"))
|
||||
|
||||
try:
|
||||
from app.services import task as tm
|
||||
import torch
|
||||
|
||||
# 重置日志容器和记录
|
||||
log_container = st.empty()
|
||||
log_records = []
|
||||
if video_files:
|
||||
player_cols = st.columns(len(video_files) * 2 + 1)
|
||||
for i, url in enumerate(video_files):
|
||||
player_cols[i * 2 + 1].video(url)
|
||||
except Exception as e:
|
||||
logger.error(f"播放视频失败: {e}")
|
||||
|
||||
def log_received(msg):
|
||||
with log_container:
|
||||
log_records.append(msg)
|
||||
st.code("\n".join(log_records))
|
||||
file_utils.open_task_folder(config.root_dir, task_id)
|
||||
logger.info(tr("视频生成完成"))
|
||||
|
||||
from loguru import logger
|
||||
logger.add(log_received)
|
||||
|
||||
config.save_config()
|
||||
task_id = st.session_state.get('task_id')
|
||||
|
||||
if not task_id:
|
||||
st.error(tr("请先裁剪视频"))
|
||||
return
|
||||
if not st.session_state.get('video_clip_json_path'):
|
||||
st.error(tr("脚本文件不能为空"))
|
||||
return
|
||||
if not st.session_state.get('video_origin_path'):
|
||||
st.error(tr("视频文件不能为空"))
|
||||
return
|
||||
|
||||
st.toast(tr("生成视频"))
|
||||
logger.info(tr("开始生成视频"))
|
||||
|
||||
# 获取所有参数
|
||||
script_params = script_settings.get_script_params()
|
||||
video_params = video_settings.get_video_params()
|
||||
audio_params = audio_settings.get_audio_params()
|
||||
subtitle_params = subtitle_settings.get_subtitle_params()
|
||||
|
||||
# 合并所有参数
|
||||
all_params = {
|
||||
**script_params,
|
||||
**video_params,
|
||||
**audio_params,
|
||||
**subtitle_params
|
||||
}
|
||||
|
||||
# 创建参数对象
|
||||
params = VideoClipParams(**all_params)
|
||||
|
||||
result = tm.start_subclip(
|
||||
task_id=task_id,
|
||||
params=params,
|
||||
subclip_path_videos=st.session_state['subclip_videos']
|
||||
)
|
||||
|
||||
video_files = result.get("videos", [])
|
||||
st.success(tr("视生成完成"))
|
||||
|
||||
try:
|
||||
if video_files:
|
||||
player_cols = st.columns(len(video_files) * 2 + 1)
|
||||
for i, url in enumerate(video_files):
|
||||
player_cols[i * 2 + 1].video(url)
|
||||
except Exception as e:
|
||||
logger.error(f"播放视频失败: {e}")
|
||||
|
||||
file_utils.open_task_folder(config.root_dir, task_id)
|
||||
logger.info(tr("视频生成完成"))
|
||||
|
||||
finally:
|
||||
PerformanceMonitor.cleanup_resources()
|
||||
|
||||
def main():
|
||||
"""主函数"""
|
||||
init_log()
|
||||
init_global_state()
|
||||
utils.init_resources()
|
||||
|
||||
st.title(f"NarratoAI :sunglasses:📽️")
|
||||
# 仅初始化基本资源,避免过早地加载依赖PyTorch的资源
|
||||
# 检查是否能分解utils.init_resources()为基本资源和高级资源(如依赖PyTorch的资源)
|
||||
try:
|
||||
utils.init_resources()
|
||||
except Exception as e:
|
||||
logger.warning(f"资源初始化时出现警告: {e}")
|
||||
|
||||
st.title(f"Narrato:blue[AI]:sunglasses: 📽️")
|
||||
st.write(tr("Get Help"))
|
||||
|
||||
|
||||
# 首先渲染不依赖PyTorch的UI部分
|
||||
# 渲染基础设置面板
|
||||
basic_settings.render_basic_settings(tr)
|
||||
# 渲染合并设置
|
||||
@ -190,14 +218,18 @@ def main():
|
||||
audio_settings.render_audio_panel(tr)
|
||||
with panel[2]:
|
||||
subtitle_settings.render_subtitle_panel(tr)
|
||||
# 渲染系统设置面板
|
||||
system_settings.render_system_panel(tr)
|
||||
|
||||
# 渲染视频审查面板
|
||||
review_settings.render_review_panel(tr)
|
||||
|
||||
# 渲染生成按钮和处理逻辑
|
||||
# 放到最后渲染可能使用PyTorch的部分
|
||||
# 渲染系统设置面板
|
||||
with panel[2]:
|
||||
system_settings.render_system_panel(tr)
|
||||
|
||||
# 放到最后渲染生成按钮和处理逻辑
|
||||
render_generate_button()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
@ -8,7 +8,7 @@ from webui.components import (
|
||||
audio_settings,
|
||||
subtitle_settings
|
||||
)
|
||||
from webui.utils import cache, file_utils, performance
|
||||
from webui.utils import cache, file_utils
|
||||
|
||||
__all__ = [
|
||||
'config',
|
||||
@ -17,6 +17,5 @@ __all__ = [
|
||||
'audio_settings',
|
||||
'subtitle_settings',
|
||||
'cache',
|
||||
'file_utils',
|
||||
'performance'
|
||||
'file_utils'
|
||||
]
|
||||
@ -1,7 +1,10 @@
|
||||
import traceback
|
||||
|
||||
import streamlit as st
|
||||
import os
|
||||
from app.config import config
|
||||
from app.utils import utils
|
||||
from loguru import logger
|
||||
|
||||
|
||||
def render_basic_settings(tr):
|
||||
@ -266,7 +269,7 @@ def test_text_model_connection(api_key, base_url, model_name, provider, tr):
|
||||
elif provider.lower() == 'moonshot':
|
||||
base_url = "https://api.moonshot.cn/v1"
|
||||
elif provider.lower() == 'deepseek':
|
||||
base_url = "https://api.deepseek.com/v1"
|
||||
base_url = "https://api.deepseek.com"
|
||||
|
||||
# 构建测试URL
|
||||
test_url = f"{base_url.rstrip('/')}/chat/completions"
|
||||
@ -288,7 +291,7 @@ def test_text_model_connection(api_key, base_url, model_name, provider, tr):
|
||||
"messages": [
|
||||
{"role": "user", "content": "直接回复我文本'当前网络可用'"}
|
||||
],
|
||||
"max_tokens": 10
|
||||
"stream": False
|
||||
}
|
||||
|
||||
# 发送测试请求
|
||||
@ -296,7 +299,6 @@ def test_text_model_connection(api_key, base_url, model_name, provider, tr):
|
||||
test_url,
|
||||
headers=headers,
|
||||
json=test_data,
|
||||
timeout=10
|
||||
)
|
||||
|
||||
if response.status_code == 200:
|
||||
@ -313,7 +315,7 @@ def render_text_llm_settings(tr):
|
||||
st.subheader(tr("Text Generation Model Settings"))
|
||||
|
||||
# 文案生成模型提供商选择
|
||||
text_providers = ['DeepSeek', 'OpenAI', 'Qwen', 'Moonshot', 'Gemini']
|
||||
text_providers = ['DeepSeek', 'OpenAI', 'Siliconflow', 'Qwen', 'Moonshot', 'Gemini']
|
||||
saved_text_provider = config.app.get("text_llm_provider", "DeepSeek").lower()
|
||||
saved_provider_index = 0
|
||||
|
||||
@ -331,9 +333,9 @@ def render_text_llm_settings(tr):
|
||||
config.app["text_llm_provider"] = text_provider
|
||||
|
||||
# 获取已保存的文本模型配置
|
||||
text_api_key = config.app.get(f"text_{text_provider}_api_key", "")
|
||||
text_base_url = config.app.get(f"text_{text_provider}_base_url", "")
|
||||
text_model_name = config.app.get(f"text_{text_provider}_model_name", "")
|
||||
text_api_key = config.app.get(f"text_{text_provider}_api_key")
|
||||
text_base_url = config.app.get(f"text_{text_provider}_base_url")
|
||||
text_model_name = config.app.get(f"text_{text_provider}_model_name")
|
||||
|
||||
# 渲染文本模型配置输入框
|
||||
st_text_api_key = st.text_input(tr("Text API Key"), value=text_api_key, type="password")
|
||||
@ -342,6 +344,8 @@ def render_text_llm_settings(tr):
|
||||
|
||||
# 添加测试按钮
|
||||
if st.button(tr("Test Connection"), key="test_text_connection"):
|
||||
logger.debug(st_text_base_url)
|
||||
logger.debug(st_text_model_name)
|
||||
with st.spinner(tr("Testing connection...")):
|
||||
success, message = test_text_model_connection(
|
||||
api_key=st_text_api_key,
|
||||
@ -364,11 +368,11 @@ def render_text_llm_settings(tr):
|
||||
if st_text_model_name:
|
||||
config.app[f"text_{text_provider}_model_name"] = st_text_model_name
|
||||
|
||||
# Cloudflare 特殊配置
|
||||
if text_provider == 'cloudflare':
|
||||
st_account_id = st.text_input(
|
||||
tr("Account ID"),
|
||||
value=config.app.get(f"text_{text_provider}_account_id", "")
|
||||
)
|
||||
if st_account_id:
|
||||
config.app[f"text_{text_provider}_account_id"] = st_account_id
|
||||
# # Cloudflare 特殊配置
|
||||
# if text_provider == 'cloudflare':
|
||||
# st_account_id = st.text_input(
|
||||
# tr("Account ID"),
|
||||
# value=config.app.get(f"text_{text_provider}_account_id", "")
|
||||
# )
|
||||
# if st_account_id:
|
||||
# config.app[f"text_{text_provider}_account_id"] = st_account_id
|
||||
|
||||
@ -285,8 +285,8 @@ def render_merge_settings(tr):
|
||||
error_message = str(e)
|
||||
if "moviepy" in error_message.lower():
|
||||
st.error(tr("Error processing video files. Please check if the videos are valid MP4 files."))
|
||||
elif "pysrt" in error_message.lower():
|
||||
st.error(tr("Error processing subtitle files. Please check if the subtitles are valid SRT files."))
|
||||
# elif "pysrt" in error_message.lower():
|
||||
# st.error(tr("Error processing subtitle files. Please check if the subtitles are valid SRT files."))
|
||||
else:
|
||||
st.error(f"{tr('Error during merge')}: {error_message}")
|
||||
|
||||
|
||||
@ -33,7 +33,7 @@ def render_video_item(tr, video_list, subclip_videos, index):
|
||||
video_script = video_list[index]
|
||||
|
||||
# 显示时间戳
|
||||
timestamp = video_script.get('timestamp', '')
|
||||
timestamp = video_script.get('_id', '')
|
||||
st.text_area(
|
||||
tr("Timestamp"),
|
||||
value=timestamp,
|
||||
|
||||
@ -47,7 +47,7 @@ def render_script_file(tr, params):
|
||||
(tr("None"), ""),
|
||||
(tr("Auto Generate"), "auto"),
|
||||
(tr("Short Generate"), "short"),
|
||||
(tr("Upload Script"), "upload_script") # 新增上传脚本选项
|
||||
(tr("Upload Script"), "upload_script")
|
||||
]
|
||||
|
||||
# 获取已有脚本文件
|
||||
@ -214,38 +214,25 @@ def render_script_buttons(tr, params):
|
||||
# 根据脚本类型显示不同的设置
|
||||
if script_path != "short":
|
||||
# 非短视频模式下显示原有的三个输入框
|
||||
input_cols = st.columns(3)
|
||||
input_cols = st.columns(2)
|
||||
|
||||
with input_cols[0]:
|
||||
skip_seconds = st.number_input(
|
||||
"skip_seconds",
|
||||
st.number_input(
|
||||
tr("Frame Interval (seconds)"),
|
||||
min_value=0,
|
||||
value=st.session_state.get('skip_seconds', config.frames.get('skip_seconds', 0)),
|
||||
help=tr("Skip the first few seconds"),
|
||||
key="skip_seconds_input"
|
||||
value=st.session_state.get('frame_interval_input', config.frames.get('frame_interval_input', 3)),
|
||||
help=tr("Frame Interval (seconds) (More keyframes consume more tokens)"),
|
||||
key="frame_interval_input"
|
||||
)
|
||||
st.session_state['skip_seconds'] = skip_seconds
|
||||
|
||||
|
||||
with input_cols[1]:
|
||||
threshold = st.number_input(
|
||||
"threshold",
|
||||
st.number_input(
|
||||
tr("Batch Size"),
|
||||
min_value=0,
|
||||
value=st.session_state.get('threshold', config.frames.get('threshold', 30)),
|
||||
help=tr("Difference threshold"),
|
||||
key="threshold_input"
|
||||
value=st.session_state.get('vision_batch_size', config.frames.get('vision_batch_size', 10)),
|
||||
help=tr("Batch Size (More keyframes consume more tokens)"),
|
||||
key="vision_batch_size"
|
||||
)
|
||||
st.session_state['threshold'] = threshold
|
||||
|
||||
with input_cols[2]:
|
||||
vision_batch_size = st.number_input(
|
||||
"vision_batch_size",
|
||||
min_value=1,
|
||||
max_value=20,
|
||||
value=st.session_state.get('vision_batch_size', config.frames.get('vision_batch_size', 5)),
|
||||
help=tr("Vision processing batch size"),
|
||||
key="vision_batch_size_input"
|
||||
)
|
||||
st.session_state['vision_batch_size'] = vision_batch_size
|
||||
|
||||
# 生成/加载按钮
|
||||
if script_path == "auto":
|
||||
@ -259,7 +246,8 @@ def render_script_buttons(tr, params):
|
||||
|
||||
if st.button(button_name, key="script_action", disabled=not script_path):
|
||||
if script_path == "auto":
|
||||
generate_script_docu(tr, params)
|
||||
# 执行纪录片视频脚本生成(视频无字幕无配音)
|
||||
generate_script_docu(params)
|
||||
elif script_path == "short":
|
||||
# 获取自定义片段数量参数
|
||||
custom_clips = st.session_state.get('custom_clips', 5)
|
||||
@ -366,12 +354,11 @@ def crop_video(tr, params):
|
||||
utils.cut_video(params, update_progress)
|
||||
time.sleep(0.5)
|
||||
progress_bar.progress(100)
|
||||
status_text.text("剪完成!")
|
||||
st.success("视频剪辑成功完成!")
|
||||
except Exception as e:
|
||||
st.error(f"剪辑过程中发生错误: {str(e)}")
|
||||
finally:
|
||||
time.sleep(2)
|
||||
time.sleep(1)
|
||||
progress_bar.empty()
|
||||
status_text.empty()
|
||||
|
||||
|
||||
@ -127,7 +127,7 @@ def get_subtitle_params():
|
||||
'font_name': st.session_state.get('font_name', ''),
|
||||
'font_size': st.session_state.get('font_size', 60),
|
||||
'text_fore_color': st.session_state.get('text_fore_color', '#FFFFFF'),
|
||||
'position': st.session_state.get('subtitle_position', 'bottom'),
|
||||
'subtitle_position': st.session_state.get('subtitle_position', 'bottom'),
|
||||
'custom_position': st.session_state.get('custom_position', 70.0),
|
||||
'stroke_color': st.session_state.get('stroke_color', '#000000'),
|
||||
'stroke_width': st.session_state.get('stroke_width', 1.5),
|
||||
|
||||
@ -85,6 +85,7 @@
|
||||
"TTS Provider": "TTS Provider",
|
||||
"Hide Log": "Hide Log",
|
||||
"Upload Local Files": "Upload Local Files",
|
||||
"File Uploaded Successfully": "File Uploaded Successfully"
|
||||
"File Uploaded Successfully": "File Uploaded Successfully",
|
||||
"Frame Interval (seconds)": "Frame Interval (seconds) (More keyframes consume more tokens)"
|
||||
}
|
||||
}
|
||||
@ -115,7 +115,6 @@
|
||||
"Text Generation Model Settings": "文案生成模型设置",
|
||||
"LLM Model Name": "大语言模型名称",
|
||||
"LLM Model API Key": "大语言模型 API 密钥",
|
||||
"Batch Size": "批处理大小",
|
||||
"Text Model Provider": "文案生成模型提供商",
|
||||
"Text API Key": "文案生成 API 密钥",
|
||||
"Text Base URL": "文案生成接口地址",
|
||||
@ -192,6 +191,10 @@
|
||||
"Generate Short Video Script": "AI生成短剧混剪脚本",
|
||||
"Adjust the volume of the original audio": "调整原始音频的音量",
|
||||
"Original Volume": "视频音量",
|
||||
"Auto Generate": "纪录片解说 (画面解说)"
|
||||
"Auto Generate": "纪录片解说 (画面解说)",
|
||||
"Frame Interval (seconds)": "帧间隔 (秒)",
|
||||
"Frame Interval (seconds) (More keyframes consume more tokens)": "帧间隔 (秒) (更多关键帧消耗更多令牌)",
|
||||
"Batch Size": "批处理大小",
|
||||
"Batch Size (More keyframes consume more tokens)": "批处理大小, 每批处理越少消耗 token 越多"
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -5,20 +5,23 @@ import time
|
||||
import asyncio
|
||||
import traceback
|
||||
import requests
|
||||
from app.utils import video_processor
|
||||
import streamlit as st
|
||||
from loguru import logger
|
||||
from requests.adapters import HTTPAdapter
|
||||
from urllib3.util.retry import Retry
|
||||
from datetime import datetime
|
||||
|
||||
from app.config import config
|
||||
from app.utils.script_generator import ScriptProcessor
|
||||
from app.utils import utils, video_processor, video_processor_v2, qwenvl_analyzer
|
||||
from app.utils import utils, video_processor, qwenvl_analyzer
|
||||
from webui.tools.base import create_vision_analyzer, get_batch_files, get_batch_timestamps, chekc_video_config
|
||||
|
||||
|
||||
def generate_script_docu(tr, params):
|
||||
def generate_script_docu(params):
|
||||
"""
|
||||
生成 纪录片 视频脚本
|
||||
要求: 原视频无字幕无配音
|
||||
适合场景: 纪录片、动物搞笑解说、荒野建造等
|
||||
"""
|
||||
progress_bar = st.progress(0)
|
||||
status_text = st.empty()
|
||||
@ -35,8 +38,9 @@ def generate_script_docu(tr, params):
|
||||
if not params.video_origin_path:
|
||||
st.error("请先选择视频文件")
|
||||
return
|
||||
|
||||
# ===================提取键帧===================
|
||||
"""
|
||||
1. 提取键帧
|
||||
"""
|
||||
update_progress(10, "正在提取关键帧...")
|
||||
|
||||
# 创建临时目录用于存储关键帧
|
||||
@ -64,21 +68,12 @@ def generate_script_docu(tr, params):
|
||||
os.makedirs(video_keyframes_dir, exist_ok=True)
|
||||
|
||||
# 初始化视频处理器
|
||||
if config.frames.get("version") == "v2":
|
||||
processor = video_processor_v2.VideoProcessor(params.video_origin_path)
|
||||
# 处理视频并提取关键帧
|
||||
processor.process_video_pipeline(
|
||||
output_dir=video_keyframes_dir,
|
||||
skip_seconds=st.session_state.get('skip_seconds'),
|
||||
threshold=st.session_state.get('threshold')
|
||||
)
|
||||
else:
|
||||
processor = video_processor.VideoProcessor(params.video_origin_path)
|
||||
# 处理视频并提取关键帧
|
||||
processor.process_video(
|
||||
output_dir=video_keyframes_dir,
|
||||
skip_seconds=0
|
||||
)
|
||||
processor = video_processor.VideoProcessor(params.video_origin_path)
|
||||
# 处理视频并提取关键帧
|
||||
processor.process_video_pipeline(
|
||||
output_dir=video_keyframes_dir,
|
||||
interval_seconds=st.session_state.get('frame_interval_input'),
|
||||
)
|
||||
|
||||
# 获取所有关键文件路径
|
||||
for filename in sorted(os.listdir(video_keyframes_dir)):
|
||||
@ -101,9 +96,11 @@ def generate_script_docu(tr, params):
|
||||
|
||||
raise Exception(f"关键帧提取失败: {str(e)}")
|
||||
|
||||
# 根据不同的 LLM 提供商处理
|
||||
"""
|
||||
2. 视觉分析(批量分析每一帧)
|
||||
"""
|
||||
vision_llm_provider = st.session_state.get('vision_llm_providers').lower()
|
||||
logger.debug(f"Vision LLM 提供商: {vision_llm_provider}")
|
||||
logger.debug(f"VLM 视觉大模型提供商: {vision_llm_provider}")
|
||||
|
||||
try:
|
||||
# ===================初始化视觉分析器===================
|
||||
@ -137,111 +134,240 @@ def generate_script_docu(tr, params):
|
||||
|
||||
# 执行异步分析
|
||||
vision_batch_size = st.session_state.get('vision_batch_size') or config.frames.get("vision_batch_size")
|
||||
vision_analysis_prompt = """
|
||||
我提供了 %s 张视频帧,它们按时间顺序排列,代表一个连续的视频片段。请仔细分析每一帧的内容,并关注帧与帧之间的变化,以理解整个片段的活动。
|
||||
|
||||
首先,请详细描述每一帧的关键视觉信息(包含:主要内容、人物、动作和场景)。
|
||||
然后,基于所有帧的分析,请用**简洁的语言**总结整个视频片段中发生的主要活动或事件流程。
|
||||
|
||||
请务必使用 JSON 格式输出你的结果。JSON 结构应如下:
|
||||
{
|
||||
"frame_observations": [
|
||||
{
|
||||
"frame_number": 1, // 或其他标识帧的方式
|
||||
"observation": "描述每张视频帧中的主要内容、人物、动作和场景。"
|
||||
},
|
||||
// ... 更多帧的观察 ...
|
||||
],
|
||||
"overall_activity_summary": "在这里填写你总结的整个片段的主要活动,保持简洁。"
|
||||
}
|
||||
|
||||
请务必不要遗漏视频帧,我提供了 %s 张视频帧,frame_observations 必须包含 %s 个元素
|
||||
|
||||
请只返回 JSON 字符串,不要包含任何其他解释性文字。
|
||||
"""
|
||||
results = loop.run_until_complete(
|
||||
analyzer.analyze_images(
|
||||
images=keyframe_files,
|
||||
prompt=config.app.get('vision_analysis_prompt'),
|
||||
prompt=vision_analysis_prompt,
|
||||
batch_size=vision_batch_size
|
||||
)
|
||||
)
|
||||
loop.close()
|
||||
|
||||
"""
|
||||
3. 处理分析结果(格式化为 json 数据)
|
||||
"""
|
||||
# ===================处理分析结果===================
|
||||
update_progress(60, "正在整理分析结果...")
|
||||
|
||||
# 合并所有批次的析结果
|
||||
# 合并所有批次的分析结果
|
||||
frame_analysis = ""
|
||||
merged_frame_observations = [] # 合并所有批次的帧观察
|
||||
overall_activity_summaries = [] # 合并所有批次的整体总结
|
||||
prev_batch_files = None
|
||||
|
||||
frame_counter = 1 # 初始化帧计数器,用于给所有帧分配连续的序号
|
||||
# logger.debug(json.dumps(results, indent=4, ensure_ascii=False))
|
||||
# 确保分析目录存在
|
||||
analysis_dir = os.path.join(utils.storage_dir(), "temp", "analysis")
|
||||
os.makedirs(analysis_dir, exist_ok=True)
|
||||
origin_res = os.path.join(analysis_dir, "frame_analysis.json")
|
||||
with open(origin_res, 'w', encoding='utf-8') as f:
|
||||
json.dump(results, f, ensure_ascii=False, indent=2)
|
||||
|
||||
# 开始处理
|
||||
for result in results:
|
||||
if 'error' in result:
|
||||
logger.warning(f"批次 {result['batch_index']} 处理出现警告: {result['error']}")
|
||||
|
||||
# 获取当前批次的文件列表 keyframe_001136_000045.jpg 将 000045 精度提升到 毫秒
|
||||
continue
|
||||
|
||||
# 获取当前批次的文件列表
|
||||
batch_files = get_batch_files(keyframe_files, result, vision_batch_size)
|
||||
logger.debug(f"批次 {result['batch_index']} 处理完成,共 {len(batch_files)} 张图片")
|
||||
# logger.debug(batch_files)
|
||||
|
||||
first_timestamp, last_timestamp, _ = get_batch_timestamps(batch_files, prev_batch_files)
|
||||
|
||||
# 获取批次的时间戳范围
|
||||
first_timestamp, last_timestamp, timestamp_range = get_batch_timestamps(batch_files, prev_batch_files)
|
||||
logger.debug(f"处理时间戳: {first_timestamp}-{last_timestamp}")
|
||||
|
||||
# 添加带时间戳的分析结果
|
||||
frame_analysis += f"\n=== {first_timestamp}-{last_timestamp} ===\n"
|
||||
frame_analysis += result['response']
|
||||
frame_analysis += "\n"
|
||||
|
||||
|
||||
# 解析响应中的JSON数据
|
||||
response_text = result['response']
|
||||
try:
|
||||
# 处理可能包含```json```格式的响应
|
||||
if "```json" in response_text:
|
||||
json_content = response_text.split("```json")[1].split("```")[0].strip()
|
||||
elif "```" in response_text:
|
||||
json_content = response_text.split("```")[1].split("```")[0].strip()
|
||||
else:
|
||||
json_content = response_text.strip()
|
||||
|
||||
response_data = json.loads(json_content)
|
||||
|
||||
# 提取frame_observations和overall_activity_summary
|
||||
if "frame_observations" in response_data:
|
||||
frame_obs = response_data["frame_observations"]
|
||||
overall_summary = response_data.get("overall_activity_summary", "")
|
||||
|
||||
# 添加时间戳信息到每个帧观察
|
||||
for i, obs in enumerate(frame_obs):
|
||||
if i < len(batch_files):
|
||||
# 从文件名中提取时间戳
|
||||
file_path = batch_files[i]
|
||||
file_name = os.path.basename(file_path)
|
||||
# 提取时间戳字符串 (格式如: keyframe_000675_000027000.jpg)
|
||||
# 格式解析: keyframe_帧序号_毫秒时间戳.jpg
|
||||
timestamp_parts = file_name.split('_')
|
||||
if len(timestamp_parts) >= 3:
|
||||
timestamp_str = timestamp_parts[-1].split('.')[0]
|
||||
try:
|
||||
# 修正时间戳解析逻辑
|
||||
# 格式为000100000,表示00:01:00,000,即1分钟
|
||||
# 需要按照对应位数进行解析:
|
||||
# 前两位是小时,中间两位是分钟,后面是秒和毫秒
|
||||
if len(timestamp_str) >= 9: # 确保格式正确
|
||||
hours = int(timestamp_str[0:2])
|
||||
minutes = int(timestamp_str[2:4])
|
||||
seconds = int(timestamp_str[4:6])
|
||||
milliseconds = int(timestamp_str[6:9])
|
||||
|
||||
# 计算总秒数
|
||||
timestamp_seconds = hours * 3600 + minutes * 60 + seconds + milliseconds / 1000
|
||||
formatted_time = utils.format_time(timestamp_seconds) # 格式化时间戳
|
||||
else:
|
||||
# 兼容旧的解析方式
|
||||
timestamp_seconds = int(timestamp_str) / 1000 # 转换为秒
|
||||
formatted_time = utils.format_time(timestamp_seconds) # 格式化时间戳
|
||||
except ValueError:
|
||||
logger.warning(f"无法解析时间戳: {timestamp_str}")
|
||||
timestamp_seconds = 0
|
||||
formatted_time = "00:00:00,000"
|
||||
else:
|
||||
logger.warning(f"文件名格式不符合预期: {file_name}")
|
||||
timestamp_seconds = 0
|
||||
formatted_time = "00:00:00,000"
|
||||
|
||||
# 添加额外信息到帧观察
|
||||
obs["frame_path"] = file_path
|
||||
obs["timestamp"] = formatted_time
|
||||
obs["timestamp_seconds"] = timestamp_seconds
|
||||
obs["batch_index"] = result['batch_index']
|
||||
|
||||
# 使用全局递增的帧计数器替换原始的frame_number
|
||||
if "frame_number" in obs:
|
||||
obs["original_frame_number"] = obs["frame_number"] # 保留原始编号作为参考
|
||||
obs["frame_number"] = frame_counter # 赋值连续的帧编号
|
||||
frame_counter += 1 # 增加帧计数器
|
||||
|
||||
# 添加到合并列表
|
||||
merged_frame_observations.append(obs)
|
||||
|
||||
# 添加批次整体总结信息
|
||||
if overall_summary:
|
||||
# 从文件名中提取时间戳数值
|
||||
first_time_str = first_timestamp.split('_')[-1].split('.')[0]
|
||||
last_time_str = last_timestamp.split('_')[-1].split('.')[0]
|
||||
|
||||
# 转换为毫秒并计算持续时间(秒)
|
||||
try:
|
||||
# 修正解析逻辑,与上面相同的方式解析时间戳
|
||||
if len(first_time_str) >= 9 and len(last_time_str) >= 9:
|
||||
# 解析第一个时间戳
|
||||
first_hours = int(first_time_str[0:2])
|
||||
first_minutes = int(first_time_str[2:4])
|
||||
first_seconds = int(first_time_str[4:6])
|
||||
first_ms = int(first_time_str[6:9])
|
||||
first_time_seconds = first_hours * 3600 + first_minutes * 60 + first_seconds + first_ms / 1000
|
||||
|
||||
# 解析第二个时间戳
|
||||
last_hours = int(last_time_str[0:2])
|
||||
last_minutes = int(last_time_str[2:4])
|
||||
last_seconds = int(last_time_str[4:6])
|
||||
last_ms = int(last_time_str[6:9])
|
||||
last_time_seconds = last_hours * 3600 + last_minutes * 60 + last_seconds + last_ms / 1000
|
||||
|
||||
batch_duration = last_time_seconds - first_time_seconds
|
||||
else:
|
||||
# 兼容旧的解析方式
|
||||
first_time_ms = int(first_time_str)
|
||||
last_time_ms = int(last_time_str)
|
||||
batch_duration = (last_time_ms - first_time_ms) / 1000
|
||||
except ValueError:
|
||||
# 使用 utils.time_to_seconds 函数处理格式化的时间戳
|
||||
first_time_seconds = utils.time_to_seconds(first_time_str.replace('_', ':').replace('-', ','))
|
||||
last_time_seconds = utils.time_to_seconds(last_time_str.replace('_', ':').replace('-', ','))
|
||||
batch_duration = last_time_seconds - first_time_seconds
|
||||
|
||||
overall_activity_summaries.append({
|
||||
"batch_index": result['batch_index'],
|
||||
"time_range": f"{first_timestamp}-{last_timestamp}",
|
||||
"duration_seconds": batch_duration,
|
||||
"summary": overall_summary
|
||||
})
|
||||
except Exception as e:
|
||||
logger.error(f"解析批次 {result['batch_index']} 的响应数据失败: {str(e)}")
|
||||
# 添加原始响应作为回退
|
||||
frame_analysis += f"\n=== {first_timestamp}-{last_timestamp} ===\n"
|
||||
frame_analysis += response_text
|
||||
frame_analysis += "\n"
|
||||
|
||||
# 更新上一个批次的文件
|
||||
prev_batch_files = batch_files
|
||||
|
||||
# 将合并后的结果转为JSON字符串
|
||||
merged_results = {
|
||||
"frame_observations": merged_frame_observations,
|
||||
"overall_activity_summaries": overall_activity_summaries
|
||||
}
|
||||
|
||||
# 使用当前时间创建文件名
|
||||
now = datetime.now()
|
||||
timestamp_str = now.strftime("%Y%m%d_%H%M")
|
||||
|
||||
# 保存完整的分析结果为JSON
|
||||
analysis_filename = f"frame_analysis_{timestamp_str}.json"
|
||||
analysis_json_path = os.path.join(analysis_dir, analysis_filename)
|
||||
with open(analysis_json_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(merged_results, f, ensure_ascii=False, indent=2)
|
||||
logger.info(f"分析结果已保存到: {analysis_json_path}")
|
||||
|
||||
if not frame_analysis.strip():
|
||||
raise Exception("未能生成有效的帧分析结果")
|
||||
|
||||
# 保存分析结果
|
||||
analysis_path = os.path.join(utils.temp_dir(), "frame_analysis.txt")
|
||||
with open(analysis_path, 'w', encoding='utf-8') as f:
|
||||
f.write(frame_analysis)
|
||||
|
||||
update_progress(70, "正在生成脚本...")
|
||||
|
||||
"""
|
||||
4. 生成文案
|
||||
"""
|
||||
logger.info("开始准备生成解说文案")
|
||||
update_progress(80, "正在生成文案...")
|
||||
from app.services.generate_narration_script import parse_frame_analysis_to_markdown, generate_narration
|
||||
# 从配置中获取文本生成相关配置
|
||||
text_provider = config.app.get('text_llm_provider', 'gemini').lower()
|
||||
text_api_key = config.app.get(f'text_{text_provider}_api_key')
|
||||
text_model = config.app.get(f'text_{text_provider}_model_name')
|
||||
text_base_url = config.app.get(f'text_{text_provider}_base_url')
|
||||
|
||||
# 构建帧内容列表
|
||||
frame_content_list = []
|
||||
prev_batch_files = None
|
||||
# 整理帧分析数据
|
||||
markdown_output = parse_frame_analysis_to_markdown(analysis_json_path)
|
||||
|
||||
for i, result in enumerate(results):
|
||||
if 'error' in result:
|
||||
continue
|
||||
|
||||
batch_files = get_batch_files(keyframe_files, result, vision_batch_size)
|
||||
_, _, timestamp_range = get_batch_timestamps(batch_files, prev_batch_files)
|
||||
|
||||
frame_content = {
|
||||
"timestamp": timestamp_range,
|
||||
"picture": result['response'],
|
||||
"narration": "",
|
||||
"OST": 2
|
||||
}
|
||||
frame_content_list.append(frame_content)
|
||||
|
||||
logger.debug(f"添加帧内容: 时间范围={timestamp_range}, 分析结果长度={len(result['response'])}")
|
||||
|
||||
# 更新上一个批次的文件
|
||||
prev_batch_files = batch_files
|
||||
|
||||
if not frame_content_list:
|
||||
raise Exception("没有有效的帧内容可以处理")
|
||||
|
||||
# ===================开始生成文案===================
|
||||
update_progress(80, "正在生成文案...")
|
||||
# 校验配置
|
||||
api_params = {
|
||||
"vision_api_key": vision_api_key,
|
||||
"vision_model_name": vision_model,
|
||||
"vision_base_url": vision_base_url or "",
|
||||
"text_api_key": text_api_key,
|
||||
"text_model_name": text_model,
|
||||
"text_base_url": text_base_url or ""
|
||||
}
|
||||
chekc_video_config(api_params)
|
||||
custom_prompt = st.session_state.get('custom_prompt', '')
|
||||
processor = ScriptProcessor(
|
||||
model_name=text_model,
|
||||
api_key=text_api_key,
|
||||
prompt=custom_prompt,
|
||||
base_url=text_base_url or "",
|
||||
video_theme=st.session_state.get('video_theme', '')
|
||||
# 生成文案
|
||||
# 生成解说文案
|
||||
narration = generate_narration(
|
||||
markdown_output,
|
||||
text_api_key,
|
||||
base_url=text_base_url,
|
||||
model=text_model
|
||||
)
|
||||
|
||||
# 处理帧内容生成脚本
|
||||
script_result = processor.process_frames(frame_content_list)
|
||||
|
||||
narration_dict = json.loads(narration)['items']
|
||||
# 为 narration_dict 中每个 item 新增一个 OST: 2 的字段, 代表保留原声和配音
|
||||
narration_dict = [{**item, "OST": 2} for item in narration_dict]
|
||||
logger.debug(f"解说文案创作完成:\n{"\n".join([item['narration'] for item in narration_dict])}")
|
||||
# 结果转换为JSON字符串
|
||||
script = json.dumps(script_result, ensure_ascii=False, indent=2)
|
||||
script = json.dumps(narration_dict, ensure_ascii=False, indent=2)
|
||||
|
||||
except Exception as e:
|
||||
logger.exception(f"大模型处理过程中发生错误\n{traceback.format_exc()}")
|
||||
@ -250,7 +376,7 @@ def generate_script_docu(tr, params):
|
||||
if script is None:
|
||||
st.error("生成脚本失败,请检查日志")
|
||||
st.stop()
|
||||
logger.info(f"脚本生成完成")
|
||||
logger.success(f"剪辑脚本生成完成")
|
||||
if isinstance(script, list):
|
||||
st.session_state['video_clip_json'] = script
|
||||
elif isinstance(script, str):
|
||||
|
||||
@ -1,8 +0,0 @@
|
||||
from .performance import monitor_performance, PerformanceMonitor
|
||||
from .cache import *
|
||||
from .file_utils import *
|
||||
|
||||
__all__ = [
|
||||
'monitor_performance',
|
||||
'PerformanceMonitor'
|
||||
]
|
||||
@ -1,8 +1,8 @@
|
||||
"""
|
||||
合并视频和字幕文件
|
||||
"""
|
||||
from moviepy.editor import VideoFileClip, concatenate_videoclips
|
||||
import pysrt
|
||||
from moviepy import VideoFileClip, concatenate_videoclips
|
||||
# import pysrt
|
||||
import os
|
||||
|
||||
|
||||
|
||||
@ -1,37 +0,0 @@
|
||||
import psutil
|
||||
import os
|
||||
from loguru import logger
|
||||
import torch
|
||||
|
||||
class PerformanceMonitor:
|
||||
@staticmethod
|
||||
def monitor_memory():
|
||||
process = psutil.Process(os.getpid())
|
||||
memory_info = process.memory_info()
|
||||
|
||||
logger.debug(f"Memory usage: {memory_info.rss / 1024 / 1024:.2f} MB")
|
||||
|
||||
if torch.cuda.is_available():
|
||||
gpu_memory = torch.cuda.memory_allocated() / 1024 / 1024
|
||||
logger.debug(f"GPU Memory usage: {gpu_memory:.2f} MB")
|
||||
|
||||
@staticmethod
|
||||
def cleanup_resources():
|
||||
if torch.cuda.is_available():
|
||||
torch.cuda.empty_cache()
|
||||
|
||||
import gc
|
||||
gc.collect()
|
||||
|
||||
PerformanceMonitor.monitor_memory()
|
||||
|
||||
def monitor_performance(func):
|
||||
"""性能监控装饰器"""
|
||||
def wrapper(*args, **kwargs):
|
||||
try:
|
||||
PerformanceMonitor.monitor_memory()
|
||||
result = func(*args, **kwargs)
|
||||
return result
|
||||
finally:
|
||||
PerformanceMonitor.cleanup_resources()
|
||||
return wrapper
|
||||
Loading…
x
Reference in New Issue
Block a user