mirror of
https://github.com/linyqh/NarratoAI.git
synced 2025-12-14 13:02:50 +00:00
-重构了 start_subclip 函数,优化了视频剪辑流程 - 新增 clip_video 函数,用于裁剪视频片段 - 改进了字幕生成和处理逻辑- 优化了音频合并和处理 - 更新了任务状态管理
155 lines
5.1 KiB
Python
155 lines
5.1 KiB
Python
#!/usr/bin/env python
|
||
# -*- coding: UTF-8 -*-
|
||
|
||
'''
|
||
@Project: NarratoAI
|
||
@File : subtitle_merger
|
||
@Author : viccy
|
||
@Date : 2025/5/6 下午4:00
|
||
'''
|
||
|
||
import re
|
||
import os
|
||
from datetime import datetime, timedelta
|
||
|
||
|
||
def parse_time(time_str):
|
||
"""解析时间字符串为timedelta对象"""
|
||
hours, minutes, seconds_ms = time_str.split(':')
|
||
seconds, milliseconds = seconds_ms.split(',')
|
||
|
||
td = timedelta(
|
||
hours=int(hours),
|
||
minutes=int(minutes),
|
||
seconds=int(seconds),
|
||
milliseconds=int(milliseconds)
|
||
)
|
||
return td
|
||
|
||
|
||
def format_time(td):
|
||
"""将timedelta对象格式化为SRT时间字符串"""
|
||
total_seconds = int(td.total_seconds())
|
||
hours = total_seconds // 3600
|
||
minutes = (total_seconds % 3600) // 60
|
||
seconds = total_seconds % 60
|
||
milliseconds = td.microseconds // 1000
|
||
|
||
return f"{hours:02d}:{minutes:02d}:{seconds:02d},{milliseconds:03d}"
|
||
|
||
|
||
def extract_time_range_from_filename(filename):
|
||
"""从文件名中提取时间范围"""
|
||
pattern = r'subtitle_(\d{2})_(\d{2})_(\d{2})-(\d{2})_(\d{2})_(\d{2})'
|
||
match = re.search(pattern, filename)
|
||
|
||
if not match:
|
||
return None, None
|
||
|
||
start_h, start_m, start_s, end_h, end_m, end_s = map(int, match.groups())
|
||
|
||
start_time = timedelta(hours=start_h, minutes=start_m, seconds=start_s)
|
||
end_time = timedelta(hours=end_h, minutes=end_m, seconds=end_s)
|
||
|
||
return start_time, end_time
|
||
|
||
|
||
def merge_subtitle_files(subtitle_files, output_file=None):
|
||
"""
|
||
合并多个SRT字幕文件
|
||
|
||
参数:
|
||
subtitle_files: 包含SRT文件路径的列表
|
||
output_file: 输出文件的路径,如果为None则自动生成
|
||
|
||
返回:
|
||
合并后的字幕文件路径
|
||
"""
|
||
# 按文件名中的开始时间排序
|
||
sorted_files = sorted(subtitle_files,
|
||
key=lambda x: extract_time_range_from_filename(x)[0])
|
||
|
||
merged_subtitles = []
|
||
subtitle_index = 1
|
||
|
||
for file_path in sorted_files:
|
||
# 从文件名获取起始时间偏移
|
||
offset_time, _ = extract_time_range_from_filename(file_path)
|
||
|
||
if offset_time is None:
|
||
print(f"警告: 无法从文件名 {os.path.basename(file_path)} 中提取时间范围,跳过该文件")
|
||
continue
|
||
|
||
with open(file_path, 'r', encoding='utf-8') as file:
|
||
content = file.read()
|
||
|
||
# 解析字幕文件
|
||
subtitle_blocks = re.split(r'\n\s*\n', content.strip())
|
||
|
||
for block in subtitle_blocks:
|
||
lines = block.strip().split('\n')
|
||
if len(lines) < 3: # 确保块有足够的行数
|
||
continue
|
||
|
||
# 解析时间轴行
|
||
time_line = lines[1]
|
||
time_parts = time_line.split(' --> ')
|
||
if len(time_parts) != 2:
|
||
continue
|
||
|
||
start_time = parse_time(time_parts[0])
|
||
end_time = parse_time(time_parts[1])
|
||
|
||
# 应用时间偏移
|
||
adjusted_start_time = start_time + offset_time
|
||
adjusted_end_time = end_time + offset_time
|
||
|
||
# 重建字幕块
|
||
adjusted_time_line = f"{format_time(adjusted_start_time)} --> {format_time(adjusted_end_time)}"
|
||
text_lines = lines[2:]
|
||
|
||
new_block = [
|
||
str(subtitle_index),
|
||
adjusted_time_line,
|
||
*text_lines
|
||
]
|
||
|
||
merged_subtitles.append('\n'.join(new_block))
|
||
subtitle_index += 1
|
||
|
||
# 合并所有字幕块
|
||
merged_content = '\n\n'.join(merged_subtitles)
|
||
|
||
# 确定输出文件路径
|
||
if output_file is None:
|
||
# 自动生成输出文件名
|
||
first_file_path = sorted_files[0]
|
||
last_file_path = sorted_files[-1]
|
||
_, first_end = extract_time_range_from_filename(first_file_path)
|
||
_, last_end = extract_time_range_from_filename(last_file_path)
|
||
|
||
dir_path = os.path.dirname(first_file_path)
|
||
first_start_str = os.path.basename(first_file_path).split('-')[0].replace('subtitle_', '')
|
||
last_end_h, last_end_m, last_end_s = int(last_end.seconds // 3600), int((last_end.seconds % 3600) // 60), int(last_end.seconds % 60)
|
||
last_end_str = f"{last_end_h:02d}_{last_end_m:02d}_{last_end_s:02d}"
|
||
|
||
output_file = os.path.join(dir_path, f"merged_subtitle_{first_start_str}-{last_end_str}.srt")
|
||
|
||
# 写入合并后的内容
|
||
with open(output_file, 'w', encoding='utf-8') as file:
|
||
file.write(merged_content)
|
||
|
||
return output_file
|
||
|
||
|
||
if __name__ == '__main__':
|
||
subtitle_files = [
|
||
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt",
|
||
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt",
|
||
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt",
|
||
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt",
|
||
]
|
||
|
||
output_file = merge_subtitle_files(subtitle_files)
|
||
print(f"字幕文件已合并至: {output_file}")
|