def parse_edited_time_range(time_range_str):
    """Parse an ``editedTimeRange`` string into start and end offsets.

    Args:
        time_range_str: Text of the form ``"HH:MM:SS-HH:MM:SS"``.

    Returns:
        A ``(start_time, end_time)`` tuple of ``timedelta`` objects, or
        ``(None, None)`` when the input is empty, is not a string, or does
        not match the expected format.
    """
    if not time_range_str or not isinstance(time_range_str, str):
        return None, None

    parts = time_range_str.split('-')
    if len(parts) != 2:
        return None, None

    bounds = []
    for part in parts:
        try:
            hours, minutes, seconds = map(int, part.split(':'))
        except ValueError:
            # Wrong number of ':' fields or a non-numeric field. Report the
            # range as unparseable (like the other bad-input paths) instead
            # of raising, so one malformed item does not abort the caller.
            return None, None
        bounds.append(timedelta(hours=hours, minutes=minutes, seconds=seconds))

    start_time, end_time = bounds
    return start_time, end_time
os.path.exists(item.get('subtitle')): + continue + + # 从editedTimeRange获取起始时间偏移 + offset_time, _ = parse_edited_time_range(item.get('editedTimeRange', '')) if offset_time is None: - print(f"警告: 无法从文件名 {os.path.basename(file_path)} 中提取时间范围,跳过该文件") + print(f"警告: 无法从项目 {item.get('_id')} 的editedTimeRange中提取时间范围,跳过该项") continue - with open(file_path, 'r', encoding='utf-8') as file: + with open(item['subtitle'], 'r', encoding='utf-8') as file: content = file.read() # 解析字幕文件 @@ -117,24 +125,23 @@ def merge_subtitle_files(subtitle_files, output_file=None): merged_subtitles.append('\n'.join(new_block)) subtitle_index += 1 - # 合并所有字幕块 - merged_content = '\n\n'.join(merged_subtitles) - # 确定输出文件路径 if output_file is None: - # 自动生成输出文件名 - first_file_path = sorted_files[0] - last_file_path = sorted_files[-1] - _, first_end = extract_time_range_from_filename(first_file_path) - _, last_end = extract_time_range_from_filename(last_file_path) + dir_path = os.path.dirname(sorted_items[0]['subtitle']) + first_start = parse_edited_time_range(sorted_items[0]['editedTimeRange'])[0] + last_end = parse_edited_time_range(sorted_items[-1]['editedTimeRange'])[1] - dir_path = os.path.dirname(first_file_path) - first_start_str = os.path.basename(first_file_path).split('-')[0].replace('subtitle_', '') + first_start_h, first_start_m, first_start_s = int(first_start.seconds // 3600), int((first_start.seconds % 3600) // 60), int(first_start.seconds % 60) last_end_h, last_end_m, last_end_s = int(last_end.seconds // 3600), int((last_end.seconds % 3600) // 60), int(last_end.seconds % 60) + + first_start_str = f"{first_start_h:02d}_{first_start_m:02d}_{first_start_s:02d}" last_end_str = f"{last_end_h:02d}_{last_end_m:02d}_{last_end_s:02d}" output_file = os.path.join(dir_path, f"merged_subtitle_{first_start_str}-{last_end_str}.srt") + # 合并所有字幕块 + merged_content = '\n\n'.join(merged_subtitles) + # 写入合并后的内容 with open(output_file, 'w', encoding='utf-8') as file: file.write(merged_content) @@ -143,12 +150,53 
if __name__ == "__main__":
    # Test data: sample script items passed to merge_subtitle_files below.
    test_data = [
        {
            "picture": '【解说】好的,各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!',
            "timestamp": "00:00:00-00:01:15",
            "narration": '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!',
            "OST": 0,
            "_id": 1,
            "audio": "/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3",
            "subtitle": "/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_00_00-00_01_15.srt",
            "sourceTimeRange": "00:00:00-00:00:26",
            "duration": 26,
            "editedTimeRange": "00:00:00-00:00:26",
        },
        {
            "picture": "【解说】上一集我们看到,范闲在北齐遭遇了惊天变故,生死不明!",
            "timestamp": "00:01:15-00:04:40",
            "narration": "但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…",
            "OST": 0,
            "_id": 2,
            "audio": "/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3",
            "subtitle": "/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_01_15-00_04_40.srt",
            "sourceTimeRange": "00:01:15-00:01:29",
            "duration": 14,
            "editedTimeRange": "00:00:26-00:00:40",
        },
        {
            "picture": '【解说】"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。',
            "timestamp": "00:04:58-00:05:45",
            "narration": '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!',
            "OST": 0,
            "_id": 4,
            "audio": "/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3",
            "subtitle": "/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_04_58-00_05_45.srt",
            "sourceTimeRange": "00:04:58-00:05:20",
            "duration": 22,
            "editedTimeRange": "00:00:57-00:01:19",
        },
        {
            "picture": "【解说】但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!",
            "timestamp": "00:05:45-00:06:00",
            "narration": "但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!",
            "OST": 0,
            "_id": 5,
            "audio": "/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3",
            "subtitle": "/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt",
            "sourceTimeRange": "00:05:45-00:05:53",
            "duration": 8,
            "editedTimeRange": "00:01:19-00:01:27",
        },
    ]

    # Merge the sample items and report where the combined SRT was written.
    output_file = merge_subtitle_files(test_data)
    print(f"字幕文件已合并至: {output_file}")
合并音频和字幕") - subtitle_files = [ - tts_result["subtitle_file"] for tts_result in tts_results - ] - total_duration = sum([script["duration"] for script in list_script]) + total_duration = sum([script["duration"] for script in new_script_list]) if tts_results: try: # 合并音频文件 merged_audio_path = audio_merger.merge_audio_files( task_id=task_id, total_duration=total_duration, - list_script=list_script + list_script=new_script_list ) logger.info(f"音频文件合并成功->{merged_audio_path}") - # # 合并字幕文件 - # merged_subtitle_path = subtitle_merger.merge_subtitle_files( - # subtitle_files=subtitle_files, - # ) - # logger.info(f"字幕文件合并成功->{merged_subtitle_path}") + # 合并字幕文件 + merged_subtitle_path = subtitle_merger.merge_subtitle_files(new_script_list) + logger.info(f"字幕文件合并成功->{merged_subtitle_path}") except Exception as e: logger.error(f"合并音频文件失败: {str(e)}") merged_audio_path = "" @@ -292,10 +287,10 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di combined_video_path = path.join(utils.task_dir(task_id), f"merger.mp4") logger.info(f"\n\n## 5. 合并视频: => {combined_video_path}") - + videos_clips = [new_script['video_path'] for new_script in new_script_list] merger_video.combine_clip_videos( output_video_path=combined_video_path, - video_paths=subclip_videos, + video_paths=videos_clips, video_ost_list=video_ost, video_aspect=params.video_aspect, threads=params.n_threads # 多线程