diff --git a/app/services/audio_merger.py b/app/services/audio_merger.py index 73cab3b..510c6b7 100644 --- a/app/services/audio_merger.py +++ b/app/services/audio_merger.py @@ -18,15 +18,14 @@ def check_ffmpeg(): return False -def merge_audio_files(task_id: str, audio_files: list, total_duration: float, list_script: list): +def merge_audio_files(task_id: str, total_duration: float, list_script: list): """ - 合并音频文件,根据OST设置处理不同的音频轨道 + 合并音频文件 Args: task_id: 任务ID - audio_files: TTS生成的音频文件列表 total_duration: 总时长 - list_script: 完整脚本信息,包含OST设置 + list_script: 完整脚本信息,包含duration时长和audio路径 Returns: str: 合并后的音频文件路径 @@ -39,32 +38,34 @@ def merge_audio_files(task_id: str, audio_files: list, total_duration: float, li # 创建一个空的音频片段 final_audio = AudioSegment.silent(duration=total_duration * 1000) # 总时长以毫秒为单位 + # 计算每个片段的开始位置(基于duration字段) + current_position = 0 # 初始位置(秒) + # 遍历脚本中的每个片段 - for segment, audio_file in zip(list_script, audio_files): + for segment in list_script: try: - # 加载TTS音频文件 - tts_audio = AudioSegment.from_file(audio_file) - - # 获取片段的开始和结束时间 - start_time, end_time = segment['timestamp'].split('-') - start_seconds = utils.time_to_seconds(start_time) - end_seconds = utils.time_to_seconds(end_time) - - # 根据OST设置处理音频 - if segment['OST'] == 0: - # 只使用TTS音频 - final_audio = final_audio.overlay(tts_audio, position=start_seconds * 1000) - elif segment['OST'] == 1: - # 只使用原声(假设原声已经在视频中) - continue - elif segment['OST'] == 2: - # 混合TTS音频和原声 - original_audio = AudioSegment.silent(duration=(end_seconds - start_seconds) * 1000) - mixed_audio = original_audio.overlay(tts_audio) - final_audio = final_audio.overlay(mixed_audio, position=start_seconds * 1000) + # 获取片段时长(秒) + duration = segment['duration'] + + # 检查audio字段是否为空 + if segment['audio'] and os.path.exists(segment['audio']): + # 加载TTS音频文件 + tts_audio = AudioSegment.from_file(segment['audio']) + + # 将TTS音频添加到最终音频 + final_audio = final_audio.overlay(tts_audio, position=current_position * 1000) + else: + # audio为空,不添加音频,仅保留间隔 + logger.info(f"片段 {segment.get('timestamp', '')} 没有音频文件,保留 {duration} 秒的间隔") + + # 更新下一个片段的开始位置 + current_position += duration except Exception as e: - logger.error(f"处理音频文件 {audio_file} 时出错: {str(e)}") + logger.error(f"处理音频片段时出错: {str(e)}") + # 即使处理失败,也要更新位置,确保后续片段位置正确 + if 'duration' in segment: + current_position += segment['duration'] continue # 保存合并后的音频文件 @@ -93,7 +94,7 @@ def time_to_seconds(time_str): # 分割时间部分 parts = time_part.split(':') - + if len(parts) == 3: # HH:MM:SS h, m, s = map(int, parts) seconds = h * 3600 + m * 60 + s @@ -118,11 +119,11 @@ def extract_timestamp(filename): # 从文件名中提取时间部分 time_part = filename.split('_', 1)[1].split('.')[0] # 获取 "00_06,500-00_24,800" 部分 start_time, end_time = time_part.split('-') # 分割成开始和结束时间 - + # 将下划线格式转换回冒号格式 start_time = start_time.replace('_', ':') end_time = end_time.replace('_', ':') - + # 将时间戳转换为秒 start_seconds = time_to_seconds(start_time) end_seconds = time_to_seconds(end_time) @@ -135,17 +136,36 @@ def extract_timestamp(filename): if __name__ == "__main__": # 示例用法 - audio_files =[ - "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00:06-00:24.mp3", - "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00:32-00:38.mp3", - "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00:43-00:52.mp3", - "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00:52-01:09.mp3", - "/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_01:13-01:15.mp3", - ] - total_duration = 38 - video_script_path = "/Users/apple/Desktop/home/NarratoAI/resource/scripts/test003.json" - with open(video_script_path, "r", encoding="utf-8") as f: - video_script = json.load(f) + total_duration = 90 - output_file = merge_audio_files("test456", audio_files, total_duration, video_script) + video_script = [ + {'picture': '【解说】好的,各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!', + 'timestamp': '00:00:00-00:00:26', + 'narration': '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!', + 'OST': 0, 'duration': 26, + 'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3'}, + {'picture': '【解说】上一集我们看到,范闲在北齐遭遇了惊天变故,生死不明!', 'timestamp': '00:01:15-00:01:29', + 'narration': '但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…', + 'OST': 0, 'duration': 14, + 'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3'}, + {'picture': '画面切到王启年小心翼翼地向范闲汇报。', 'timestamp': '00:04:41-00:04:58', + 'narration': '我发现大人的死讯不光是在民间,在官场上也它传开了,所以呢,所以啊,可不是什么好事,将来您跟陛下怎么交代,这可是欺君之罪', + 'OST': 1, 'duration': 17, + 'audio': ''}, + {'picture': '【解说】"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。', + 'timestamp': '00:04:58-00:05:20', + 'narration': '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!', + 'OST': 0, 'duration': 22, + 'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3'}, + {'picture': '【解说】但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!', + 'timestamp': '00:05:45-00:05:53', + 'narration': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!', + 'OST': 0, 'duration': 8, + 'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3'}, + {'picture': '画面切换到范闲蒙面闯入皇宫,被侍卫包围的场景。', 'timestamp': '00:06:00-00:06:03', + 'narration': '抓刺客', + 'OST': 1, 'duration': 3, + 'audio': ''}] + + output_file = merge_audio_files("test456", total_duration, video_script) print(output_file) diff --git a/app/services/clip_video.py b/app/services/clip_video.py index d5c591a..73b7456 100644 --- a/app/services/clip_video.py +++ b/app/services/clip_video.py @@ -12,14 +12,10 @@ import os import subprocess import json import hashlib -import logging +from loguru import logger from typing import Dict, List, Optional from pathlib import Path -# 配置日志 -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') -logger = logging.getLogger(__name__) - def parse_timestamp(timestamp: str) -> tuple: """ @@ -174,7 +170,7 @@ def clip_video( # 执行FFmpeg命令 try: logger.info(f"裁剪视频片段: {timestamp} -> {start_time}到{calculated_end_time}") - logger.debug(f"执行命令: {' '.join(ffmpeg_cmd)}") + # logger.debug(f"执行命令: {' '.join(ffmpeg_cmd)}") process = subprocess.run( ffmpeg_cmd, @@ -185,7 +181,6 @@ def clip_video( ) result[timestamp] = output_path - logger.info(f"成功裁剪视频片段: {timestamp} -> {output_path}") except subprocess.CalledProcessError as e: logger.error(f"裁剪视频片段失败: {timestamp}") @@ -217,10 +212,18 @@ if __name__ == "__main__": 'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3', 'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt', 'duration': 7.675, 'text': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!'}] + subclip_path_videos = { + '00:00:00-00:01:15': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-00-00-00-01-15.mp4', + '00:01:15-00:04:40': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-01-15-00-04-40.mp4', + '00:04:41-00:04:58': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-04-41-00-04-58.mp4', + '00:04:58-00:05:45': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-04-58-00-05-45.mp4', + '00:05:45-00:06:00': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-05-45-00-06-00.mp4', + '00:06:00-00:06:03': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-06-00-00-06-03.mp4', + } # 使用方法示例 try: - result = clip_video(video_origin_path, tts_result) + result = clip_video(video_origin_path, tts_result, subclip_path_videos) print("裁剪结果:") print(json.dumps(result, indent=4, ensure_ascii=False)) except Exception as e: diff --git a/app/services/task.py b/app/services/task.py index 66a7761..ae2ea29 100644 --- a/app/services/task.py +++ b/app/services/task.py @@ -10,7 +10,7 @@ from app.config import config from app.models import const from app.models.schema import VideoConcatMode, VideoParams, VideoClipParams from app.services import (llm, material, subtitle, video, voice, audio_merger, - subtitle_merger, clip_video, merger_video) + subtitle_merger, clip_video, merger_video, update_script) from app.services import state as sm from app.utils import utils @@ -193,11 +193,6 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di logger.debug(f"解说完整脚本: \n{video_script}") logger.debug(f"解说 OST 列表: \n{video_ost}") logger.debug(f"解说时间戳列表: \n{time_list}") - - # 获取视频总时长(单位 s) - last_timestamp = list_script[-1]['timestamp'].split("-")[1] - total_duration = utils.time_to_seconds(last_timestamp) - except Exception as e: logger.error(f"无法读取视频json脚本,请检查脚本格式是否正确") raise ValueError("无法读取视频json脚本,请检查脚本格式是否正确") @@ -224,19 +219,54 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di voice_pitch=params.voice_pitch, force_regenerate=True ) - audio_files = [ - tts_result["audio_file"] for tts_result in tts_results - ] + + sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20) + + # """ + # 3. (可选) 使用 whisper 生成字幕 + # """ + # if merged_subtitle_path is None: + # if audio_files: + # merged_subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt") + # subtitle_provider = config.app.get("subtitle_provider", "").strip().lower() + # logger.info(f"\n\n使用 {subtitle_provider} 生成字幕") + # + # subtitle.create( + # audio_file=merged_audio_path, + # subtitle_file=merged_subtitle_path, + # ) + # subtitle_lines = subtitle.file_to_subtitles(merged_subtitle_path) + # if not subtitle_lines: + # logger.warning(f"字幕文件无效: {merged_subtitle_path}") + # + # sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40) + + """ + 3. 裁剪视频 - 将超出音频长度的视频进行裁剪 + """ + logger.info("\n\n## 3. 裁剪视频") + clip_result = clip_video.clip_video(params.video_origin_path, tts_results) + subclip_path_videos.update(clip_result) + # 更新 list_script 中的时间戳 + list_script = update_script.update_script_timestamps(list_script, clip_result) + subclip_videos = [x for x in subclip_path_videos.values()] + + sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60) + + """ + 4. 合并音频和字幕 + """ + logger.info("\n\n## 4. 合并音频和字幕") + subtitle_files = [ tts_result["subtitle_file"] for tts_result in tts_results ] + total_duration = sum([script["duration"] for script in list_script]) if tts_results: - logger.info(f"合并音频/字幕文件") try: # 合并音频文件 merged_audio_path = audio_merger.merge_audio_files( task_id=task_id, - audio_files=audio_files, total_duration=total_duration, list_script=list_script # 传入完整脚本以便处理OST ) @@ -253,39 +283,9 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di else: logger.error("TTS转换音频失败, 可能是网络不可用! 如果您在中国, 请使用VPN.") return - sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20) """ - 3. (可选) 使用 whisper 生成字幕 - """ - if merged_subtitle_path is None: - if audio_files: - merged_subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt") - subtitle_provider = config.app.get("subtitle_provider", "").strip().lower() - logger.info(f"\n\n使用 {subtitle_provider} 生成字幕") - - subtitle.create( - audio_file=merged_audio_path, - subtitle_file=merged_subtitle_path, - ) - subtitle_lines = subtitle.file_to_subtitles(merged_subtitle_path) - if not subtitle_lines: - logger.warning(f"字幕文件无效: {merged_subtitle_path}") - - sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40) - - """ - 4. 裁剪视频 - 将超出音频长度的视频进行裁剪 - """ - logger.info("\n\n## 4. 裁剪视频") - result = clip_video.clip_video(params.video_origin_path, tts_results) - subclip_path_videos.update(result) - subclip_videos = [x for x in subclip_path_videos.values()] - - sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60) - - """ - 5. 合并视频 + 6. 合并视频 """ final_video_paths = [] combined_video_paths = [] diff --git a/app/services/update_script.py b/app/services/update_script.py new file mode 100644 index 0000000..5dd10a9 --- /dev/null +++ b/app/services/update_script.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python +# -*- coding: UTF-8 -*- + +''' +@Project: NarratoAI +@File : update_script +@Author : 小林同学 +@Date : 2025/5/6 下午11:00 +''' + +import re +import os +from typing import Dict, List, Any, Tuple + + +def extract_timestamp_from_video_path(video_path: str) -> str: + """ + 从视频文件路径中提取时间戳 + + Args: + video_path: 视频文件路径 + + Returns: + 提取出的时间戳,格式为 'HH:MM:SS-HH:MM:SS' + """ + # 使用正则表达式从文件名中提取时间戳 + filename = os.path.basename(video_path) + match = re.search(r'vid-(\d{2}-\d{2}-\d{2})-(\d{2}-\d{2}-\d{2})\.mp4', filename) + + if match: + # 提取并格式化时间戳 + start_time = match.group(1).replace('-', ':') + end_time = match.group(2).replace('-', ':') + return f"{start_time}-{end_time}" + + return "" + + +def calculate_duration(timestamp: str) -> float: + """ + 计算时间戳范围的持续时间(秒) + + Args: + timestamp: 格式为 'HH:MM:SS-HH:MM:SS' 的时间戳 + + Returns: + 持续时间(秒) + """ + try: + start_time, end_time = timestamp.split('-') + + # 解析时间 + start_h, start_m, start_s = map(int, start_time.split(':')) + end_h, end_m, end_s = map(int, end_time.split(':')) + + # 转换为秒 + start_seconds = start_h * 3600 + start_m * 60 + start_s + end_seconds = end_h * 3600 + end_m * 60 + end_s + + # 计算时间差(秒) + return round(end_seconds - start_seconds, 2) + except (ValueError, AttributeError): + return 0.0 + + +def update_script_timestamps(script_list: List[Dict[str, Any]], tts_result: Dict[str, str]) -> List[Dict[str, Any]]: + """ + 根据 tts_res 中的视频文件更新 list_script 中的时间戳,并添加持续时间 + + Args: + script_list: 原始脚本列表 + tts_result: TTS 结果字典,键为原时间戳,值为视频文件路径 + + Returns: + 更新后的脚本列表 + """ + # 创建副本,避免修改原始数据 + updated_script = [] + + # 建立原始时间戳到新时间戳的映射 + timestamp_mapping = {} + for orig_timestamp, video_path in tts_result.items(): + new_timestamp = extract_timestamp_from_video_path(video_path) + if new_timestamp: + timestamp_mapping[orig_timestamp] = new_timestamp + + # 更新脚本中的时间戳 + for item in script_list: + item_copy = item.copy() + if item_copy.get('timestamp') in timestamp_mapping: + # 更新时间戳 + new_timestamp = timestamp_mapping[item_copy['timestamp']] + item_copy['timestamp'] = new_timestamp + + # 计算并添加持续时间 + item_copy['duration'] = calculate_duration(new_timestamp) + elif 'timestamp' in item_copy: + # 对于未更新的时间戳,也计算并添加持续时间 + item_copy['duration'] = calculate_duration(item_copy['timestamp']) + + updated_script.append(item_copy) + + return updated_script + + +if __name__ == '__main__': + list_script = [ + {'picture': '【解说】好的,各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!', + 'timestamp': '00:00:00-00:01:15', + 'narration': '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸!范闲在北齐"死"了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!', + 'OST': 0}, + {'picture': '【解说】上一集我们看到,范闲在北齐遭遇了惊天变故,生死不明!', 'timestamp': '00:01:15-00:04:40', + 'narration': '但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…', + 'OST': 0}, {'picture': '画面切到王启年小心翼翼地向范闲汇报。', 'timestamp': '00:04:41-00:04:58', + 'narration': '我发现大人的死讯不光是在民间,在官场上也它传开了,所以呢,所以啊,可不是什么好事,将来您跟陛下怎么交代,这可是欺君之罪', + 'OST': 1}, + {'picture': '【解说】"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。', + 'timestamp': '00:04:58-00:05:45', + 'narration': '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!', + 'OST': 0}, {'picture': '【解说】但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!', + 'timestamp': '00:05:45-00:06:00', + 'narration': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!', 'OST': 0}, + {'picture': '画面切换到范闲蒙面闯入皇宫,被侍卫包围的场景。', 'timestamp': '00:06:00-00:06:03', + 'narration': '抓刺客', 'OST': 1}] + tts_res = { + '00:00:00-00:01:15': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/0ac14d474144b54d614c26a5c87cffe7/vid-00-00-00-00-00-26.mp4', + '00:01:15-00:04:40': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/0ac14d474144b54d614c26a5c87cffe7/vid-00-01-15-00-01-29.mp4', + '00:04:58-00:05:45': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/0ac14d474144b54d614c26a5c87cffe7/vid-00-04-58-00-05-20.mp4', + '00:05:45-00:06:00': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/0ac14d474144b54d614c26a5c87cffe7/vid-00-05-45-00-05-53.mp4'} + + # 更新并打印结果 + updated_list_script = update_script_timestamps(list_script, tts_res) + for item in updated_list_script: + print(f"Picture: {item['picture'][:20]}... | Timestamp: {item['timestamp']} | Duration: {item['duration']} 秒")