refactor(task): 重构任务处理流程并添加新功能

- 更新了音频合并逻辑,现在基于脚本中的 duration 字段
- 添加了视频裁剪步骤,根据音频长度调整视频
- 新增了 update_script 模块,用于更新脚本中的时间戳和持续时间
- 优化了任务处理流程,提高了代码可读性和可维护性
This commit is contained in:
linyq 2025-05-07 00:26:59 +08:00
parent 2ed627890f
commit 5c4ff41274
4 changed files with 248 additions and 91 deletions

View File

@ -18,15 +18,14 @@ def check_ffmpeg():
return False return False
def merge_audio_files(task_id: str, audio_files: list, total_duration: float, list_script: list): def merge_audio_files(task_id: str, total_duration: float, list_script: list):
""" """
合并音频文件根据OST设置处理不同的音频轨道 合并音频文件
Args: Args:
task_id: 任务ID task_id: 任务ID
audio_files: TTS生成的音频文件列表
total_duration: 总时长 total_duration: 总时长
list_script: 完整脚本信息包含OST设置 list_script: 完整脚本信息包含duration时长和audio路径
Returns: Returns:
str: 合并后的音频文件路径 str: 合并后的音频文件路径
@ -39,32 +38,34 @@ def merge_audio_files(task_id: str, audio_files: list, total_duration: float, li
# 创建一个空的音频片段 # 创建一个空的音频片段
final_audio = AudioSegment.silent(duration=total_duration * 1000) # 总时长以毫秒为单位 final_audio = AudioSegment.silent(duration=total_duration * 1000) # 总时长以毫秒为单位
# 计算每个片段的开始位置基于duration字段
current_position = 0 # 初始位置(秒)
# 遍历脚本中的每个片段 # 遍历脚本中的每个片段
for segment, audio_file in zip(list_script, audio_files): for segment in list_script:
try: try:
# 获取片段时长(秒)
duration = segment['duration']
# 检查audio字段是否为空
if segment['audio'] and os.path.exists(segment['audio']):
# 加载TTS音频文件 # 加载TTS音频文件
tts_audio = AudioSegment.from_file(audio_file) tts_audio = AudioSegment.from_file(segment['audio'])
# 获取片段的开始和结束时间 # 将TTS音频添加到最终音频
start_time, end_time = segment['timestamp'].split('-') final_audio = final_audio.overlay(tts_audio, position=current_position * 1000)
start_seconds = utils.time_to_seconds(start_time) else:
end_seconds = utils.time_to_seconds(end_time) # audio为空不添加音频仅保留间隔
logger.info(f"片段 {segment.get('timestamp', '')} 没有音频文件,保留 {duration} 秒的间隔")
# 根据OST设置处理音频 # 更新下一个片段的开始位置
if segment['OST'] == 0: current_position += duration
# 只使用TTS音频
final_audio = final_audio.overlay(tts_audio, position=start_seconds * 1000)
elif segment['OST'] == 1:
# 只使用原声(假设原声已经在视频中)
continue
elif segment['OST'] == 2:
# 混合TTS音频和原声
original_audio = AudioSegment.silent(duration=(end_seconds - start_seconds) * 1000)
mixed_audio = original_audio.overlay(tts_audio)
final_audio = final_audio.overlay(mixed_audio, position=start_seconds * 1000)
except Exception as e: except Exception as e:
logger.error(f"处理音频文件 {audio_file} 时出错: {str(e)}") logger.error(f"处理音频片段时出错: {str(e)}")
# 即使处理失败,也要更新位置,确保后续片段位置正确
if 'duration' in segment:
current_position += segment['duration']
continue continue
# 保存合并后的音频文件 # 保存合并后的音频文件
@ -135,17 +136,36 @@ def extract_timestamp(filename):
if __name__ == "__main__": if __name__ == "__main__":
# 示例用法 # 示例用法
audio_files =[ total_duration = 90
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00:06-00:24.mp3",
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00:32-00:38.mp3",
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00:43-00:52.mp3",
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_00:52-01:09.mp3",
"/Users/apple/Desktop/home/NarratoAI/storage/tasks/test456/audio_01:13-01:15.mp3",
]
total_duration = 38
video_script_path = "/Users/apple/Desktop/home/NarratoAI/resource/scripts/test003.json"
with open(video_script_path, "r", encoding="utf-8") as f:
video_script = json.load(f)
output_file = merge_audio_files("test456", audio_files, total_duration, video_script) video_script = [
{'picture': '【解说】好的,各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸范闲在北齐""了?这怎么可能!',
'timestamp': '00:00:00-00:00:26',
'narration': '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸范闲在北齐""了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!',
'OST': 0, 'duration': 26,
'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_00_00-00_01_15.mp3'},
{'picture': '【解说】上一集我们看到,范闲在北齐遭遇了惊天变故,生死不明!', 'timestamp': '00:01:15-00:01:29',
'narration': '但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…',
'OST': 0, 'duration': 14,
'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_01_15-00_04_40.mp3'},
{'picture': '画面切到王启年小心翼翼地向范闲汇报。', 'timestamp': '00:04:41-00:04:58',
'narration': '我发现大人的死讯不光是在民间,在官场上也它传开了,所以呢,所以啊,可不是什么好事,将来您跟陛下怎么交代,这可是欺君之罪',
'OST': 1, 'duration': 17,
'audio': ''},
{'picture': '【解说】"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。',
'timestamp': '00:04:58-00:05:20',
'narration': '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!',
'OST': 0, 'duration': 22,
'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_04_58-00_05_45.mp3'},
{'picture': '【解说】但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
'timestamp': '00:05:45-00:05:53',
'narration': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
'OST': 0, 'duration': 8,
'audio': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3'},
{'picture': '画面切换到范闲蒙面闯入皇宫,被侍卫包围的场景。', 'timestamp': '00:06:00-00:06:03',
'narration': '抓刺客',
'OST': 1, 'duration': 3,
'audio': ''}]
output_file = merge_audio_files("test456", total_duration, video_script)
print(output_file) print(output_file)

View File

@ -12,14 +12,10 @@ import os
import subprocess import subprocess
import json import json
import hashlib import hashlib
import logging from loguru import logger
from typing import Dict, List, Optional from typing import Dict, List, Optional
from pathlib import Path from pathlib import Path
# 配置日志
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
def parse_timestamp(timestamp: str) -> tuple: def parse_timestamp(timestamp: str) -> tuple:
""" """
@ -174,7 +170,7 @@ def clip_video(
# 执行FFmpeg命令 # 执行FFmpeg命令
try: try:
logger.info(f"裁剪视频片段: {timestamp} -> {start_time}{calculated_end_time}") logger.info(f"裁剪视频片段: {timestamp} -> {start_time}{calculated_end_time}")
logger.debug(f"执行命令: {' '.join(ffmpeg_cmd)}") # logger.debug(f"执行命令: {' '.join(ffmpeg_cmd)}")
process = subprocess.run( process = subprocess.run(
ffmpeg_cmd, ffmpeg_cmd,
@ -185,7 +181,6 @@ def clip_video(
) )
result[timestamp] = output_path result[timestamp] = output_path
logger.info(f"成功裁剪视频片段: {timestamp} -> {output_path}")
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
logger.error(f"裁剪视频片段失败: {timestamp}") logger.error(f"裁剪视频片段失败: {timestamp}")
@ -217,10 +212,18 @@ if __name__ == "__main__":
'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3', 'audio_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/audio_00_05_45-00_06_00.mp3',
'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt', 'subtitle_file': '/Users/apple/Desktop/home/NarratoAI/storage/tasks/qyn2-2-demo/subtitle_00_05_45-00_06_00.srt',
'duration': 7.675, 'text': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!'}] 'duration': 7.675, 'text': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!'}]
subclip_path_videos = {
'00:00:00-00:01:15': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-00-00-00-01-15.mp4',
'00:01:15-00:04:40': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-01-15-00-04-40.mp4',
'00:04:41-00:04:58': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-04-41-00-04-58.mp4',
'00:04:58-00:05:45': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-04-58-00-05-45.mp4',
'00:05:45-00:06:00': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-05-45-00-06-00.mp4',
'00:06:00-00:06:03': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/6e7e343c7592c7d6f9a9636b55000f23/vid-00-06-00-00-06-03.mp4',
}
# 使用方法示例 # 使用方法示例
try: try:
result = clip_video(video_origin_path, tts_result) result = clip_video(video_origin_path, tts_result, subclip_path_videos)
print("裁剪结果:") print("裁剪结果:")
print(json.dumps(result, indent=4, ensure_ascii=False)) print(json.dumps(result, indent=4, ensure_ascii=False))
except Exception as e: except Exception as e:

View File

@ -10,7 +10,7 @@ from app.config import config
from app.models import const from app.models import const
from app.models.schema import VideoConcatMode, VideoParams, VideoClipParams from app.models.schema import VideoConcatMode, VideoParams, VideoClipParams
from app.services import (llm, material, subtitle, video, voice, audio_merger, from app.services import (llm, material, subtitle, video, voice, audio_merger,
subtitle_merger, clip_video, merger_video) subtitle_merger, clip_video, merger_video, update_script)
from app.services import state as sm from app.services import state as sm
from app.utils import utils from app.utils import utils
@ -193,11 +193,6 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
logger.debug(f"解说完整脚本: \n{video_script}") logger.debug(f"解说完整脚本: \n{video_script}")
logger.debug(f"解说 OST 列表: \n{video_ost}") logger.debug(f"解说 OST 列表: \n{video_ost}")
logger.debug(f"解说时间戳列表: \n{time_list}") logger.debug(f"解说时间戳列表: \n{time_list}")
# 获取视频总时长(单位 s)
last_timestamp = list_script[-1]['timestamp'].split("-")[1]
total_duration = utils.time_to_seconds(last_timestamp)
except Exception as e: except Exception as e:
logger.error(f"无法读取视频json脚本请检查脚本格式是否正确") logger.error(f"无法读取视频json脚本请检查脚本格式是否正确")
raise ValueError("无法读取视频json脚本请检查脚本格式是否正确") raise ValueError("无法读取视频json脚本请检查脚本格式是否正确")
@ -224,19 +219,54 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
voice_pitch=params.voice_pitch, voice_pitch=params.voice_pitch,
force_regenerate=True force_regenerate=True
) )
audio_files = [
tts_result["audio_file"] for tts_result in tts_results sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)
]
# """
# 3. (可选) 使用 whisper 生成字幕
# """
# if merged_subtitle_path is None:
# if audio_files:
# merged_subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
# subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
# logger.info(f"\n\n使用 {subtitle_provider} 生成字幕")
#
# subtitle.create(
# audio_file=merged_audio_path,
# subtitle_file=merged_subtitle_path,
# )
# subtitle_lines = subtitle.file_to_subtitles(merged_subtitle_path)
# if not subtitle_lines:
# logger.warning(f"字幕文件无效: {merged_subtitle_path}")
#
# sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)
"""
3. 裁剪视频 - 将超出音频长度的视频进行裁剪
"""
logger.info("\n\n## 3. 裁剪视频")
clip_result = clip_video.clip_video(params.video_origin_path, tts_results)
subclip_path_videos.update(clip_result)
# 更新 list_script 中的时间戳
list_script = update_script.update_script_timestamps(list_script, clip_result)
subclip_videos = [x for x in subclip_path_videos.values()]
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60)
"""
4. 合并音频和字幕
"""
logger.info("\n\n## 4. 合并音频和字幕")
subtitle_files = [ subtitle_files = [
tts_result["subtitle_file"] for tts_result in tts_results tts_result["subtitle_file"] for tts_result in tts_results
] ]
total_duration = sum([script["duration"] for script in list_script])
if tts_results: if tts_results:
logger.info(f"合并音频/字幕文件")
try: try:
# 合并音频文件 # 合并音频文件
merged_audio_path = audio_merger.merge_audio_files( merged_audio_path = audio_merger.merge_audio_files(
task_id=task_id, task_id=task_id,
audio_files=audio_files,
total_duration=total_duration, total_duration=total_duration,
list_script=list_script # 传入完整脚本以便处理OST list_script=list_script # 传入完整脚本以便处理OST
) )
@ -253,39 +283,9 @@ def start_subclip(task_id: str, params: VideoClipParams, subclip_path_videos: di
else: else:
logger.error("TTS转换音频失败, 可能是网络不可用! 如果您在中国, 请使用VPN.") logger.error("TTS转换音频失败, 可能是网络不可用! 如果您在中国, 请使用VPN.")
return return
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=20)
""" """
3. (可选) 使用 whisper 生成字幕 6. 合并视频
"""
if merged_subtitle_path is None:
if audio_files:
merged_subtitle_path = path.join(utils.task_dir(task_id), f"subtitle.srt")
subtitle_provider = config.app.get("subtitle_provider", "").strip().lower()
logger.info(f"\n\n使用 {subtitle_provider} 生成字幕")
subtitle.create(
audio_file=merged_audio_path,
subtitle_file=merged_subtitle_path,
)
subtitle_lines = subtitle.file_to_subtitles(merged_subtitle_path)
if not subtitle_lines:
logger.warning(f"字幕文件无效: {merged_subtitle_path}")
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=40)
"""
4. 裁剪视频 - 将超出音频长度的视频进行裁剪
"""
logger.info("\n\n## 4. 裁剪视频")
result = clip_video.clip_video(params.video_origin_path, tts_results)
subclip_path_videos.update(result)
subclip_videos = [x for x in subclip_path_videos.values()]
sm.state.update_task(task_id, state=const.TASK_STATE_PROCESSING, progress=60)
"""
5. 合并视频
""" """
final_video_paths = [] final_video_paths = []
combined_video_paths = [] combined_video_paths = []

View File

@ -0,0 +1,134 @@
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
'''
@Project: NarratoAI
@File : update_script
@Author : 小林同学
@Date : 2025/5/6 下午11:00
'''
import re
import os
from typing import Dict, List, Any, Tuple
def extract_timestamp_from_video_path(video_path: str) -> str:
    """
    Recover the time range encoded in a clipped video's file name.

    The file name is expected to look like
    ``vid-HH-MM-SS-HH-MM-SS.mp4`` (start time followed by end time).

    Args:
        video_path: Path to the clipped video file.

    Returns:
        The time range as ``'HH:MM:SS-HH:MM:SS'``, or an empty string when
        the file name does not match the expected pattern.
    """
    # Only the file name carries the timestamp; directories are ignored.
    clip_name = os.path.basename(video_path)
    found = re.search(r'vid-(\d{2}-\d{2}-\d{2})-(\d{2}-\d{2}-\d{2})\.mp4', clip_name)
    if found is None:
        return ""

    # Each captured group is "HH-MM-SS"; switch to the colon-separated form.
    begin, finish = (part.replace('-', ':') for part in found.groups())
    return f"{begin}-{finish}"
def calculate_duration(timestamp: str) -> float:
    """
    Compute the length, in seconds, of a ``'start-end'`` time range.

    Each side of the range may be written as ``HH:MM:SS``, ``MM:SS`` or
    ``SS``; the seconds field may carry a fractional part using either
    ``.`` or ``,`` as the decimal separator.

    Args:
        timestamp: Time range such as ``'00:01:15-00:04:40'``.

    Returns:
        ``end - start`` in seconds, rounded to two decimals and always a
        ``float``. ``0.0`` is returned when the value cannot be parsed
        (wrong shape, non-numeric fields, or not a string).
    """

    def _to_seconds(clock: str) -> float:
        # Split into up to three fields; the last one is always seconds.
        fields = clock.strip().split(':')
        if not 1 <= len(fields) <= 3:
            raise ValueError(f"unsupported time format: {clock!r}")
        *leading, seconds = fields
        total = 0.0
        # Fold hours/minutes base-60 so HH:MM:SS and MM:SS share one path.
        for field in leading:
            total = total * 60 + int(field)
        return total * 60 + float(seconds.replace(',', '.'))

    try:
        start_time, end_time = timestamp.split('-')
        return round(_to_seconds(end_time) - _to_seconds(start_time), 2)
    except (ValueError, AttributeError):
        # Best-effort contract kept from the original: unparseable -> 0.0.
        return 0.0
def update_script_timestamps(script_list: List[Dict[str, Any]], tts_result: Dict[str, str]) -> List[Dict[str, Any]]:
    """
    Rewrite script timestamps from clipped video file names and attach durations.

    Args:
        script_list: Original script entries.
        tts_result: Mapping whose keys are the original timestamps and whose
            values are video file paths; the new timestamp is parsed out of
            each path's file name.

    Returns:
        A new list of shallow-copied entries. Entries whose timestamp is a
        key of ``tts_result`` (with a parseable path) get the new timestamp;
        every entry that has a ``'timestamp'`` key gets a ``'duration'``.
        The input entries are not modified.
    """
    # Original timestamp -> timestamp recovered from the video file name.
    # Paths whose name yields no timestamp are left out of the mapping.
    remapped = {}
    for original_ts, clip_path in tts_result.items():
        trimmed_ts = extract_timestamp_from_video_path(clip_path)
        if trimmed_ts:
            remapped[original_ts] = trimmed_ts

    refreshed = []
    for entry in script_list:
        # Work on a shallow copy so the caller's dicts stay untouched.
        clone = entry.copy()
        if clone.get('timestamp') in remapped:
            clone['timestamp'] = remapped[clone['timestamp']]
        # Duration is derived from whichever timestamp the entry now holds.
        if 'timestamp' in clone:
            clone['duration'] = calculate_duration(clone['timestamp'])
        refreshed.append(clone)

    return refreshed
if __name__ == '__main__':
    # Demo: six script entries, four of which have a clipped video whose file
    # name carries the shortened time range.
    sample_script = [
        {
            'picture': '【解说】好的,各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸范闲在北齐""了?这怎么可能!',
            'timestamp': '00:00:00-00:01:15',
            'narration': '好的各位,欢迎回到我的频道!《庆余年 2》刚开播就给了我们一个王炸范闲在北齐""了?这怎么可能!上集片尾那个巨大的悬念,这一集就立刻揭晓了!范闲假死归来,他面临的第一个,也是最大的难关,就是如何面对他最敬爱的,同时也是最可怕的那个人——庆帝!',
            'OST': 0,
        },
        {
            'picture': '【解说】上一集我们看到,范闲在北齐遭遇了惊天变故,生死不明!',
            'timestamp': '00:01:15-00:04:40',
            'narration': '但我们都知道,他绝不可能就这么轻易退场!第二集一开场,范闲就已经秘密回到了京都。他的生死传闻,可不像我们想象中那样只是小范围流传,而是…',
            'OST': 0,
        },
        {
            'picture': '画面切到王启年小心翼翼地向范闲汇报。',
            'timestamp': '00:04:41-00:04:58',
            'narration': '我发现大人的死讯不光是在民间,在官场上也它传开了,所以呢,所以啊,可不是什么好事,将来您跟陛下怎么交代,这可是欺君之罪',
            'OST': 1,
        },
        {
            'picture': '【解说】"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。',
            'timestamp': '00:04:58-00:05:45',
            'narration': '"欺君之罪"!在封建王朝,这可是抄家灭族的大罪!搁一般人,肯定脚底抹油溜之大吉了。但范闲是谁啊?他偏要反其道而行之!他竟然决定,直接去见庆帝!冒着天大的风险,用"假死"这个事实去赌庆帝的态度!',
            'OST': 0,
        },
        {
            'picture': '【解说】但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
            'timestamp': '00:05:45-00:06:00',
            'narration': '但想见庆帝,哪有那么容易?范闲艺高人胆大,竟然选择了最激进的方式——闯宫!',
            'OST': 0,
        },
        {
            'picture': '画面切换到范闲蒙面闯入皇宫,被侍卫包围的场景。',
            'timestamp': '00:06:00-00:06:03',
            'narration': '抓刺客',
            'OST': 1,
        },
    ]

    # Original timestamp -> clipped video path (the file name holds the new range).
    clip_paths = {
        '00:00:00-00:01:15': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/0ac14d474144b54d614c26a5c87cffe7/vid-00-00-00-00-00-26.mp4',
        '00:01:15-00:04:40': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/0ac14d474144b54d614c26a5c87cffe7/vid-00-01-15-00-01-29.mp4',
        '00:04:58-00:05:45': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/0ac14d474144b54d614c26a5c87cffe7/vid-00-04-58-00-05-20.mp4',
        '00:05:45-00:06:00': '/Users/apple/Desktop/home/NarratoAI/storage/temp/clip_video/0ac14d474144b54d614c26a5c87cffe7/vid-00-05-45-00-05-53.mp4',
    }

    # Update the script and print one summary line per entry.
    for item in update_script_timestamps(sample_script, clip_paths):
        print(f"Picture: {item['picture'][:20]}... | Timestamp: {item['timestamp']} | Duration: {item['duration']}")