NarratoAI/app/utils/check_script.py
linyq 02589c8355 剪辑逻辑进度70%;
待优化点:
1. 生成字幕逻辑优化
2. 优化脚本-解说质量
3. 修复字幕bug
2024-09-29 00:02:40 +08:00

116 lines
4.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
from loguru import logger
import os
from datetime import timedelta
def time_to_seconds(time_str):
    """Convert an ``MM:SS`` or ``HH:MM:SS`` string into a number of seconds.

    Args:
        time_str: Colon-separated time string with two (minutes, seconds) or
            three (hours, minutes, seconds) integer components.

    Returns:
        The total number of seconds as a float (``timedelta.total_seconds()``).

    Raises:
        ValueError: If a component is not an integer, or if the string does
            not have exactly two or three components.
    """
    try:
        parts = [int(part) for part in time_str.split(':')]
    except ValueError as err:
        # Surface the same descriptive message for every kind of parse
        # failure instead of int()'s generic "invalid literal" error.
        raise ValueError(f"无法解析时间字符串: {time_str}") from err

    if len(parts) == 2:
        minutes, seconds = parts
        return timedelta(minutes=minutes, seconds=seconds).total_seconds()
    if len(parts) == 3:
        hours, minutes, seconds = parts
        return timedelta(hours=hours, minutes=minutes, seconds=seconds).total_seconds()
    raise ValueError(f"无法解析时间字符串: {time_str}")
def seconds_to_time_str(seconds):
    """Format a number of seconds as ``MM:SS`` or, from one hour up, ``HH:MM:SS``.

    Args:
        seconds: Duration in seconds. It is converted with ``int()``, so any
            fractional part is truncated rather than rounded.

    Returns:
        A zero-padded ``HH:MM:SS`` string when the value is at least one
        hour, otherwise a zero-padded ``MM:SS`` string.

    Raises:
        ValueError: If the truncated value is negative. Floor division would
            otherwise produce a misleading result (e.g. ``-5`` -> ``"59:55"``).
    """
    total = int(seconds)
    if total < 0:
        raise ValueError(f"时间秒数不能为负: {seconds}")

    hours, remainder = divmod(total, 3600)
    minutes, secs = divmod(remainder, 60)
    if hours > 0:
        return f"{hours:02d}:{minutes:02d}:{secs:02d}"
    return f"{minutes:02d}:{secs:02d}"
def adjust_timestamp(start_time, duration):
    """Build a ``start-end`` range that begins at *start_time* and lasts *duration*.

    Args:
        start_time: Start of the range as a time string accepted by
            ``time_to_seconds``; it is copied into the result unchanged.
        duration: Length of the range in seconds.

    Returns:
        ``"<start_time>-<end>"`` where ``<end>`` is the start plus the
        duration, formatted by ``seconds_to_time_str``.
    """
    end_label = seconds_to_time_str(time_to_seconds(start_time) + duration)
    return "{}-{}".format(start_time, end_label)
def estimate_audio_duration(text, seconds_per_char=0.2):
    """Estimate how long it takes to speak *text*.

    The estimate is purely length-based: every character is assumed to take
    the same amount of time.

    Args:
        text: The narration text; only its length is used.
        seconds_per_char: Assumed average speaking time per character, in
            seconds. Defaults to 0.2.

    Returns:
        The estimated duration in seconds (``len(text) * seconds_per_char``).
    """
    return len(text) * seconds_per_char
def check_script(data, total_duration):
    """Validate a narration script and re-time its narrated items.

    For every item the function:

    1. checks that the ``picture``, ``timestamp``, ``narration`` and ``OST``
       fields are present;
    2. applies the OST rules: when ``OST`` equals ``False`` the narration
       must be non-empty and at most 60 characters long; when ``OST`` equals
       ``True`` the narration must contain the ``原声播放_`` marker;
    3. parses the ``timestamp`` range and checks that it does not overlap a
       previously accepted range;
    4. for items with ``OST`` equal to ``False`` and a non-empty narration,
       rewrites ``timestamp`` so that it keeps its start and ends after the
       estimated narration duration.

    Problems are collected rather than raised. A missing narration or a
    malformed/missing timestamp is reported as a validation error instead of
    aborting the whole check with an exception.

    Args:
        data: Iterable of script items (dicts). Items are modified in place
            by step 4.
        total_duration: Total video duration in seconds. It is only logged;
            the check against it is currently disabled.

    Returns:
        A tuple ``(errors, data)`` where ``errors`` is a list of error
        message strings (empty when the script passes) and ``data`` is the
        same object that was passed in.
    """
    errors = []
    time_ranges = []  # (start, end) in seconds of every accepted timestamp

    logger.info("开始检查脚本")
    logger.info(f"视频总时长: {total_duration:.2f}")
    logger.info("=" * 50)

    for i, item in enumerate(data, 1):
        logger.info(f"\n检查第 {i} 项:")

        # Every item must carry all required fields.
        required_fields = ['picture', 'timestamp', 'narration', 'OST']
        for field in required_fields:
            if field not in item:
                errors.append(f"{i} 项缺少 {field} 字段")
                logger.info(f" - 错误: 缺少 {field} 字段")
            else:
                logger.info(f" - {field}: {item[field]}")

        ost = item.get('OST')
        narration = item.get('narration')

        # OST rules. Equality (not identity) is kept on purpose so that the
        # integers 0 / 1 keep being treated like False / True.
        if ost == False:  # noqa: E712
            if not narration:
                errors.append(f"{i} 项 OST 为 false但 narration 为空")
                logger.info(" - 错误: OST 为 false但 narration 为空")
            elif len(narration) > 60:
                errors.append(f"{i} 项 OST 为 false但 narration 超过 60 字")
                logger.info(f" - 错误: OST 为 false但 narration 超过 60 字 (当前: {len(narration)} 字)")
            else:
                logger.info(" - OST 为 falsenarration 检查通过")
        elif ost == True:  # noqa: E712
            # A missing/None narration is treated as an empty string so the
            # membership test reports an error instead of raising TypeError.
            if "原声播放_" not in (narration or ""):
                errors.append(f"{i} 项 OST 为 true但 narration 不为空")
                logger.info(" - 错误: OST 为 true但 narration 不为空")
            else:
                logger.info(" - OST 为 truenarration 检查通过")

        # Timestamp: must parse as "start-end" and must not overlap a range
        # that was already accepted.
        timestamp_ok = False
        if 'timestamp' in item:
            raw_timestamp = item['timestamp']
            try:
                start, end = map(time_to_seconds, raw_timestamp.split('-'))
            except (AttributeError, ValueError):
                # Not a string, wrong number of "-" separated parts, or a
                # part that is not a valid time string.
                errors.append(f"{i} 项 timestamp '{raw_timestamp}' 格式无效")
                logger.info(f" - 错误: timestamp '{raw_timestamp}' 格式无效")
            else:
                timestamp_ok = True
                overlaps = any(
                    start < existing_end and end > existing_start
                    for existing_start, existing_end in time_ranges
                )
                if overlaps:
                    errors.append(f"{i} 项 timestamp '{raw_timestamp}' 与其他时间段重叠")
                    logger.info(f" - 错误: timestamp '{raw_timestamp}' 与其他时间段重叠")
                else:
                    logger.info(f" - timestamp '{raw_timestamp}' 检查通过")
                    time_ranges.append((start, end))
                # NOTE: the check that `end` does not exceed `total_duration`
                # is intentionally disabled for now.

        # Re-time narrated items: keep the start, derive the end from the
        # estimated narration length. Skipped when there is no usable
        # timestamp to take the start from.
        if ost == False and narration and timestamp_ok:  # noqa: E712
            estimated_duration = estimate_audio_duration(narration)
            start_time = item['timestamp'].split('-')[0]
            item['timestamp'] = adjust_timestamp(start_time, estimated_duration)
            logger.info(f" - 已调整 timestamp 为 {item['timestamp']} (估算音频时长: {estimated_duration:.2f} 秒)")

    if errors:
        logger.info("检查结果:不通过")
        logger.info("发现以下错误:")
        for error in errors:
            logger.info(f"- {error}")
    else:
        logger.info("检查结果:通过")
        logger.info("所有项目均符合规则要求。")

    return errors, data
if __name__ == "__main__":
    # Manual smoke run: load a script JSON file, add new timestamps to it and
    # print the result.
    import sys

    # The script path can be passed as the first command-line argument; the
    # developer-local sample file remains the default.
    default_path = "/Users/apple/Desktop/home/NarratoAI/resource/scripts/test004.json"
    file_path = sys.argv[1] if len(sys.argv) > 1 else default_path

    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    total_duration = 280
    # Validation run, currently disabled:
    # check_script(data, total_duration)

    from app.utils.utils import add_new_timestamps
    res = add_new_timestamps(data)
    print(json.dumps(res, indent=4, ensure_ascii=False))