NarratoAI/app/utils/check_script.py

import json
from loguru import logger
import os
from datetime import timedelta

def time_to_seconds(time_str):
    parts = list(map(int, time_str.split(':')))
    if len(parts) == 2:
        return timedelta(minutes=parts[0], seconds=parts[1]).total_seconds()
    elif len(parts) == 3:
        return timedelta(hours=parts[0], minutes=parts[1], seconds=parts[2]).total_seconds()
    raise ValueError(f"无法解析时间字符串: {time_str}")

def seconds_to_time_str(seconds):
    hours, remainder = divmod(int(seconds), 3600)
    minutes, seconds = divmod(remainder, 60)
    if hours > 0:
        return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
    else:
        return f"{minutes:02d}:{seconds:02d}"

def adjust_timestamp(start_time, duration):
    start_seconds = time_to_seconds(start_time)
    end_seconds = start_seconds + duration
    return f"{start_time}-{seconds_to_time_str(end_seconds)}"

def estimate_audio_duration(text):
    # 假设平均每个字符需要 0.2 秒
    return len(text) * 0.2

def check_script(data, total_duration):
    errors = []
    time_ranges = []

    logger.info("开始检查脚本")
    logger.info(f"视频总时长: {total_duration:.2f} 秒")
    logger.info("=" * 50)

    for i, item in enumerate(data, 1):
        logger.info(f"\n检查第 {i} 项:")

        # 检查所有必需字段
        required_fields = ['picture', 'timestamp', 'narration', 'OST']
        for field in required_fields:
            if field not in item:
                errors.append(f"第 {i} 项缺少 {field} 字段")
                logger.info(f"  - 错误: 缺少 {field} 字段")
            else:
                logger.info(f"  - {field}: {item[field]}")

        # 检查 OST 相关规则
        if item.get('OST') == False:
            if not item.get('narration'):
                errors.append(f"第 {i} 项 OST 为 false，但 narration 为空")
                logger.info("  - 错误: OST 为 false，但 narration 为空")
            elif len(item['narration']) > 60:
                errors.append(f"第 {i} 项 OST 为 false，但 narration 超过 60 字")
                logger.info(f"  - 错误: OST 为 false，但 narration 超过 60 字 (当前: {len(item['narration'])} 字)")
            else:
                logger.info("  - OST 为 false，narration 检查通过")
        elif item.get('OST') == True:
            if "原声播放_" not in item.get('narration'):
                errors.append(f"第 {i} 项 OST 为 true，但 narration 不为空")
                logger.info("  - 错误: OST 为 true，但 narration 不为空")
            else:
                logger.info("  - OST 为 true，narration 检查通过")

        # 检查 timestamp
        if 'timestamp' in item:
            start, end = map(time_to_seconds, item['timestamp'].split('-'))
            if any((start < existing_end and end > existing_start) for existing_start, existing_end in time_ranges):
                errors.append(f"第 {i} 项 timestamp '{item['timestamp']}' 与其他时间段重叠")
                logger.info(f"  - 错误: timestamp '{item['timestamp']}' 与其他时间段重叠")
            else:
                logger.info(f"  - timestamp '{item['timestamp']}' 检查通过")
                time_ranges.append((start, end))

            # if end > total_duration:
            #     errors.append(f"第 {i} 项 timestamp '{item['timestamp']}' 超过总时长 {total_duration:.2f} 秒")
            #     logger.info(f"  - 错误: timestamp '{item['timestamp']}' 超过总时长 {total_duration:.2f} 秒")
            # else:
            #     logger.info(f"  - timestamp 在总时长范围内")

        # 处理 narration 字段
        if item.get('OST') == False and item.get('narration'):
            estimated_duration = estimate_audio_duration(item['narration'])
            start_time = item['timestamp'].split('-')[0]
            item['timestamp'] = adjust_timestamp(start_time, estimated_duration)
            logger.info(f"  - 已调整 timestamp 为 {item['timestamp']} (估算音频时长: {estimated_duration:.2f} 秒)")

    if errors:
        logger.info("检查结果：不通过")
        logger.info("发现以下错误：")
        for error in errors:
            logger.info(f"- {error}")
    else:
        logger.info("检查结果：通过")
        logger.info("所有项目均符合规则要求。")

    return errors, data


if __name__ == "__main__":
    file_path = "/Users/apple/Desktop/home/NarratoAI/resource/scripts/test004.json"

    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    total_duration = 280

    # check_script(data, total_duration)

    from app.utils.utils import add_new_timestamps
    res = add_new_timestamps(data)
    print(json.dumps(res, indent=4, ensure_ascii=False))