mirror of
https://github.com/linyqh/NarratoAI.git
synced 2026-02-22 00:10:27 +00:00
fix: 修复短剧混剪字幕在 windows 环境下加载失败
This commit is contained in:
parent
5e46ea2746
commit
26f0dfeab5
5
.gitignore
vendored
5
.gitignore
vendored
@ -39,4 +39,9 @@ bug清单.md
|
|||||||
task.md
|
task.md
|
||||||
.claude/*
|
.claude/*
|
||||||
.serena/*
|
.serena/*
|
||||||
|
|
||||||
|
# OpenSpec: 忽略活动的变更提案,但保留归档和规范
|
||||||
|
openspec/*
|
||||||
|
AGENTS.md
|
||||||
CLAUDE.md
|
CLAUDE.md
|
||||||
|
tests/*
|
||||||
@ -39,6 +39,20 @@ def analyze_subtitle(
|
|||||||
try:
|
try:
|
||||||
# 加载字幕文件
|
# 加载字幕文件
|
||||||
subtitles = load_srt(srt_path)
|
subtitles = load_srt(srt_path)
|
||||||
|
|
||||||
|
# 检查字幕是否为空
|
||||||
|
if not subtitles:
|
||||||
|
error_msg = (
|
||||||
|
f"字幕文件 {srt_path} 解析后无有效内容。\n"
|
||||||
|
f"请检查:\n"
|
||||||
|
f"1. 文件格式是否为标准 SRT\n"
|
||||||
|
f"2. 文件编码是否为 UTF-8、GBK 或 GB2312\n"
|
||||||
|
f"3. 文件内容是否为空"
|
||||||
|
)
|
||||||
|
logger.error(error_msg)
|
||||||
|
raise ValueError(error_msg)
|
||||||
|
|
||||||
|
logger.info(f"成功加载字幕文件 {srt_path},共 {len(subtitles)} 条有效字幕")
|
||||||
subtitle_content = "\n".join([f"{sub['timestamp']}\n{sub['text']}" for sub in subtitles])
|
subtitle_content = "\n".join([f"{sub['timestamp']}\n{sub['text']}" for sub in subtitles])
|
||||||
|
|
||||||
# 初始化统一LLM服务
|
# 初始化统一LLM服务
|
||||||
|
|||||||
@ -1,45 +1,80 @@
|
|||||||
# 公共方法
|
# 公共方法
|
||||||
import json
|
import json
|
||||||
import requests # 新增
|
import requests # 新增
|
||||||
|
import pysrt
|
||||||
|
from loguru import logger
|
||||||
from typing import List, Dict
|
from typing import List, Dict
|
||||||
|
|
||||||
|
|
||||||
def load_srt(file_path: str) -> List[Dict]:
|
def load_srt(file_path: str) -> List[Dict]:
|
||||||
"""加载并解析SRT文件
|
"""加载并解析SRT文件(使用 pysrt 库,支持多种编码和格式)
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file_path: SRT文件路径
|
file_path: SRT文件路径
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
字幕内容列表
|
字幕内容列表,格式:
|
||||||
|
[
|
||||||
|
{
|
||||||
|
'number': int, # 字幕序号
|
||||||
|
'timestamp': str, # "00:00:01,000 --> 00:00:03,000"
|
||||||
|
'text': str, # 字幕文本
|
||||||
|
'start_time': str, # "00:00:01,000"
|
||||||
|
'end_time': str # "00:00:03,000"
|
||||||
|
},
|
||||||
|
...
|
||||||
|
]
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
FileNotFoundError: 文件不存在
|
||||||
|
ValueError: 文件编码不支持或格式错误
|
||||||
"""
|
"""
|
||||||
with open(file_path, 'r', encoding='utf-8-sig') as f:
|
# 编码自动检测:依次尝试常见编码
|
||||||
content = f.read().strip()
|
encodings = ['utf-8', 'utf-8-sig', 'gbk', 'gb2312']
|
||||||
|
subs = None
|
||||||
|
detected_encoding = None
|
||||||
|
|
||||||
# 按空行分割字幕块
|
for encoding in encodings:
|
||||||
subtitle_blocks = content.split('\n\n')
|
try:
|
||||||
|
subs = pysrt.open(file_path, encoding=encoding)
|
||||||
|
detected_encoding = encoding
|
||||||
|
logger.info(f"成功加载字幕文件 {file_path},编码:{encoding},共 {len(subs)} 条")
|
||||||
|
break
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
continue
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"使用编码 {encoding} 加载失败: {e}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
if subs is None:
|
||||||
|
# 所有编码都失败
|
||||||
|
raise ValueError(
|
||||||
|
f"无法读取字幕文件 {file_path},"
|
||||||
|
f"请检查文件编码(支持 UTF-8、GBK、GB2312)"
|
||||||
|
)
|
||||||
|
|
||||||
|
# 检查是否为空
|
||||||
|
if not subs:
|
||||||
|
logger.warning(f"字幕文件 {file_path} 解析后无有效内容")
|
||||||
|
return []
|
||||||
|
|
||||||
|
# 转换为原格式(向后兼容)
|
||||||
subtitles = []
|
subtitles = []
|
||||||
|
for sub in subs:
|
||||||
|
# 合并多行文本为单行(某些 SRT 文件会有换行)
|
||||||
|
text = sub.text.replace('\n', ' ').strip()
|
||||||
|
|
||||||
for block in subtitle_blocks:
|
# 跳过空字幕
|
||||||
lines = block.split('\n')
|
if not text:
|
||||||
if len(lines) >= 3: # 确保块包含足够的行
|
continue
|
||||||
try:
|
|
||||||
number = int(lines[0].strip())
|
|
||||||
timestamp = lines[1]
|
|
||||||
text = ' '.join(lines[2:])
|
|
||||||
|
|
||||||
# 解析时间戳
|
subtitles.append({
|
||||||
start_time, end_time = timestamp.split(' --> ')
|
'number': sub.index,
|
||||||
|
'timestamp': f"{sub.start} --> {sub.end}",
|
||||||
subtitles.append({
|
'text': text,
|
||||||
'number': number,
|
'start_time': str(sub.start),
|
||||||
'timestamp': timestamp,
|
'end_time': str(sub.end)
|
||||||
'text': text,
|
})
|
||||||
'start_time': start_time,
|
|
||||||
'end_time': end_time
|
|
||||||
})
|
|
||||||
except ValueError as e:
|
|
||||||
print(f"Warning: 跳过无效的字幕块: {e}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
|
logger.info(f"成功解析 {len(subtitles)} 条有效字幕")
|
||||||
return subtitles
|
return subtitles
|
||||||
|
|||||||
@ -343,12 +343,34 @@ def short_drama_summary(tr):
|
|||||||
# 只有当有文件上传且尚未处理时才执行处理逻辑
|
# 只有当有文件上传且尚未处理时才执行处理逻辑
|
||||||
if subtitle_file is not None and not st.session_state['subtitle_file_processed']:
|
if subtitle_file is not None and not st.session_state['subtitle_file_processed']:
|
||||||
try:
|
try:
|
||||||
# 读取上传的SRT内容
|
# 清理文件名,防止路径污染和路径遍历攻击
|
||||||
script_content = subtitle_file.read().decode('utf-8')
|
safe_filename = os.path.basename(subtitle_file.name)
|
||||||
|
|
||||||
|
# 编码自动检测:依次尝试常见编码
|
||||||
|
encodings = ['utf-8', 'utf-8-sig', 'gbk', 'gb2312']
|
||||||
|
script_content = None
|
||||||
|
detected_encoding = None
|
||||||
|
|
||||||
|
for encoding in encodings:
|
||||||
|
try:
|
||||||
|
subtitle_file.seek(0) # 重置文件指针
|
||||||
|
script_content = subtitle_file.read().decode(encoding)
|
||||||
|
detected_encoding = encoding
|
||||||
|
break
|
||||||
|
except UnicodeDecodeError:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if script_content is None:
|
||||||
|
st.error(tr("无法读取字幕文件,请检查文件编码(支持 UTF-8、GBK、GB2312)"))
|
||||||
|
st.stop()
|
||||||
|
|
||||||
|
# 验证字幕内容(简单检查)
|
||||||
|
if len(script_content.strip()) < 10:
|
||||||
|
st.warning(tr("字幕文件内容似乎为空,请检查文件"))
|
||||||
|
|
||||||
# 保存到字幕目录
|
# 保存到字幕目录
|
||||||
script_file_path = os.path.join(utils.subtitle_dir(), subtitle_file.name)
|
script_file_path = os.path.join(utils.subtitle_dir(), safe_filename)
|
||||||
file_name, file_extension = os.path.splitext(subtitle_file.name)
|
file_name, file_extension = os.path.splitext(safe_filename)
|
||||||
|
|
||||||
# 如果文件已存在,添加时间戳
|
# 如果文件已存在,添加时间戳
|
||||||
if os.path.exists(script_file_path):
|
if os.path.exists(script_file_path):
|
||||||
@ -356,12 +378,16 @@ def short_drama_summary(tr):
|
|||||||
file_name_with_timestamp = f"{file_name}_{timestamp}"
|
file_name_with_timestamp = f"{file_name}_{timestamp}"
|
||||||
script_file_path = os.path.join(utils.subtitle_dir(), file_name_with_timestamp + file_extension)
|
script_file_path = os.path.join(utils.subtitle_dir(), file_name_with_timestamp + file_extension)
|
||||||
|
|
||||||
# 直接写入SRT内容,不进行JSON转换
|
# 直接写入SRT内容(统一使用 UTF-8)
|
||||||
with open(script_file_path, "w", encoding='utf-8') as f:
|
with open(script_file_path, "w", encoding='utf-8') as f:
|
||||||
f.write(script_content)
|
f.write(script_content)
|
||||||
|
|
||||||
# 更新状态
|
# 更新状态
|
||||||
st.success(tr("字幕上传成功"))
|
st.success(
|
||||||
|
f"{tr('字幕上传成功')} "
|
||||||
|
f"(编码: {detected_encoding.upper()}, "
|
||||||
|
f"大小: {len(script_content)} 字符)"
|
||||||
|
)
|
||||||
st.session_state['subtitle_path'] = script_file_path
|
st.session_state['subtitle_path'] = script_file_path
|
||||||
st.session_state['subtitle_file_processed'] = True # 标记已处理
|
st.session_state['subtitle_file_processed'] = True # 标记已处理
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user