mirror of
https://github.com/linyqh/NarratoAI.git
synced 2025-12-13 04:02:49 +00:00
优化qwenvl_analyzer.py中的base_url配置,移除默认值设置;在utils.py中新增format_time函数以支持秒数格式化为HH:MM:SS,mmm格式;在generate_script_docu.py中重构分析结果处理逻辑,合并批次分析结果并保存为JSON格式,同时兼容旧的文本格式输出。
This commit is contained in:
parent
c2660c6983
commit
6c6ceeebea
@ -30,7 +30,7 @@ class QwenAnalyzer:
|
|||||||
|
|
||||||
self.model_name = model_name
|
self.model_name = model_name
|
||||||
self.api_key = api_key
|
self.api_key = api_key
|
||||||
self.base_url = base_url or "https://dashscope.aliyuncs.com/compatible-mode/v1"
|
self.base_url = base_url
|
||||||
|
|
||||||
# 配置API客户端
|
# 配置API客户端
|
||||||
self._configure_client()
|
self._configure_client()
|
||||||
|
|||||||
@ -197,6 +197,28 @@ def time_convert_seconds_to_hmsm(seconds) -> str:
|
|||||||
return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, minutes, seconds, milliseconds)
|
return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, minutes, seconds, milliseconds)
|
||||||
|
|
||||||
|
|
||||||
|
def format_time(seconds: float) -> str:
    """
    Convert a number of seconds into a time string formatted as HH:MM:SS,mmm.

    The value is first rounded to the nearest whole millisecond and then split
    with integer arithmetic. Working in integer milliseconds avoids the float
    truncation error of ``int((s - int(s)) * 1000)`` (e.g. 1.001 s would
    otherwise be rendered with ``,000``) and guarantees that a fractional part
    that rounds up carries into the seconds/minutes/hours fields.

    Args:
        seconds: The duration to convert, as an int or float number of seconds.

    Returns:
        The formatted time string in the form HH:MM:SS,mmm.
    """
    # Round once, up front, so every later step is exact integer math.
    total_milliseconds = int(round(seconds * 1000))

    hours, remainder = divmod(total_milliseconds, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, milliseconds = divmod(remainder, 1000)

    return "{:02d}:{:02d}:{:02d},{:03d}".format(hours, minutes, secs, milliseconds)
|
||||||
|
|
||||||
|
|
||||||
def text_to_srt(idx: int, msg: str, start_time: float, end_time: float) -> str:
|
def text_to_srt(idx: int, msg: str, start_time: float, end_time: float) -> str:
|
||||||
start_time = time_convert_seconds_to_hmsm(start_time)
|
start_time = time_convert_seconds_to_hmsm(start_time)
|
||||||
end_time = time_convert_seconds_to_hmsm(end_time)
|
end_time = time_convert_seconds_to_hmsm(end_time)
|
||||||
|
|||||||
@ -96,7 +96,7 @@ def generate_script_docu(params):
|
|||||||
raise Exception(f"关键帧提取失败: {str(e)}")
|
raise Exception(f"关键帧提取失败: {str(e)}")
|
||||||
|
|
||||||
"""
|
"""
|
||||||
2. 视觉分析
|
2. 视觉分析(批量分析每一帧)
|
||||||
"""
|
"""
|
||||||
vision_llm_provider = st.session_state.get('vision_llm_providers').lower()
|
vision_llm_provider = st.session_state.get('vision_llm_providers').lower()
|
||||||
logger.debug(f"VLM 视觉大模型提供商: {vision_llm_provider}")
|
logger.debug(f"VLM 视觉大模型提供商: {vision_llm_provider}")
|
||||||
@ -167,37 +167,138 @@ def generate_script_docu(params):
|
|||||||
# ===================处理分析结果===================
|
# ===================处理分析结果===================
|
||||||
update_progress(60, "正在整理分析结果...")
|
update_progress(60, "正在整理分析结果...")
|
||||||
|
|
||||||
# 合并所有批次的析结果
|
# 合并所有批次的分析结果
|
||||||
frame_analysis = ""
|
frame_analysis = ""
|
||||||
|
merged_frame_observations = [] # 合并所有批次的帧观察
|
||||||
|
overall_activity_summaries = [] # 合并所有批次的整体总结
|
||||||
prev_batch_files = None
|
prev_batch_files = None
|
||||||
|
frame_counter = 1 # 初始化帧计数器,用于给所有帧分配连续的序号
|
||||||
|
logger.debug(json.dumps(results, indent=4, ensure_ascii=False))
|
||||||
|
|
||||||
for result in results:
|
for result in results:
|
||||||
if 'error' in result:
|
if 'error' in result:
|
||||||
logger.warning(f"批次 {result['batch_index']} 处理出现警告: {result['error']}")
|
logger.warning(f"批次 {result['batch_index']} 处理出现警告: {result['error']}")
|
||||||
|
continue
|
||||||
# 获取当前批次的文件列表 keyframe_001136_000045.jpg 将 000045 精度提升到 毫秒
|
|
||||||
|
# 获取当前批次的文件列表
|
||||||
batch_files = get_batch_files(keyframe_files, result, vision_batch_size)
|
batch_files = get_batch_files(keyframe_files, result, vision_batch_size)
|
||||||
logger.debug(f"批次 {result['batch_index']} 处理完成,共 {len(batch_files)} 张图片")
|
logger.debug(f"批次 {result['batch_index']} 处理完成,共 {len(batch_files)} 张图片")
|
||||||
# logger.debug(batch_files)
|
|
||||||
|
# 获取批次的时间戳范围
|
||||||
first_timestamp, last_timestamp, _ = get_batch_timestamps(batch_files, prev_batch_files)
|
first_timestamp, last_timestamp, timestamp_range = get_batch_timestamps(batch_files, prev_batch_files)
|
||||||
logger.debug(f"处理时间戳: {first_timestamp}-{last_timestamp}")
|
logger.debug(f"处理时间戳: {first_timestamp}-{last_timestamp}")
|
||||||
|
|
||||||
# 添加带时间戳的分析结果
|
# 解析响应中的JSON数据
|
||||||
frame_analysis += f"\n=== {first_timestamp}-{last_timestamp} ===\n"
|
response_text = result['response']
|
||||||
frame_analysis += result['response']
|
try:
|
||||||
frame_analysis += "\n"
|
# 处理可能包含```json```格式的响应
|
||||||
|
if "```json" in response_text:
|
||||||
|
json_content = response_text.split("```json")[1].split("```")[0].strip()
|
||||||
|
elif "```" in response_text:
|
||||||
|
json_content = response_text.split("```")[1].split("```")[0].strip()
|
||||||
|
else:
|
||||||
|
json_content = response_text.strip()
|
||||||
|
|
||||||
|
response_data = json.loads(json_content)
|
||||||
|
|
||||||
|
# 提取frame_observations和overall_activity_summary
|
||||||
|
if "frame_observations" in response_data:
|
||||||
|
frame_obs = response_data["frame_observations"]
|
||||||
|
overall_summary = response_data.get("overall_activity_summary", "")
|
||||||
|
|
||||||
|
# 添加时间戳信息到每个帧观察
|
||||||
|
for i, obs in enumerate(frame_obs):
|
||||||
|
if i < len(batch_files):
|
||||||
|
# 从文件名中提取时间戳
|
||||||
|
file_path = batch_files[i]
|
||||||
|
file_name = os.path.basename(file_path)
|
||||||
|
# 提取时间戳字符串 (格式如: keyframe_000675_000027000.jpg)
|
||||||
|
# 格式解析: keyframe_帧序号_毫秒时间戳.jpg
|
||||||
|
timestamp_parts = file_name.split('_')
|
||||||
|
if len(timestamp_parts) >= 3:
|
||||||
|
timestamp_str = timestamp_parts[-1].split('.')[0]
|
||||||
|
try:
|
||||||
|
timestamp_seconds = int(timestamp_str) / 1000 # 转换为秒
|
||||||
|
formatted_time = utils.format_time(timestamp_seconds) # 格式化时间戳
|
||||||
|
except ValueError:
|
||||||
|
logger.warning(f"无法解析时间戳: {timestamp_str}")
|
||||||
|
timestamp_seconds = 0
|
||||||
|
formatted_time = "00:00:00,000"
|
||||||
|
else:
|
||||||
|
logger.warning(f"文件名格式不符合预期: {file_name}")
|
||||||
|
timestamp_seconds = 0
|
||||||
|
formatted_time = "00:00:00,000"
|
||||||
|
|
||||||
|
# 添加额外信息到帧观察
|
||||||
|
obs["frame_path"] = file_path
|
||||||
|
obs["timestamp"] = formatted_time
|
||||||
|
obs["timestamp_seconds"] = timestamp_seconds
|
||||||
|
|
||||||
|
# 使用全局递增的帧计数器替换原始的frame_number
|
||||||
|
if "frame_number" in obs:
|
||||||
|
obs["original_frame_number"] = obs["frame_number"] # 保留原始编号作为参考
|
||||||
|
obs["frame_number"] = frame_counter # 赋值连续的帧编号
|
||||||
|
frame_counter += 1 # 增加帧计数器
|
||||||
|
|
||||||
|
# 添加到合并列表
|
||||||
|
merged_frame_observations.append(obs)
|
||||||
|
|
||||||
|
# 添加批次整体总结信息
|
||||||
|
if overall_summary:
|
||||||
|
# 从文件名中提取时间戳数值
|
||||||
|
first_time_str = first_timestamp.split('_')[-1].split('.')[0]
|
||||||
|
last_time_str = last_timestamp.split('_')[-1].split('.')[0]
|
||||||
|
|
||||||
|
# 转换为毫秒并计算持续时间(秒)
|
||||||
|
try:
|
||||||
|
first_time_ms = int(first_time_str)
|
||||||
|
last_time_ms = int(last_time_str)
|
||||||
|
batch_duration = (last_time_ms - first_time_ms) / 1000
|
||||||
|
except ValueError:
|
||||||
|
# 使用 utils.time_to_seconds 函数处理格式化的时间戳
|
||||||
|
first_time_seconds = utils.time_to_seconds(first_time_str.replace('_', ':').replace('-', ','))
|
||||||
|
last_time_seconds = utils.time_to_seconds(last_time_str.replace('_', ':').replace('-', ','))
|
||||||
|
batch_duration = last_time_seconds - first_time_seconds
|
||||||
|
|
||||||
|
overall_activity_summaries.append({
|
||||||
|
"batch_index": result['batch_index'],
|
||||||
|
"time_range": f"{first_timestamp}-{last_timestamp}",
|
||||||
|
"duration_seconds": batch_duration,
|
||||||
|
"summary": overall_summary
|
||||||
|
})
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"解析批次 {result['batch_index']} 的响应数据失败: {str(e)}")
|
||||||
|
# 添加原始响应作为回退
|
||||||
|
frame_analysis += f"\n=== {first_timestamp}-{last_timestamp} ===\n"
|
||||||
|
frame_analysis += response_text
|
||||||
|
frame_analysis += "\n"
|
||||||
|
|
||||||
# 更新上一个批次的文件
|
# 更新上一个批次的文件
|
||||||
prev_batch_files = batch_files
|
prev_batch_files = batch_files
|
||||||
|
|
||||||
|
# 将合并后的结果转为JSON字符串
|
||||||
|
merged_results = {
|
||||||
|
"frame_observations": merged_frame_observations,
|
||||||
|
"overall_activity_summaries": overall_activity_summaries
|
||||||
|
}
|
||||||
|
|
||||||
|
# 保存完整的分析结果为JSON
|
||||||
|
analysis_json_path = os.path.join(utils.task_dir(), "frame_analysis.json")
|
||||||
|
with open(analysis_json_path, 'w', encoding='utf-8') as f:
|
||||||
|
json.dump(merged_results, f, ensure_ascii=False, indent=2)
|
||||||
|
|
||||||
|
# 同时保存原始文本格式的分析结果(兼容性)
|
||||||
|
if not frame_analysis.strip() and merged_frame_observations:
|
||||||
|
# 如果没有原始文本但有合并结果,则从合并结果生成文本
|
||||||
|
frame_analysis = json.dumps(merged_results, ensure_ascii=False, indent=2)
|
||||||
|
|
||||||
if not frame_analysis.strip():
|
if not frame_analysis.strip():
|
||||||
raise Exception("未能生成有效的帧分析结果")
|
raise Exception("未能生成有效的帧分析结果")
|
||||||
|
|
||||||
# 保存分析结果
|
# # 保存文本格式分析结果
|
||||||
analysis_path = os.path.join(utils.temp_dir(), "frame_analysis.txt")
|
# analysis_path = os.path.join(utils.temp_dir(), "frame_analysis.txt")
|
||||||
with open(analysis_path, 'w', encoding='utf-8') as f:
|
# with open(analysis_path, 'w', encoding='utf-8') as f:
|
||||||
f.write(frame_analysis)
|
# f.write(frame_analysis)
|
||||||
|
|
||||||
update_progress(70, "正在生成脚本...")
|
update_progress(70, "正在生成脚本...")
|
||||||
|
|
||||||
@ -211,26 +312,41 @@ def generate_script_docu(params):
|
|||||||
frame_content_list = []
|
frame_content_list = []
|
||||||
prev_batch_files = None
|
prev_batch_files = None
|
||||||
|
|
||||||
for i, result in enumerate(results):
|
# 使用合并后的观察结果构建帧内容列表
|
||||||
if 'error' in result:
|
if merged_frame_observations:
|
||||||
continue
|
for obs in merged_frame_observations:
|
||||||
|
frame_content = {
|
||||||
|
"_id": obs.get("frame_number", 0), # 使用全局连续的帧编号作为ID
|
||||||
|
"timestamp": obs.get("timestamp", ""),
|
||||||
|
"picture": obs.get("observation", ""),
|
||||||
|
"narration": "",
|
||||||
|
"OST": 2,
|
||||||
|
"timestamp_seconds": obs.get("timestamp_seconds", 0)
|
||||||
|
}
|
||||||
|
frame_content_list.append(frame_content)
|
||||||
|
logger.debug(f"添加帧内容: ID={obs.get('frame_number', 0)}, 时间={obs.get('timestamp', '')}, 描述长度={len(obs.get('observation', ''))}")
|
||||||
|
else:
|
||||||
|
# 兼容旧的处理方式,如果没有合并后的观察结果
|
||||||
|
for i, result in enumerate(results):
|
||||||
|
if 'error' in result:
|
||||||
|
continue
|
||||||
|
|
||||||
batch_files = get_batch_files(keyframe_files, result, vision_batch_size)
|
batch_files = get_batch_files(keyframe_files, result, vision_batch_size)
|
||||||
_, _, timestamp_range = get_batch_timestamps(batch_files, prev_batch_files)
|
_, _, timestamp_range = get_batch_timestamps(batch_files, prev_batch_files)
|
||||||
|
|
||||||
frame_content = {
|
frame_content = {
|
||||||
"_id": i + 1,
|
"_id": i + 1,
|
||||||
"timestamp": timestamp_range,
|
"timestamp": timestamp_range,
|
||||||
"picture": result['response'],
|
"picture": result['response'],
|
||||||
"narration": "",
|
"narration": "",
|
||||||
"OST": 2
|
"OST": 2
|
||||||
}
|
}
|
||||||
frame_content_list.append(frame_content)
|
frame_content_list.append(frame_content)
|
||||||
|
|
||||||
logger.debug(f"添加帧内容: 时间范围={timestamp_range}, 分析结果长度={len(result['response'])}")
|
logger.debug(f"添加帧内容: 时间范围={timestamp_range}, 分析结果长度={len(result['response'])}")
|
||||||
|
|
||||||
# 更新上一个批次的文件
|
# 更新上一个批次的文件
|
||||||
prev_batch_files = batch_files
|
prev_batch_files = batch_files
|
||||||
|
|
||||||
if not frame_content_list:
|
if not frame_content_list:
|
||||||
raise Exception("没有有效的帧内容可以处理")
|
raise Exception("没有有效的帧内容可以处理")
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user