From 8ad81d27b666da669db8038602921f9199cebed6 Mon Sep 17 00:00:00 2001
From: linyq <linyqemail@163.com>
Date: Mon, 7 Jul 2025 17:45:34 +0800
Subject: [PATCH] =?UTF-8?q?feat(prompts):=20=E6=9B=B4=E6=96=B0JSON?=
 =?UTF-8?q?=E6=A0=BC=E5=BC=8F=E8=BE=93=E5=87=BA=E4=BB=A5=E5=A2=9E=E5=BC=BA?=
 =?UTF-8?q?=E8=A7=A3=E6=9E=90=E5=85=BC=E5=AE=B9=E6=80=A7?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

在多个文件中修正了JSON格式输出，确保使用标准格式 `{` 和 `}`，并移除双大括号 `{{` 和 `}}`。这些更改提升了系统对LLM生成内容的解析能力，确保输出的JSON格式有效且符合要求，增强了整体用户体验。
---
 .../prompts/documentary/frame_analysis.py     |  8 +--
 .../documentary/narration_generation.py       | 10 ++--
 .../short_drama_editing/plot_extraction.py    |  8 +--
 .../short_drama_editing/subtitle_analysis.py  |  8 +--
 .../script_generation.py                      |  8 +--
 docs/prompt_management_system.md              | 48 ++++++++++++++++-
 webui/tools/generate_short_summary.py         | 52 +++++++++++++++----
 7 files changed, 109 insertions(+), 33 deletions(-)
diff --git a/app/services/prompts/documentary/frame_analysis.py b/app/services/prompts/documentary/frame_analysis.py
index ec1c478..44f0eac 100644
--- a/app/services/prompts/documentary/frame_analysis.py
+++ b/app/services/prompts/documentary/frame_analysis.py
@@ -46,19 +46,19 @@ ${custom_instructions}
 
 请按照以下JSON格式输出分析结果：
 
-{{
+{
   "analysis": [
-    {{
+    {
       "timestamp": "00:00:05,390",
       "picture": "详细的画面描述，包括场景、人物、物体、动作等",
       "scene_type": "场景类型（如：建造、准备、完成等）",
       "key_elements": ["关键元素1", "关键元素2"],
       "visual_quality": "画面质量描述（构图、光线、色彩等）"
-    }}
+    }
   ],
   "summary": "整体视频内容概述",
   "total_frames": "分析的帧数"
-}}
+}
 
 重要要求：
 1. 只输出JSON格式，不要添加任何其他文字或代码块标记
diff --git a/app/services/prompts/documentary/narration_generation.py b/app/services/prompts/documentary/narration_generation.py
index 81eb24a..ab9e1c6 100644
--- a/app/services/prompts/documentary/narration_generation.py
+++ b/app/services/prompts/documentary/narration_generation.py
@@ -59,16 +59,16 @@ ${video_frame_description}
 我正在尝试做这个内容的解说纪录片视频，我需要你以 <video_frame_description> </video_frame_description> 中的内容为解说目标，根据我刚才提供给你的对标文案特点，以及你总结的特点，帮我生成一段关于荒野建造的解说文案，文案需要符合平台受欢迎的解说风格，请使用 json 格式进行输出；使用 <output> 中的输出格式：
 
 <output>
-{{
+{
   "items": [
-    {{
+    {
         "_id": 1,
         "timestamp": "00:00:05,390-00:00:10,430",
         "picture": "画面描述",
-        "narration": "解说文案",
-    }}
+        "narration": "解说文案"
+    }
   ]
-}}
+}
 </output>
 
 <restriction>
diff --git a/app/services/prompts/short_drama_editing/plot_extraction.py b/app/services/prompts/short_drama_editing/plot_extraction.py
index f68d21a..9802fff 100644
--- a/app/services/prompts/short_drama_editing/plot_extraction.py
+++ b/app/services/prompts/short_drama_editing/plot_extraction.py
@@ -51,15 +51,15 @@ ${subtitle_content}
 
 请返回一个JSON对象，包含一个名为"plot_points"的数组，数组中包含多个对象，每个对象都要包含以下字段：
 
-{{
+{
   "plot_points": [
-    {{
+    {
       "timestamp": "时间段，格式为xx:xx:xx,xxx-xx:xx:xx,xxx",
       "title": "关键剧情的主题",
       "picture": "关键剧情前后的详细剧情描述，包括人物对话、动作、情感变化等"
-    }}
+    }
   ]
-}}
+}
 
 重要要求：
 1. 请确保返回的是合法的JSON格式
diff --git a/app/services/prompts/short_drama_editing/subtitle_analysis.py b/app/services/prompts/short_drama_editing/subtitle_analysis.py
index 8fc2c28..792ca23 100644
--- a/app/services/prompts/short_drama_editing/subtitle_analysis.py
+++ b/app/services/prompts/short_drama_editing/subtitle_analysis.py
@@ -45,20 +45,20 @@ ${subtitle_content}
 
 请按照以下JSON格式输出分析结果：
 
-{{
+{
   "summary": "整体剧情梗概，简要概括主要故事线、角色关系和核心冲突",
   "plot_titles": [
     "情节点1标题",
     "情节点2标题",
     "情节点3标题"
   ],
-  "analysis_details": {{
+  "analysis_details": {
     "main_characters": ["主要角色1", "主要角色2"],
     "story_theme": "故事主题",
     "conflict_type": "冲突类型（如：爱情、复仇、家庭等）",
     "emotional_peaks": ["情感高潮点1", "情感高潮点2"]
-  }}
-}}
+  }
+}
 
 重要要求：
 1. 必须输出有效的JSON格式，不能包含注释或其他文字
diff --git a/app/services/prompts/short_drama_narration/script_generation.py b/app/services/prompts/short_drama_narration/script_generation.py
index 0bfec6a..608eb31 100644
--- a/app/services/prompts/short_drama_narration/script_generation.py
+++ b/app/services/prompts/short_drama_narration/script_generation.py
@@ -39,17 +39,17 @@ ${plot_analysis}
 
 请严格按照以下JSON格式输出，不要添加任何其他文字、说明或代码块标记：
 
-{{
+{
   "items": [
-    {{
+    {
         "_id": 1,
         "timestamp": "00:00:05,390-00:00:10,430",
         "picture": "剧情描述或者备注",
         "narration": "解说文案，如果片段为穿插的原片片段，可以直接使用 '播放原片+_id' 进行占位",
         "OST": 0
-    }}
+    }
   ]
-}}
+}
 
 重要要求：
 1. 必须输出有效的JSON格式，不能包含注释
diff --git a/docs/prompt_management_system.md b/docs/prompt_management_system.md
index 74abac2..42662f8 100644
--- a/docs/prompt_management_system.md
+++ b/docs/prompt_management_system.md
@@ -192,16 +192,62 @@ python test_prompt_system.py
 ## 注意事项
 
 1. **模板参数**: 使用 `${parameter_name}` 格式
-2. **JSON转义**: 模板中的JSON需要使用双大括号 `{{` 和 `}}`
+2. **JSON格式**: 模板中的JSON示例使用标准格式 `{` 和 `}`，不要使用双大括号；模板参数采用 `${parameter_name}` 占位符，大括号无需转义，而写成 `{{` / `}}` 会原样出现在提示词中，容易导致LLM输出同样的双大括号
 3. **参数验证**: 必需参数会自动验证
 4. **版本管理**: 支持多版本共存，默认使用最新版本
 5. **输出验证**: 建议对LLM输出进行验证以确保格式正确
+6. **JSON解析**: 系统提供强大的JSON解析兼容性，自动处理各种格式问题
+
+## JSON解析优化
+
+系统提供了强大的JSON解析兼容性，能够处理LLM生成的各种格式问题：
+
+### 支持的格式修复
+
+1. **双大括号修复**: 自动将 `{{` 和 `}}` 转换为标准的 `{` 和 `}`
+2. **代码块提取**: 自动从 `` ```json `` 代码块中提取JSON内容
+3. **额外文本处理**: 自动提取大括号包围的JSON内容，忽略前后的额外文本
+4. **尾随逗号修复**: 自动移除对象和数组末尾的多余逗号
+5. **注释移除**: 自动移除 `//` 和 `#` 注释
+6. **引号修复**: 自动修复单引号和缺失的属性名引号
+
+### 解析策略
+
+系统采用多重解析策略，按优先级依次尝试（下面的代码仅为示意，函数名只用于说明各步骤，实际逻辑位于 `parse_and_fix_json` 中）：
+
+```python
+strategies = [
+    ("直接解析", lambda s: json.loads(s)),
+    ("修复双大括号", _fix_double_braces),
+    ("提取代码块", _extract_code_block),
+    ("提取大括号内容", _extract_braces_content),
+    ("修复常见格式问题", _fix_common_json_issues),
+    ("修复引号问题", _fix_quote_issues),
+    ("修复尾随逗号", _fix_trailing_commas),
+    ("强制修复", _force_fix_json),
+]
+```
+
+### 使用示例
+
+```python
+from webui.tools.generate_short_summary import parse_and_fix_json
+
+# 处理双大括号JSON
+json_str = '{{ "items": [{{ "_id": 1, "name": "test" }}] }}'
+result = parse_and_fix_json(json_str)  # 自动修复并解析
+
+# 处理有额外文本的JSON
+json_str = '这是一些文本\n{"items": []}\n更多文本'
+result = parse_and_fix_json(json_str)  # 自动提取JSON部分
+```
 
 ## 性能优化
 
 - 提示词模板会被缓存
 - 支持批量操作
 - 异步渲染支持（未来版本）
+- JSON解析按多种修复策略依次尝试，以提高对LLM输出的解析成功率
 
 ## 故障排除
 
diff --git a/webui/tools/generate_short_summary.py b/webui/tools/generate_short_summary.py
index c175e03..dc972af 100644
--- a/webui/tools/generate_short_summary.py
+++ b/webui/tools/generate_short_summary.py
@@ -45,6 +45,15 @@ def parse_and_fix_json(json_string):
     except json.JSONDecodeError as e:
         logger.warning(f"直接JSON解析失败: {e}")
 
+    # 尝试修复双大括号问题（LLM生成的常见问题）
+    try:
+        # 将双大括号替换为单大括号
+        fixed_braces = json_string.replace('{{', '{').replace('}}', '}')
+        logger.info("修复双大括号格式")
+        return json.loads(fixed_braces)
+    except json.JSONDecodeError:
+        pass
+
     # 尝试提取JSON部分
     try:
         # 查找JSON代码块
@@ -68,19 +77,40 @@ def parse_and_fix_json(json_string):
     except json.JSONDecodeError:
         pass
 
-    # 尝试修复常见的JSON格式问题
+    # 尝试综合修复JSON格式问题
     try:
-        # 移除注释
-        json_string = re.sub(r'#.*', '', json_string)
-        # 移除多余的逗号
-        json_string = re.sub(r',\s*}', '}', json_string)
-        json_string = re.sub(r',\s*]', ']', json_string)
-        # 修复单引号
-        json_string = re.sub(r"'([^']*)':", r'"\1":', json_string)
+        fixed_json = json_string
 
-        logger.info("尝试修复JSON格式问题后解析")
-        return json.loads(json_string)
-    except json.JSONDecodeError:
+        # 1. 修复双大括号问题
+        fixed_json = fixed_json.replace('{{', '{').replace('}}', '}')
+
+        # 2. 提取JSON内容（如果有其他文本包围）
+        start_idx = fixed_json.find('{')
+        end_idx = fixed_json.rfind('}')
+        if start_idx != -1 and end_idx != -1 and end_idx > start_idx:
+            fixed_json = fixed_json[start_idx:end_idx+1]
+
+        # 3. 移除注释
+        fixed_json = re.sub(r'#.*', '', fixed_json)
+        fixed_json = re.sub(r'//.*', '', fixed_json)
+
+        # 4. 移除多余的逗号
+        fixed_json = re.sub(r',\s*}', '}', fixed_json)
+        fixed_json = re.sub(r',\s*]', ']', fixed_json)
+
+        # 5. 修复单引号
+        fixed_json = re.sub(r"'([^']*)':", r'"\1":', fixed_json)
+
+        # 6. 修复没有引号的属性名
+        fixed_json = re.sub(r'(\w+)(\s*):', r'"\1"\2:', fixed_json)
+
+        # 7. 修复重复的引号
+        fixed_json = re.sub(r'""([^"]*?)""', r'"\1"', fixed_json)
+
+        logger.info("尝试综合修复JSON格式问题后解析")
+        return json.loads(fixed_json)
+    except json.JSONDecodeError as e:
+        logger.debug(f"综合修复失败: {e}")
         pass
 
     # 如果所有方法都失败，尝试创建一个基本的结构