Use tolerant JSON parsing for short scripts

This commit is contained in:
zhanglei 2026-07-02 15:46:20 +08:00
parent 40b91d25b7
commit 2d0adcfdec
4 changed files with 70 additions and 6 deletions

View File

@ -14,9 +14,25 @@ from webui.tools.generate_short_summary import (
_build_combined_subtitle_content,
_normalize_paths,
analyze_short_drama_plot,
parse_and_fix_json,
)
def _parse_generated_script_payload(script):
if isinstance(script, list):
return script
if isinstance(script, str):
parsed = parse_and_fix_json(script)
if isinstance(parsed, list):
return parsed
if isinstance(parsed, dict) and isinstance(parsed.get("items"), list):
return parsed["items"]
raise ValueError("Generated script JSON must be a list or contain an items list")
raise ValueError("Generated script payload must be a list or JSON string")
def generate_script_short(
tr,
params,
@ -175,10 +191,7 @@ def generate_script_short(
script = result.get("script")
logger.info(f"脚本生成完成 {json.dumps(script, ensure_ascii=False, indent=4)}")
if isinstance(script, list):
st.session_state['video_clip_json'] = script
elif isinstance(script, str):
st.session_state['video_clip_json'] = json.loads(script)
st.session_state['video_clip_json'] = _parse_generated_script_payload(script)
update_progress(80, tr("Script generation completed"))

View File

@ -176,8 +176,8 @@ def parse_and_fix_json(json_string):
# 5. 修复单引号
fixed_json = re.sub(r"'([^']*)':", r'"\1":', fixed_json)
# 6. 修复没有引号的属性名
fixed_json = re.sub(r'(\w+)(\s*):', r'"\1"\2:', fixed_json)
# 6. 修复没有引号的属性名,仅匹配对象边界后的 key避免误伤时间戳等字符串值
fixed_json = re.sub(r'([{\[,]\s*)([A-Za-z_][A-Za-z0-9_]*)(\s*:)', r'\1"\2"\3', fixed_json)
# 7. 修复重复的引号
fixed_json = re.sub(r'""([^"]*?)""', r'"\1"', fixed_json)

View File

@ -0,0 +1,38 @@
import unittest
from webui.tools.generate_script_short import _parse_generated_script_payload
class GenerateScriptShortPayloadTests(unittest.TestCase):
    """Unit tests for ``_parse_generated_script_payload``."""

    def test_parse_generated_script_payload_keeps_list_payload(self):
        # A payload that is already a list must come back with equal content.
        payload = [{"_id": 1, "timestamp": "00:00:01,000-00:00:02,000"}]

        self.assertEqual(payload, _parse_generated_script_payload(payload))

    def test_parse_generated_script_payload_accepts_items_wrapper(self):
        # A JSON object wrapping the list under "items" is unwrapped.
        payload = '{"items": [{"_id": 1, "timestamp": "00:00:01,000-00:00:02,000"}]}'

        parsed = _parse_generated_script_payload(payload)

        self.assertEqual(1, parsed[0]["_id"])

    def test_parse_generated_script_payload_repairs_common_llm_json_formatting(self):
        # Markdown code fence plus trailing commas: strict JSON parsing would
        # reject this input, the tolerant path is expected to accept it.
        payload = """```json
{
"items": [
{"_id": 1, "timestamp": "00:00:01,000-00:00:02,000",},
],
}
```"""

        parsed = _parse_generated_script_payload(payload)

        self.assertEqual(1, parsed[0]["_id"])

    def test_parse_generated_script_payload_rejects_invalid_shape(self):
        # Valid JSON, but neither a list nor an object with an "items" list.
        with self.assertRaises(ValueError):
            _parse_generated_script_payload('{"unexpected": []}')

    def test_parse_generated_script_payload_rejects_non_list_non_string(self):
        # Payloads that are neither a list nor a string hit the final
        # fallback error, which the other tests do not exercise.
        for payload in (None, 42, {"items": []}):
            with self.subTest(payload=payload):
                with self.assertRaises(ValueError):
                    _parse_generated_script_payload(payload)
if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()

View File

@ -22,6 +22,19 @@ class GenerateShortSummaryJsonTests(unittest.TestCase):
self.assertEqual(1, parsed["items"][0]["_id"])
def test_repair_does_not_corrupt_timestamp_values(self):
    """Check that repairing unquoted keys leaves a timestamp string intact.

    The input uses unquoted keys, trailing commas and a Markdown code
    fence; the timestamp value contains colons and commas that a
    key-quoting repair must not rewrite.
    """
    parsed = parse_and_fix_json(
        """```json
{
items: [
{_id: 1, timestamp: "00:00:01,000-00:00:02,000",},
],
}
```"""
    )

    self.assertEqual("00:00:01,000-00:00:02,000", parsed["items"][0]["timestamp"])
if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()