From fd657a478a133978266c6de2358136b83e198f10 Mon Sep 17 00:00:00 2001 From: JEECG <445654970@qq.com> Date: Wed, 15 Apr 2026 23:09:28 +0800 Subject: [PATCH] =?UTF-8?q?PR=E6=9C=89=E9=97=AE=E9=A2=98=E4=BF=AE=E5=A4=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../llm/document/TikaDocumentParser.java | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/jeecg-boot/jeecg-boot-module/jeecg-boot-module-airag/src/main/java/org/jeecg/modules/airag/llm/document/TikaDocumentParser.java b/jeecg-boot/jeecg-boot-module/jeecg-boot-module-airag/src/main/java/org/jeecg/modules/airag/llm/document/TikaDocumentParser.java index 58dd4e724..bfa4fd066 100644 --- a/jeecg-boot/jeecg-boot-module/jeecg-boot-module-airag/src/main/java/org/jeecg/modules/airag/llm/document/TikaDocumentParser.java +++ b/jeecg-boot/jeecg-boot-module/jeecg-boot-module-airag/src/main/java/org/jeecg/modules/airag/llm/document/TikaDocumentParser.java @@ -71,27 +71,25 @@ public class TikaDocumentParser { public Document parse(File file) { AssertUtils.assertNotEmpty("请选择文件", file); - try { + // 使用 Tika 自动检测 MIME 类型 + String fileName = file.getName().toLowerCase(); + //后缀 + String ext = FilenameUtils.getExtension(fileName); + if (fileName.endsWith(".txt") + || fileName.endsWith(".md") + || fileName.endsWith(".pdf")) { // 用于解析(使用FileInputStream避免file.toPath()在Linux非UTF-8环境下中文文件名报错) try (InputStream isForParsing = new FileInputStream(file)) { - // 使用 Tika 自动检测 MIME 类型 - String fileName = file.getName().toLowerCase(); - //后缀 - String ext = FilenameUtils.getExtension(fileName); - if (fileName.endsWith(".txt") - || fileName.endsWith(".md") - || fileName.endsWith(".pdf")) { - return extractByTika(isForParsing); - //update-begin---author:wangshuai---date:2026-01-09---for:【QQYUN-14261】【AI】AI助手,支持多模态能力- 文档--- - } else if (FILE_SUFFIX.contains(ext.toLowerCase())) { - return parseDocExcelPdfUsingApachePoi(file); - //update-end---author:wangshuai---date:2026-01-09---for:【QQYUN-14261】【AI】AI助手,支持多模态能力- 文档--- - } else { - throw new IllegalArgumentException("不支持的文件格式: " + FilenameUtils.getExtension(fileName)); - } - } catch (IOException e) { + return extractByTika(isForParsing); + } catch (IOException e) { throw new RuntimeException(e); } + //update-begin---author:wangshuai---date:2026-01-09---for:【QQYUN-14261】【AI】AI助手,支持多模态能力- 文档--- + } else if (FILE_SUFFIX.contains(ext.toLowerCase())) { + return parseDocExcelPdfUsingApachePoi(file); + //update-end---author:wangshuai---date:2026-01-09---for:【QQYUN-14261】【AI】AI助手,支持多模态能力- 文档--- + } else { + throw new IllegalArgumentException("不支持的文件格式: " + FilenameUtils.getExtension(fileName)); } }