From 919b652a066ed6ee8480e177d3866b4be49eb448 Mon Sep 17 00:00:00 2001 From: kuaifan Date: Wed, 19 Mar 2025 22:49:03 +0800 Subject: [PATCH] =?UTF-8?q?perf:=20=E4=BC=98=E5=8C=96AI=E8=A7=A3=E6=9E=90?= =?UTF-8?q?=E6=96=87=E4=BB=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/Module/TextExtractor.php | 49 +++++++--------------- app/Tasks/BotReceiveMsgTask.php | 74 +++++++++++++++++++++------------ 2 files changed, 63 insertions(+), 60 deletions(-) diff --git a/app/Module/TextExtractor.php b/app/Module/TextExtractor.php index aac73d03d..31f225ec3 100644 --- a/app/Module/TextExtractor.php +++ b/app/Module/TextExtractor.php @@ -3,7 +3,6 @@ namespace App\Module; use Exception; -use Illuminate\Support\Facades\Log; use PhpOffice\PhpWord\IOFactory; use Smalot\PdfParser\Parser; @@ -19,25 +18,17 @@ class TextExtractor public function extractText(string $filePath): string { if (!file_exists($filePath)) { - throw new Exception("文件不存在: {$filePath}"); + throw new Exception("File does not exist: {$filePath}"); } $fileExtension = strtolower(pathinfo($filePath, PATHINFO_EXTENSION)); - try { - return match($fileExtension) { - 'pdf' => $this->extractFromPDF($filePath), - 'docx' => $this->extractFromDOCX($filePath), - 'ipynb' => $this->extractFromIPYNB($filePath), - default => $this->extractFromOtherFile($filePath), - }; - } catch (Exception $e) { - Log::error('文本提取失败', [ - 'file' => $filePath, - 'error' => $e->getMessage() - ]); - throw $e; - } + return match($fileExtension) { + 'pdf' => $this->extractFromPDF($filePath), + 'docx' => $this->extractFromDOCX($filePath), + 'ipynb' => $this->extractFromIPYNB($filePath), + default => $this->extractFromOtherFile($filePath), + }; } /** @@ -55,11 +46,7 @@ class TextExtractor return $pdf->getText(); } catch (Exception $e) { - Log::error('PDF解析失败', [ - 'file' => $filePath, - 'error' => $e->getMessage() - ]); - throw new Exception("PDF文本提取失败: " . $e->getMessage()); + throw new Exception("PDF text extraction failed: " . $e->getMessage()); } } @@ -99,7 +86,7 @@ class TextExtractor $notebook = json_decode($content, true); if (json_last_error() !== JSON_ERROR_NONE) { - throw new Exception("IPYNB文件解析失败: " . json_last_error_msg()); + throw new Exception("IPYNB file parsing failed: " . json_last_error_msg()); } $extractedText = ''; @@ -127,7 +114,7 @@ class TextExtractor protected function extractFromOtherFile(string $filePath): string { if ($this->isBinaryFile($filePath)) { - throw new Exception("无法读取该类型文件的文本内容"); + throw new Exception("Unable to read the text content of this type of file"); } return file_get_contents($filePath); @@ -158,25 +145,21 @@ class TextExtractor * 获取文件内容 * @param $filePath * @param float|int $maxSize 最大文件大小,单位字节,默认300KB - * @return string + * @return array */ public static function getFileContent($filePath, float|int $maxSize = 300 * 1024) { if (!file_exists($filePath) || !is_file($filePath)) { - return "(Failed to read contents of {$filePath})"; + return Base::retError("Failed to read contents of {$filePath}"); } if (filesize($filePath) > $maxSize) { - return "(File size exceeds " . Base::readableBytes($maxSize) . ", unable to display content)"; + return Base::retError("File size exceeds " . Base::readableBytes($maxSize) . ", unable to display content"); } - $te = new self(); try { - $isBinary = $te->isBinaryFile($filePath); - if ($isBinary) { - return "(Binary file, unable to display content)"; - } - return $te->extractText($filePath); + $extractor = new self(); + return Base::retSuccess("success", $extractor->extractText($filePath)); } catch (Exception $e) { - return "(Failed to read contents of {$filePath}: {$e->getMessage()})"; + return Base::retError($e->getMessage()); } } } diff --git a/app/Tasks/BotReceiveMsgTask.php b/app/Tasks/BotReceiveMsgTask.php index 7ee9f8faf..8b78ba68d 100644 --- a/app/Tasks/BotReceiveMsgTask.php +++ b/app/Tasks/BotReceiveMsgTask.php @@ -17,6 +17,7 @@ use App\Module\Doo; use App\Module\Ihttp; use App\Module\TextExtractor; use Carbon\Carbon; +use Exception; use League\HTMLToMarkdown\HtmlConverter; use DB; @@ -90,8 +91,16 @@ class BotReceiveMsgTask extends AbstractTask } // 提取指令 - $command = $this->extractCommand($msg, $botUser->isAiBot(), $this->mention); - if (empty($command)) { + try { + $command = $this->extractCommand($msg, $botUser->isAiBot(), $this->mention); + if (empty($command)) { + return; + } + } catch (Exception $e) { + WebSocketDialogMsg::sendMsg(null, $msg->dialog_id, 'template', [ + 'type' => 'content', + 'content' => $e->getMessage() ?: "指令解析失败。", + ], $botUser->userid, false, false, true); // todo 未能在任务end事件来发送任务 return; } @@ -473,14 +482,20 @@ class BotReceiveMsgTask extends AbstractTask if ($replyMsg) { switch ($replyMsg->type) { case 'text': - $replyCommand = $this->extractCommand($replyMsg, true); - if ($replyCommand) { - $replyCommand = Base::cutStr($replyCommand, 20000); + try { + $replyCommand = $this->extractCommand($replyMsg, true); + } catch (Exception) { + $errorContent = "引用消息解析失败。"; } break; case 'file': $msgData = Base::json2array($replyMsg->getRawOriginal('msg')); - $replyCommand = TextExtractor::getFileContent(public_path($msgData['path'])); + $fileResult = TextExtractor::getFileContent(public_path($msgData['path'])); + if (Base::isError($fileResult)) { + $errorContent = $fileResult['msg']; + } else { + $replyCommand = $fileResult['data']; + } break; } } @@ -581,6 +596,7 @@ class BotReceiveMsgTask extends AbstractTask * @param bool $isAiBot * @param bool $mention * @return string + * @throws Exception */ private function extractCommand(WebSocketDialogMsg $msg, bool $isAiBot = false, bool $mention = false) { @@ -608,13 +624,12 @@ class BotReceiveMsgTask extends AbstractTask if (preg_match_all("/(.*?)<\/span>/", $original, $match)) { $taskIds = Base::newIntval($match[1]); foreach ($taskIds as $index => $taskId) { - $taskName = addslashes($match[2][$index]) . " (ID:{$taskId})"; - $taskContext = "任务状态:不存在或已删除"; $taskInfo = ProjectTask::with(['content'])->whereId($taskId)->first(); - if ($taskInfo) { - $taskName = addslashes($taskInfo->name) . " (ID:{$taskId})"; - $taskContext = implode("\n", $taskInfo->AIContext()); + if (!$taskInfo) { + throw new Exception("任务不存在或已被删除"); } + $taskName = addslashes($taskInfo->name) . " (ID:{$taskId})"; + $taskContext = implode("\n", $taskInfo->AIContext()); $contents[] = "\n{$taskContext}\n"; $original = str_replace($match[0][$index], "'{$taskName}' (see below for task_content tag)", $original); } @@ -628,28 +643,31 @@ class BotReceiveMsgTask extends AbstractTask $pathContent = null; // 文件 if (preg_match("/single\/file\/(.*?)$/", $urlPath, $fileMatch)) { - $pathTag = "file_content"; - $pathName = addslashes($match[3][$index]); - $pathContent = "文件状态:不存在或已删除"; $fileInfo = FileContent::idOrCodeToContent($fileMatch[1]); - if ($fileInfo && isset($fileInfo->content['url'])) { - $urlPath = public_path($fileInfo->content['url']); - if (file_exists($urlPath)) { - $pathName .= " (ID:{$fileInfo->id})"; - $pathContent = TextExtractor::getFileContent($urlPath); - } + if (!$fileInfo || !isset($fileInfo->content['url'])) { + throw new Exception("文件不存在或已被删除"); } + $urlPath = public_path($fileInfo->content['url']); + if (!file_exists($urlPath)) { + throw new Exception("文件不存在或已被删除"); + } + $fileResult = TextExtractor::getFileContent($urlPath); + if (Base::isError($fileResult)) { + throw new Exception("文件读取失败:" . $fileResult['msg']); + } + $pathTag = "file_content"; + $pathName = addslashes($match[3][$index]) . " (ID:{$fileInfo->id})"; + $pathContent = $fileResult['data']; } // 报告 elseif (preg_match("/single\/report\/detail\/(.*?)$/", $urlPath, $reportMatch)) { - $pathTag = "report_content"; - $pathName = addslashes($match[3][$index]); - $pathContent = "报告状态:不存在或已删除"; $reportInfo = Report::idOrCodeToContent($reportMatch[1]); - if ($reportInfo) { - $pathName .= " (ID:{$reportInfo->id})"; - $pathContent = $reportInfo->content; + if (!$reportInfo) { + throw new Exception("报告不存在或已被删除"); } + $pathTag = "report_content"; + $pathName = addslashes($match[3][$index]) . " (ID:{$reportInfo->id})"; + $pathContent = $reportInfo->content; } if ($pathTag) { $contents[] = "<{$pathTag} path=\"{$pathName}\">\n{$pathContent}\n"; @@ -662,7 +680,9 @@ class BotReceiveMsgTask extends AbstractTask try { $converter = new HtmlConverter(); $original = $converter->convert($original); - } catch (\Exception) { } + } catch (\Exception) { + throw new Exception("Failed to convert HTML to Markdown"); + } } if ($contents) { // 添加tag内容