perf: 优化全文搜索

This commit is contained in:
kuaifan 2025-04-17 22:36:32 +08:00
parent fe84f812e7
commit f258dcfca2
3 changed files with 66 additions and 36 deletions

View File

@ -7,7 +7,7 @@ use App\Module\ZincSearch\ZincSearchKeyValue;
use App\Module\ZincSearch\ZincSearchUserMsg; use App\Module\ZincSearch\ZincSearchUserMsg;
use Illuminate\Console\Command; use Illuminate\Console\Command;
class SyncDialogUserMsgToZincSearch extends Command class SyncUserMsgToZincSearch extends Command
{ {
/** /**
* 更新数据 * 更新数据
@ -18,7 +18,7 @@ class SyncDialogUserMsgToZincSearch extends Command
* --c: 清除索引 * --c: 清除索引
*/ */
protected $signature = 'zinc:sync-dialog-user-msg {--f} {--i} {--c} {--batch=1000}'; protected $signature = 'zinc:sync-user-msg {--f} {--i} {--c} {--batch=1000}';
protected $description = '同步聊天会话用户和消息到 ZincSearch'; protected $description = '同步聊天会话用户和消息到 ZincSearch';
/** /**

View File

@ -238,6 +238,15 @@ class ZincSearchBase
return (new self())->request("/api/{$index}/_search", $body); return (new self())->request("/api/{$index}/_search", $body);
} }
/**
 * Query documents through the ES-compatible search endpoint.
 *
 * Serialises the search parameters with json_encode() and hands them to
 * request() on a fresh instance, targeting "/es/{$index}/_search".
 *
 * @param mixed $index        Index name, interpolated into the request path as-is
 *                            (presumably a string — untyped here, confirm against callers).
 * @param mixed $searchParams Search body; encoded to JSON without any extra flags.
 * @return array Whatever request() returns for this call.
 */
public static function elasticSearch($index, $searchParams): array
{
    // Encode first, then build the client, so evaluation order stays the same as before.
    $payload = json_encode($searchParams);
    $client = new self();

    return $client->request("/es/{$index}/_search", $payload);
}
/** /**
* 多索引查询 * 多索引查询
*/ */

View File

@ -47,10 +47,9 @@ class ZincSearchUserMsg
if (!ZincSearchBase::indexExists(self::$indexName)) { if (!ZincSearchBase::indexExists(self::$indexName)) {
$mappings = [ $mappings = [
'properties' => [ 'properties' => [
// 共用字段 // 关联字段
'dialog_id' => ['type' => 'keyword', 'index' => true], '_id' => ['type' => 'keyword', 'index' => true],
'created_at' => ['type' => 'date', 'index' => true], '_dialog_userid' => ['type' => 'keyword', 'index' => true],
'updated_at' => ['type' => 'date', 'index' => true],
// dialog_users 字段 // dialog_users 字段
'userid' => ['type' => 'keyword', 'index' => true], 'userid' => ['type' => 'keyword', 'index' => true],
@ -63,14 +62,13 @@ class ZincSearchUserMsg
// dialog_msgs 字段 // dialog_msgs 字段
'msg_id' => ['type' => 'keyword', 'index' => true], 'msg_id' => ['type' => 'keyword', 'index' => true],
'dialog_id' => ['type' => 'keyword', 'index' => true],
'sender_userid' => ['type' => 'keyword', 'index' => true], 'sender_userid' => ['type' => 'keyword', 'index' => true],
'msg_type' => ['type' => 'keyword', 'index' => true], 'msg_type' => ['type' => 'keyword', 'index' => true],
'key' => ['type' => 'text', 'index' => true], 'key' => ['type' => 'text', 'index' => true],
'bot' => ['type' => 'numeric', 'index' => true], 'bot' => ['type' => 'numeric', 'index' => true],
'created_at' => ['type' => 'date', 'index' => true],
// 关联字段 'updated_at' => ['type' => 'date', 'index' => true],
'userid_msg_id' => ['type' => 'keyword', 'index' => true],
'userid_dialog_id' => ['type' => 'keyword', 'index' => true],
] ]
]; ];
$result = ZincSearchBase::createIndex(self::$indexName, $mappings); $result = ZincSearchBase::createIndex(self::$indexName, $mappings);
@ -115,19 +113,25 @@ class ZincSearchUserMsg
*/ */
public static function searchByKeyword(string $userid, string $keyword, int $from = 0, int $size = 20): array public static function searchByKeyword(string $userid, string $keyword, int $from = 0, int $size = 20): array
{ {
// 构建复杂的搜索查询
$searchParams = [ $searchParams = [
'search_type' => 'querystring',
'query' => [ 'query' => [
'term' => "+userid:{$userid} +key:*{$keyword}*" 'bool' => [
'must' => [
['term' => ['userid' => $userid]],
['term' => ['bot' => 0]],
['match_phrase' => ['key' => $keyword]]
]
]
], ],
'from' => $from, 'from' => $from,
'max_results' => $size, 'size' => $size,
'sort_fields' => ["updated_at:desc"] 'sort' => [
['updated_at' => 'desc']
]
]; ];
try { try {
return ZincSearchBase::advancedSearch(self::$indexName, $searchParams); return ZincSearchBase::elasticSearch(self::$indexName, $searchParams);
} catch (\Exception $e) { } catch (\Exception $e) {
Log::error('搜索对话消息失败: ' . $e->getMessage()); Log::error('搜索对话消息失败: ' . $e->getMessage());
return [ return [
@ -149,9 +153,9 @@ class ZincSearchUserMsg
* @param WebSocketDialogUser $dialogUser * @param WebSocketDialogUser $dialogUser
* @return string * @return string
*/ */
private static function generateUseridMsgId(WebSocketDialogMsg $dialogMsg, WebSocketDialogUser $dialogUser): string private static function generateDocId(WebSocketDialogMsg $dialogMsg, WebSocketDialogUser $dialogUser): string
{ {
return "{$dialogUser->userid}_{$dialogMsg->id}"; return "{$dialogMsg->id}_{$dialogUser->userid}";
} }
/** /**
@ -160,9 +164,9 @@ class ZincSearchUserMsg
* @param WebSocketDialogUser $dialogUser * @param WebSocketDialogUser $dialogUser
* @return string * @return string
*/ */
private static function generateUseridDialogId(WebSocketDialogUser $dialogUser): string private static function generateDialogUserid(WebSocketDialogUser $dialogUser): string
{ {
return "{$dialogUser->userid}_{$dialogUser->dialog_id}"; return "{$dialogUser->dialog_id}_{$dialogUser->userid}";
} }
/** /**
@ -175,9 +179,8 @@ class ZincSearchUserMsg
private static function generateMsgFormat(WebSocketDialogMsg $dialogMsg, WebSocketDialogUser $dialogUser): array private static function generateMsgFormat(WebSocketDialogMsg $dialogMsg, WebSocketDialogUser $dialogUser): array
{ {
return [ return [
'dialog_id' => $dialogMsg->dialog_id, '_id' => self::generateDocId($dialogMsg, $dialogUser),
'created_at' => $dialogMsg->created_at, '_dialog_userid' => self::generateDialogUserid($dialogUser),
'updated_at' => $dialogMsg->updated_at,
'userid' => $dialogUser->userid, 'userid' => $dialogUser->userid,
'top_at' => $dialogUser->top_at, 'top_at' => $dialogUser->top_at,
@ -188,13 +191,13 @@ class ZincSearchUserMsg
'color' => $dialogUser->color, 'color' => $dialogUser->color,
'msg_id' => $dialogMsg->id, 'msg_id' => $dialogMsg->id,
'dialog_id' => $dialogMsg->dialog_id,
'sender_userid' => $dialogMsg->userid, 'sender_userid' => $dialogMsg->userid,
'msg_type' => $dialogMsg->type, 'msg_type' => $dialogMsg->type,
'key' => $dialogMsg->key, 'key' => $dialogMsg->key,
'bot' => $dialogMsg->bot ? 1 : 0, 'bot' => $dialogMsg->bot ? 1 : 0,
'created_at' => $dialogMsg->created_at,
'userid_msg_id' => self::generateUseridMsgId($dialogMsg, $dialogUser), 'updated_at' => $dialogMsg->updated_at,
'userid_dialog_id' => self::generateUseridDialogId($dialogUser),
]; ];
} }
@ -220,6 +223,14 @@ class ZincSearchUserMsg
$docs = []; $docs = [];
foreach ($dialogUsers as $dialogUser) { foreach ($dialogUsers as $dialogUser) {
if (empty($dialogMsg->key)) {
// 如果消息没有关键词,跳过
continue;
}
if ($dialogUser->userid == 0) {
// 跳过系统用户
continue;
}
$docs[] = self::generateMsgFormat($dialogMsg, $dialogUser); $docs[] = self::generateMsgFormat($dialogMsg, $dialogUser);
} }
@ -270,11 +281,21 @@ class ZincSearchUserMsg
// 为每条消息准备所有相关用户的文档 // 为每条消息准备所有相关用户的文档
foreach ($dialogMsgs as $dialogMsg) { foreach ($dialogMsgs as $dialogMsg) {
if (isset($userDialogs[$dialogMsg->dialog_id])) { if (!isset($userDialogs[$dialogMsg->dialog_id])) {
foreach ($userDialogs[$dialogMsg->dialog_id] as $dialogUser) { // 如果该会话没有用户,跳过
$docs[] = self::generateMsgFormat($dialogMsg, $dialogUser); continue;
$count++; }
foreach ($userDialogs[$dialogMsg->dialog_id] as $dialogUser) {
if (empty($dialogMsg->key)) {
// 如果消息没有关键词,跳过
continue;
} }
if ($dialogUser->userid == 0) {
// 跳过系统用户
continue;
}
$docs[] = self::generateMsgFormat($dialogMsg, $dialogUser);
$count++;
} }
} }
@ -297,7 +318,7 @@ class ZincSearchUserMsg
*/ */
public static function deleteMsg(WebSocketDialogMsg $dialogMsg): int public static function deleteMsg(WebSocketDialogMsg $dialogMsg): int
{ {
$batchSize = 1000; // 每批处理的文档数量 $batchSize = 500; // 每批处理的文档数量
$totalDeleted = 0; // 总共删除的文档数量 $totalDeleted = 0; // 总共删除的文档数量
$from = 0; $from = 0;
@ -308,7 +329,7 @@ class ZincSearchUserMsg
'search_type' => 'term', 'search_type' => 'term',
'query' => [ 'query' => [
'field' => 'msg_id', 'field' => 'msg_id',
'term' => $dialogMsg->id 'term' => (string) $dialogMsg->id
], ],
'from' => $from, 'from' => $from,
'max_results' => $batchSize 'max_results' => $batchSize
@ -355,7 +376,7 @@ class ZincSearchUserMsg
*/ */
public static function syncUser(WebSocketDialogUser $dialogUser): void public static function syncUser(WebSocketDialogUser $dialogUser): void
{ {
$batchSize = 1000; // 每批处理的文档数量 $batchSize = 500; // 每批处理的文档数量
$lastId = 0; // 上次处理的最后ID $lastId = 0; // 上次处理的最后ID
do { do {
@ -398,7 +419,7 @@ class ZincSearchUserMsg
*/ */
public static function deleteUser(WebSocketDialogUser $dialogUser): int public static function deleteUser(WebSocketDialogUser $dialogUser): int
{ {
$batchSize = 1000; // 每批处理的文档数量 $batchSize = 500; // 每批处理的文档数量
$totalDeleted = 0; // 总共删除的文档数量 $totalDeleted = 0; // 总共删除的文档数量
$from = 0; $from = 0;
@ -408,8 +429,8 @@ class ZincSearchUserMsg
$result = ZincSearchBase::advancedSearch(self::$indexName, [ $result = ZincSearchBase::advancedSearch(self::$indexName, [
'search_type' => 'term', 'search_type' => 'term',
'query' => [ 'query' => [
'field' => 'userid_dialog_id', 'field' => '_dialog_userid',
'term' => self::generateUseridDialogId($dialogUser), 'term' => self::generateDialogUserid($dialogUser),
], ],
'from' => $from, 'from' => $from,
'max_results' => $batchSize 'max_results' => $batchSize