dootask/app/Module/Manticore/ManticoreBase.php
2026-01-04 13:30:03 +00:00

2171 lines
65 KiB
PHP
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
namespace App\Module\Manticore;
use App\Module\Apps;
use App\Module\Base;
use App\Module\AI;
use PDO;
use PDOException;
use Illuminate\Support\Facades\Log;
/**
* Manticore Search 基础类
*
* Manticore Search 兼容 MySQL 协议,可以直接使用 PDO 连接
* 默认端口 9306 为 MySQL 协议端口
*/
class ManticoreBase
{
private static ?PDO $pdo = null;
private static bool $initialized = false;
private string $host;
private int $port;
/**
 * Read the Manticore endpoint from the environment.
 *
 * MANTICORE_HOST defaults to "manticore" and MANTICORE_PORT to 9306.
 */
public function __construct()
{
    $configuredHost = env('MANTICORE_HOST', 'manticore');
    $configuredPort = env('MANTICORE_PORT', 9306);

    $this->host = $configuredHost;
    $this->port = (int) $configuredPort;
}
/**
 * Return the shared PDO handle, opening it on first use.
 *
 * The handle is cached in a static property, so every instance reuses it.
 * On the first successful connect the table schema is initialised once.
 *
 * @return PDO|null Null when the "manticore" app is not installed or the connection fails.
 */
private function getConnection(): ?PDO
{
    if (!Apps::isInstalled("manticore")) {
        return null;
    }

    // Reuse the cached handle when one already exists.
    if (self::$pdo !== null) {
        return self::$pdo;
    }

    $options = [
        PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
        PDO::ATTR_DEFAULT_FETCH_MODE => PDO::FETCH_ASSOC,
        PDO::ATTR_TIMEOUT => 30,
    ];

    try {
        // Connects over the MySQL protocol with empty user name and password.
        $connection = new PDO("mysql:host={$this->host};port={$this->port}", '', '', $options);

        // Create the tables only once per process (or after resetConnection()).
        if (!self::$initialized) {
            $this->initializeTables($connection);
            self::$initialized = true;
        }

        self::$pdo = $connection;
    } catch (PDOException $e) {
        Log::error('Manticore connection failed: ' . $e->getMessage());
        return null;
    }

    return self::$pdo;
}
/**
 * Create every table this class uses, if it does not exist yet.
 *
 * Each CREATE statement runs in its own try/catch so that a failure on one
 * table (for example an unsupported table option) no longer prevents the
 * remaining tables from being created. Failures are logged as warnings and
 * never thrown; the success message is logged only when all statements ran.
 *
 * @param PDO $pdo Open connection to run the DDL on.
 */
private function initializeTables(PDO $pdo): void
{
    $statements = [
        // File vectors
        'file_vectors' => "
            CREATE TABLE IF NOT EXISTS file_vectors (
                id BIGINT,
                file_id BIGINT,
                userid BIGINT,
                pshare BIGINT,
                file_name TEXT,
                file_type STRING,
                file_ext STRING,
                content TEXT,
                allowed_users MULTI,
                content_vector float_vector knn_type='hnsw' knn_dims='1536' hnsw_similarity='cosine'
            ) charset_table='chinese' morphology='icu_chinese'
        ",
        // Key/value storage
        'key_values' => "
            CREATE TABLE IF NOT EXISTS key_values (
                id BIGINT,
                k STRING,
                v TEXT
            )
        ",
        // User vectors
        'user_vectors' => "
            CREATE TABLE IF NOT EXISTS user_vectors (
                id BIGINT,
                userid BIGINT,
                nickname TEXT,
                email STRING,
                profession TEXT,
                tags TEXT,
                introduction TEXT,
                content_vector float_vector knn_type='hnsw' knn_dims='1536' hnsw_similarity='cosine'
            ) charset_table='chinese' morphology='icu_chinese'
        ",
        // Project vectors
        'project_vectors' => "
            CREATE TABLE IF NOT EXISTS project_vectors (
                id BIGINT,
                project_id BIGINT,
                userid BIGINT,
                personal INTEGER,
                project_name TEXT,
                project_desc TEXT,
                allowed_users MULTI,
                content_vector float_vector knn_type='hnsw' knn_dims='1536' hnsw_similarity='cosine'
            ) charset_table='chinese' morphology='icu_chinese'
        ",
        // Task vectors
        'task_vectors' => "
            CREATE TABLE IF NOT EXISTS task_vectors (
                id BIGINT,
                task_id BIGINT,
                project_id BIGINT,
                userid BIGINT,
                visibility INTEGER,
                task_name TEXT,
                task_desc TEXT,
                task_content TEXT,
                allowed_users MULTI,
                content_vector float_vector knn_type='hnsw' knn_dims='1536' hnsw_similarity='cosine'
            ) charset_table='chinese' morphology='icu_chinese'
        ",
        // Message vectors
        'msg_vectors' => "
            CREATE TABLE IF NOT EXISTS msg_vectors (
                id BIGINT,
                msg_id BIGINT,
                dialog_id BIGINT,
                userid BIGINT,
                msg_type STRING,
                content TEXT,
                allowed_users MULTI,
                created_at BIGINT,
                content_vector float_vector knn_type='hnsw' knn_dims='1536' hnsw_similarity='cosine'
            ) charset_table='chinese' morphology='icu_chinese'
        ",
    ];

    $failures = 0;
    foreach ($statements as $table => $ddl) {
        try {
            $pdo->exec($ddl);
        } catch (PDOException $e) {
            // Best effort: record the failure and keep going with the next table.
            $failures++;
            Log::warning('Manticore initialization warning: ' . $e->getMessage(), [
                'table' => $table,
            ]);
        }
    }

    if ($failures === 0) {
        Log::info('Manticore tables initialized successfully');
    }
}
/**
 * Drop the cached connection state (intended for long-running processes).
 *
 * Clears the initialisation flag and the shared PDO handle so the next
 * getConnection() call reconnects and runs table initialisation again.
 */
public static function resetConnection(): void
{
    self::$initialized = false;
    self::$pdo = null;
}
/**
 * Tell whether the "manticore" app is installed.
 *
 * @return bool Result of Apps::isInstalled() for the "manticore" app.
 */
public static function isInstalled(): bool
{
    $installed = Apps::isInstalled('manticore');

    return $installed;
}
/**
 * Run a statement that produces no result set.
 *
 * @param string $sql    SQL text with positional "?" placeholders.
 * @param array  $params Values bound to the placeholders, in order.
 * @return bool True on success; false when no connection is available or the
 *              statement raises a PDOException (which is logged).
 */
public function execute(string $sql, array $params = []): bool
{
    $connection = $this->getConnection();
    if ($connection === null) {
        return false;
    }

    try {
        $statement = $connection->prepare($sql);
        $this->bindParams($statement, $params);
        $succeeded = $statement->execute();
    } catch (PDOException $e) {
        Log::error('Manticore execute error: ' . $e->getMessage(), [
            'sql' => $sql,
            'params' => $params
        ]);
        return false;
    }

    return $succeeded;
}
/**
 * Run a statement and report how many rows it affected.
 *
 * @param string $sql    SQL text with positional "?" placeholders.
 * @param array  $params Values bound to the placeholders, in order.
 * @return int Affected row count, or -1 when no connection is available or the
 *             statement raises a PDOException (which is logged).
 */
public function executeWithRowCount(string $sql, array $params = []): int
{
    $connection = $this->getConnection();
    if ($connection === null) {
        return -1;
    }

    try {
        $statement = $connection->prepare($sql);
        $this->bindParams($statement, $params);
        $statement->execute();
        $affected = $statement->rowCount();
    } catch (PDOException $e) {
        Log::error('Manticore execute error: ' . $e->getMessage(), [
            'sql' => $sql,
            'params' => $params
        ]);
        return -1;
    }

    return $affected;
}
/**
 * Run a query and return every row.
 *
 * Rows are passed through convertNumericTypes() before being returned.
 *
 * @param string $sql    SQL text with positional "?" placeholders.
 * @param array  $params Values bound to the placeholders, in order.
 * @return array List of associative rows; empty when no connection is
 *               available or a PDOException is raised (which is logged).
 */
public function query(string $sql, array $params = []): array
{
    $connection = $this->getConnection();
    if ($connection === null) {
        return [];
    }

    try {
        $statement = $connection->prepare($sql);
        $this->bindParams($statement, $params);
        $statement->execute();
        $rows = $statement->fetchAll();

        return $this->convertNumericTypes($rows);
    } catch (PDOException $e) {
        Log::error('Manticore query error: ' . $e->getMessage(), [
            'sql' => $sql,
            'params' => $params
        ]);
    }

    return [];
}
/**
 * Run a query and return only its first row.
 *
 * @param string $sql    SQL text with positional "?" placeholders.
 * @param array  $params Values bound to the placeholders, in order.
 * @return array|null The first row after numeric conversion; null when there
 *                    is no row, no connection, or a PDOException is raised.
 */
public function queryOne(string $sql, array $params = []): ?array
{
    $connection = $this->getConnection();
    if ($connection === null) {
        return null;
    }

    try {
        $statement = $connection->prepare($sql);
        $this->bindParams($statement, $params);
        $statement->execute();

        $row = $statement->fetch();
        if (!$row) {
            return null;
        }

        return $this->convertNumericTypesRow($row);
    } catch (PDOException $e) {
        Log::error('Manticore queryOne error: ' . $e->getMessage(), [
            'sql' => $sql,
            'params' => $params
        ]);
    }

    return null;
}
/**
 * Apply convertNumericTypesRow() to every row of a result set.
 *
 * PDO hands numeric columns such as BIGINT back as strings; this normalises
 * them row by row. Row keys are preserved.
 *
 * @param array $rows Result set.
 * @return array Result set with converted rows.
 */
private function convertNumericTypes(array $rows): array
{
    $converted = [];
    foreach ($rows as $index => $row) {
        $converted[$index] = $this->convertNumericTypesRow($row);
    }

    return $converted;
}
/**
 * Turn integer-looking string values of one row into PHP ints.
 *
 * Only canonical integer strings are converted: optional minus sign, digits
 * only, and the value must survive an int round-trip unchanged. This leaves
 * alone values the previous is_numeric()-based check corrupted:
 *  - exponent notation such as "1e-05" (a small distance was cast to 0),
 *  - zero-padded text such as "007",
 *  - digit strings outside the PHP int range (they were clamped).
 * Strings containing a decimal point stay strings, as before.
 *
 * A text column whose content is a canonical integer (e.g. a file named
 * "2024") is still converted, because no column metadata is available here.
 *
 * @param array $row One associative result row.
 * @return array The row with integer strings replaced by ints.
 */
private function convertNumericTypesRow(array $row): array
{
    foreach ($row as $key => $value) {
        if (!is_string($value) || preg_match('/^-?\d+$/', $value) !== 1) {
            continue;
        }

        $asInt = (int) $value;
        // The round-trip check rejects leading zeros, "-0", overflow and a trailing newline.
        if ((string) $asInt === $value) {
            $row[$key] = $asInt;
        }
    }

    return $row;
}
/**
 * Bind positional parameters to a prepared statement.
 *
 * Manticore is sensitive to parameter types, so the PDO type is chosen from
 * the PHP type of each value:
 *  - native int: PDO::PARAM_INT
 *  - null: PDO::PARAM_NULL
 *  - everything else (floats and numeric strings included): cast to string
 *    and bound as PDO::PARAM_STR
 * Array keys are ignored; values are bound to placeholders 1..n in order.
 *
 * @param \PDOStatement $stmt   Prepared statement.
 * @param array         $params Values to bind.
 */
private function bindParams(\PDOStatement $stmt, array $params): void
{
    $position = 0;
    foreach ($params as $value) {
        $position++;

        [$bound, $type] = match (true) {
            is_int($value) => [$value, PDO::PARAM_INT],
            is_null($value) => [null, PDO::PARAM_NULL],
            default => [(string)$value, PDO::PARAM_STR],
        };

        $stmt->bindValue($position, $bound, $type);
    }
}
/**
 * Escape a keyword for use inside a Manticore full-text MATCH() expression.
 *
 * Every special character is prefixed with a single backslash in one pass,
 * so an existing backslash is escaped exactly once.
 * Reference: https://manual.manticoresearch.com/Searching/Full_text_matching/Escaping
 *
 * NOTE(review): the callers in this class embed the result in a single-quoted
 * SQL literal. The escaping guide above describes doubled backslashes in that
 * context - confirm that operators are really neutralised.
 *
 * @param string $keyword Raw keyword.
 * @return string Escaped keyword.
 */
public static function escapeMatch(string $keyword): string
{
    $specialChars = [
        '\\',                                        // backslash
        '(', ')', '[', ']',                          // brackets
        '|', '-', '!', '@', '~', '^', '$', '*', '?', // operators
        '"', '\'',                                   // quotes
        '&', '/', '=', '<', '>', ':',                // other special characters
    ];

    $replacements = [];
    foreach ($specialChars as $char) {
        $replacements[$char] = '\\' . $char;
    }

    // strtr() never rescans its own output.
    return strtr($keyword, $replacements);
}
// ==============================
// 文件向量相关方法
// ==============================
/**
 * Full-text search over indexed files (file_name and content fields).
 *
 * When $userid > 0 only rows whose allowed_users MVA contains 0 (public) or
 * the user id are returned; with $userid = 0 no permission filter is applied.
 *
 * @param string $keyword Search keyword. Blank input returns []; the literal
 *                        keyword "0" is searched (empty() used to reject it).
 * @param int    $userid  User id, 0 to skip permission filtering.
 * @param int    $limit   Maximum number of rows.
 * @param int    $offset  Row offset.
 * @return array Matching rows ordered by relevance (WEIGHT()) descending.
 */
public static function fullTextSearch(string $keyword, int $userid = 0, int $limit = 20, int $offset = 0): array
{
    // Compare against '' rather than using empty(), which also treats "0" as empty.
    if (trim($keyword) === '') {
        return [];
    }

    $escapedKeyword = self::escapeMatch($keyword);

    $where = "MATCH('@(file_name,content) {$escapedKeyword}')";
    if ($userid > 0) {
        // MVA permission filter: 0 marks a public file.
        $where .= " AND (allowed_users = 0 OR allowed_users = {$userid})";
    }

    $sql = "
        SELECT
            id,
            file_id,
            userid,
            pshare,
            file_name,
            file_type,
            file_ext,
            content,
            WEIGHT() as relevance
        FROM file_vectors
        WHERE {$where}
        ORDER BY relevance DESC
        LIMIT {$limit} OFFSET {$offset}";

    return (new self())->query($sql);
}
/**
 * KNN vector similarity search over indexed files.
 *
 * The KNN query itself carries no permission filter. When $userid > 0 it
 * fetches five times $limit candidates and then keeps only those whose
 * allowed_users contains 0 (public) or the user id. That permission lookup is
 * restricted to the candidate file ids, so it no longer loads up to 100000
 * ids or drops hits for users with more accessible files than that cap.
 *
 * @param array $queryVector Query embedding; every component is cast to float
 *                           before it is written into the SQL text.
 * @param int   $userid      User id, 0 to skip permission filtering.
 * @param int   $limit       Maximum number of rows.
 * @return array Rows ordered by distance ascending, each with an added
 *               "similarity" = 1 - distance.
 */
public static function vectorSearch(array $queryVector, int $userid = 0, int $limit = 20): array
{
    if (empty($queryVector)) {
        return [];
    }

    $instance = new self();

    // Only numeric literals may reach the SQL string.
    $vectorStr = '(' . implode(',', array_map('floatval', $queryVector)) . ')';

    // Over-fetch when the hits still have to pass the permission filter.
    $fetchLimit = $userid > 0 ? $limit * 5 : $limit;

    $sql = "
        SELECT
            id,
            file_id,
            userid,
            pshare,
            file_name,
            file_type,
            file_ext,
            content,
            KNN_DIST() as distance
        FROM file_vectors
        WHERE KNN(content_vector, {$fetchLimit}, {$vectorStr})
        ORDER BY distance ASC
    ";
    $results = $instance->query($sql);

    // Index-based update avoids leaving a dangling foreach reference behind.
    foreach ($results as $index => $row) {
        $results[$index]['similarity'] = 1 - ($row['distance'] ?? 0);
    }

    if ($userid > 0 && !empty($results)) {
        $candidateIds = array_values(array_unique(array_map('intval', array_column($results, 'file_id'))));
        $idList = implode(',', $candidateIds);

        $allowedRows = $instance->query(
            "SELECT file_id FROM file_vectors WHERE file_id IN ({$idList})"
            . " AND (allowed_users = 0 OR allowed_users = ?) LIMIT " . count($candidateIds),
            [$userid]
        );

        // Flip to a key set for O(1) membership tests.
        $allowed = array_flip(array_column($allowedRows, 'file_id'));
        $results = array_values(array_filter(
            $results,
            static fn (array $row): bool => isset($allowed[$row['file_id']])
        ));
    }

    return array_slice($results, 0, $limit);
}
/**
 * Hybrid file search: full-text and vector results fused with RRF.
 *
 * Each search contributes up to 50 hits. A hit at zero-based rank r adds
 * weight / (60 + r + 1) to the score of its file_id. When a file appears in
 * both lists the full-text row is the one returned.
 *
 * @param string $keyword      Search keyword.
 * @param array  $queryVector  Query embedding; vector search is skipped when empty.
 * @param int    $userid       User id, 0 to skip permission filtering.
 * @param int    $limit        Maximum number of rows.
 * @param float  $textWeight   Weight of the full-text ranking.
 * @param float  $vectorWeight Weight of the vector ranking.
 * @return array Rows ordered by fused score descending, each with "rrf_score".
 */
public static function hybridSearch(
    string $keyword,
    array $queryVector,
    int $userid = 0,
    int $limit = 20,
    float $textWeight = 0.5,
    float $vectorWeight = 0.5
): array {
    // Both searches already apply the permission filter.
    $textHits = self::fullTextSearch($keyword, $userid, 50, 0);
    $vectorHits = empty($queryVector) ? [] : self::vectorSearch($queryVector, $userid, 50);

    $k = 60; // RRF constant
    $fused = [];
    $rows = [];

    // [hits, weight, whether this list overwrites an already stored row]
    $rankings = [
        [$textHits, $textWeight, true],
        [$vectorHits, $vectorWeight, false],
    ];
    foreach ($rankings as [$hits, $weight, $overwrite]) {
        foreach ($hits as $rank => $hit) {
            $fileId = $hit['file_id'];
            $fused[$fileId] = ($fused[$fileId] ?? 0) + $weight / ($k + $rank + 1);
            if ($overwrite || !isset($rows[$fileId])) {
                $rows[$fileId] = $hit;
            }
        }
    }

    arsort($fused);

    // max() keeps a non-positive limit returning nothing.
    $merged = [];
    foreach (array_slice($fused, 0, max(0, $limit), true) as $fileId => $score) {
        $row = $rows[$fileId];
        $row['rrf_score'] = $score;
        $merged[] = $row;
    }

    return $merged;
}
/**
 * Insert or replace the index row of a file (delete, then insert).
 *
 * Changes over the previous version:
 *  - file_id, userid and pshare are cast to int, so numeric strings are bound
 *    as integers (binding is type-sensitive);
 *  - content_vector is validated before the existing row is deleted, so a
 *    malformed vector no longer removes the file from the index;
 *  - content_vector may also be given as an array of numbers.
 *
 * @param array $data File data:
 *  - file_id: file id (required, > 0)
 *  - userid: owner id
 *  - pshare: shared folder id
 *  - file_name: file name
 *  - file_type: file type
 *  - file_ext: file extension
 *  - content: file content
 *  - content_vector: embedding as "[v1,v2,...]" / "(v1,v2,...)" or an array
 *  - allowed_users: user ids with access; 0 means public
 * @return bool True when the insert succeeded.
 */
public static function upsertFileVector(array $data): bool
{
    $fileId = (int) ($data['file_id'] ?? 0);
    if ($fileId <= 0) {
        return false;
    }

    // Build and validate the vector literal before touching the index.
    $vectorSql = null;
    $rawVector = $data['content_vector'] ?? null;
    if (!empty($rawVector)) {
        $vectorSql = is_array($rawVector)
            ? '(' . implode(',', array_map('floatval', $rawVector)) . ')'
            : str_replace(['[', ']'], ['(', ')'], trim((string) $rawVector));

        // The literal is interpolated into SQL, so allow numeric tuples only.
        if (preg_match('/^\(\s*[-+0-9.eE]+(?:\s*,\s*[-+0-9.eE]+)*\s*\)$/D', $vectorSql) !== 1) {
            Log::warning('Manticore upsertFileVector: invalid content_vector', ['file_id' => $fileId]);
            return false;
        }
    }

    $instance = new self();

    // Remove any existing row first.
    $instance->execute("DELETE FROM file_vectors WHERE file_id = ?", [$fileId]);

    // allowed_users MVA literal; an empty list becomes "()".
    $allowedUsersStr = '(' . implode(',', array_map('intval', (array) ($data['allowed_users'] ?? []))) . ')';

    $columns = 'id, file_id, userid, pshare, file_name, file_type, file_ext, content, allowed_users';
    $values = "?, ?, ?, ?, ?, ?, ?, ?, {$allowedUsersStr}";
    if ($vectorSql !== null) {
        $columns .= ', content_vector';
        $values .= ", {$vectorSql}";
    }

    $params = [
        $fileId, // id
        $fileId, // file_id
        (int) ($data['userid'] ?? 0),
        (int) ($data['pshare'] ?? 0),
        (string) ($data['file_name'] ?? ''),
        (string) ($data['file_type'] ?? ''),
        (string) ($data['file_ext'] ?? ''),
        (string) ($data['content'] ?? ''),
    ];

    return $instance->execute("INSERT INTO file_vectors ({$columns}) VALUES ({$values})", $params);
}
/**
 * Replace the allowed_users list of a file.
 *
 * @param int   $fileId  File id; values <= 0 are rejected.
 * @param array $userids User ids with access (each cast to int).
 * @return bool True when the update statement succeeded.
 */
public static function updateFileAllowedUsers(int $fileId, array $userids): bool
{
    if ($fileId <= 0) {
        return false;
    }

    // An empty list yields the empty MVA literal "()".
    $mva = '(' . implode(',', array_map('intval', $userids)) . ')';
    $sql = "UPDATE file_vectors SET allowed_users = {$mva} WHERE file_id = ?";

    return (new self())->execute($sql, [$fileId]);
}
/**
 * Remove a file from the index.
 *
 * @param int $fileId File id; values <= 0 are rejected.
 * @return bool True when the delete statement succeeded.
 */
public static function deleteFileVector(int $fileId): bool
{
    return $fileId > 0
        && (new self())->execute("DELETE FROM file_vectors WHERE file_id = ?", [$fileId]);
}
/**
 * Remove several files from the index in one statement.
 *
 * @param array $fileIds File ids (each cast to int before use in the SQL).
 * @return int Row count reported by executeWithRowCount() (-1 on failure),
 *             or 0 when the list is empty.
 */
public static function batchDeleteFileVectors(array $fileIds): int
{
    if (empty($fileIds)) {
        return 0;
    }

    $idList = implode(',', array_map('intval', $fileIds));
    $sql = "DELETE FROM file_vectors WHERE file_id IN ({$idList})";

    return (new self())->executeWithRowCount($sql);
}
/**
 * Set the pshare value of several files, one UPDATE per file.
 *
 * @param array $fileIds File ids (each cast to int).
 * @param int   $pshare  New pshare value.
 * @return int Number of UPDATE statements that succeeded.
 */
public static function batchUpdatePshare(array $fileIds, int $pshare): int
{
    if (empty($fileIds)) {
        return 0;
    }

    // Files are updated one at a time.
    $instance = new self();
    $succeeded = 0;
    foreach ($fileIds as $fileId) {
        $ok = $instance->execute(
            "UPDATE file_vectors SET pshare = ? WHERE file_id = ?",
            [$pshare, (int)$fileId]
        );
        $succeeded += $ok ? 1 : 0;
    }

    return $succeeded;
}
/**
 * Remove every row from the file index.
 *
 * @return bool True when the TRUNCATE statement succeeded.
 */
public static function clearAllFileVectors(): bool
{
    $sql = "TRUNCATE TABLE file_vectors";

    return (new self())->execute($sql);
}
/**
 * Count the rows in the file index.
 *
 * @return int Row count, or 0 when the query returns no row.
 */
public static function getIndexedFileCount(): int
{
    $row = (new self())->queryOne("SELECT COUNT(*) as cnt FROM file_vectors");
    if (!$row) {
        return 0;
    }

    return (int) $row['cnt'];
}
/**
 * Return the highest file_id present in the file index.
 *
 * @return int Highest file id, or 0 when the query returns no row or no value.
 */
public static function getLastIndexedFileId(): int
{
    $row = (new self())->queryOne("SELECT MAX(file_id) as max_id FROM file_vectors");

    // A null row and a null/missing max_id both collapse to 0.
    return (int) ($row['max_id'] ?? 0);
}
// ==============================
// 用户向量方法
// ==============================
/**
 * Full-text search over indexed users.
 *
 * Searches the nickname, profession, tags and introduction fields.
 *
 * @param string $keyword Search keyword. Blank input returns []; the literal
 *                        keyword "0" is searched (empty() used to reject it).
 * @param int    $limit   Maximum number of rows.
 * @param int    $offset  Row offset.
 * @return array Matching rows ordered by relevance (WEIGHT()) descending.
 */
public static function userFullTextSearch(string $keyword, int $limit = 20, int $offset = 0): array
{
    // Compare against '' rather than using empty(), which also treats "0" as empty.
    if (trim($keyword) === '') {
        return [];
    }

    $escapedKeyword = self::escapeMatch($keyword);

    $sql = "
        SELECT
            id,
            userid,
            nickname,
            email,
            profession,
            tags,
            introduction,
            WEIGHT() as relevance
        FROM user_vectors
        WHERE MATCH('@(nickname,profession,tags,introduction) {$escapedKeyword}')
        ORDER BY relevance DESC
        LIMIT {$limit} OFFSET {$offset}";

    return (new self())->query($sql);
}
/**
 * KNN vector similarity search over indexed users.
 *
 * @param array $queryVector Query embedding; every component is cast to float
 *                           before it is written into the SQL text.
 * @param int   $limit       Number of neighbours to request.
 * @return array Rows ordered by distance ascending, each with an added
 *               "similarity" = 1 - distance.
 */
public static function userVectorSearch(array $queryVector, int $limit = 20): array
{
    if (empty($queryVector)) {
        return [];
    }

    // Only numeric literals may reach the SQL string.
    $vectorStr = '(' . implode(',', array_map('floatval', $queryVector)) . ')';

    $sql = "
        SELECT
            id,
            userid,
            nickname,
            email,
            profession,
            tags,
            introduction,
            KNN_DIST() as distance
        FROM user_vectors
        WHERE KNN(content_vector, {$limit}, {$vectorStr})
        ORDER BY distance ASC
    ";
    $results = (new self())->query($sql);

    // Index-based update avoids leaving a dangling foreach reference in the returned array.
    foreach ($results as $index => $row) {
        $results[$index]['similarity'] = 1 - ($row['distance'] ?? 0);
    }

    return $results;
}
/**
 * Hybrid user search: full-text and vector results fused with RRF.
 *
 * Each search contributes up to 50 hits. Both rankings carry weight 0.5 and
 * a hit at zero-based rank r adds 0.5 / (60 + r + 1) to the score of its
 * userid. When a user appears in both lists the full-text row is returned.
 *
 * @param string $keyword     Search keyword.
 * @param array  $queryVector Query embedding; vector search is skipped when empty.
 * @param int    $limit       Maximum number of rows.
 * @return array Rows ordered by fused score descending, each with "rrf_score".
 */
public static function userHybridSearch(string $keyword, array $queryVector, int $limit = 20): array
{
    $textHits = self::userFullTextSearch($keyword, 50, 0);
    $vectorHits = empty($queryVector) ? [] : self::userVectorSearch($queryVector, 50);

    $k = 60; // RRF constant
    $fused = [];
    $rows = [];

    foreach ($textHits as $rank => $hit) {
        $uid = $hit['userid'];
        $fused[$uid] = ($fused[$uid] ?? 0) + 0.5 / ($k + $rank + 1);
        $rows[$uid] = $hit;
    }
    foreach ($vectorHits as $rank => $hit) {
        $uid = $hit['userid'];
        $fused[$uid] = ($fused[$uid] ?? 0) + 0.5 / ($k + $rank + 1);
        // Keep the full-text row when the user was already seen.
        $rows[$uid] = $rows[$uid] ?? $hit;
    }

    arsort($fused);

    // max() keeps a non-positive limit returning nothing.
    $merged = [];
    foreach (array_slice($fused, 0, max(0, $limit), true) as $uid => $score) {
        $row = $rows[$uid];
        $row['rrf_score'] = $score;
        $merged[] = $row;
    }

    return $merged;
}
/**
 * Insert or replace the index row of a user (delete, then insert).
 *
 * Changes over the previous version:
 *  - userid is cast to int, so a numeric string is bound as an integer
 *    (binding is type-sensitive);
 *  - content_vector is validated before the existing row is deleted, so a
 *    malformed vector no longer removes the user from the index;
 *  - content_vector may also be given as an array of numbers.
 *
 * @param array $data User data: userid (required, > 0), nickname, email,
 *                    profession, tags, introduction, content_vector
 *                    ("[v1,...]" / "(v1,...)" string or array of numbers).
 * @return bool True when the insert succeeded.
 */
public static function upsertUserVector(array $data): bool
{
    $userid = (int) ($data['userid'] ?? 0);
    if ($userid <= 0) {
        return false;
    }

    // Build and validate the vector literal before touching the index.
    $vectorSql = null;
    $rawVector = $data['content_vector'] ?? null;
    if (!empty($rawVector)) {
        $vectorSql = is_array($rawVector)
            ? '(' . implode(',', array_map('floatval', $rawVector)) . ')'
            : str_replace(['[', ']'], ['(', ')'], trim((string) $rawVector));

        // The literal is interpolated into SQL, so allow numeric tuples only.
        if (preg_match('/^\(\s*[-+0-9.eE]+(?:\s*,\s*[-+0-9.eE]+)*\s*\)$/D', $vectorSql) !== 1) {
            Log::warning('Manticore upsertUserVector: invalid content_vector', ['userid' => $userid]);
            return false;
        }
    }

    $instance = new self();

    // Remove any existing row first.
    $instance->execute("DELETE FROM user_vectors WHERE userid = ?", [$userid]);

    $columns = 'id, userid, nickname, email, profession, tags, introduction';
    $values = '?, ?, ?, ?, ?, ?, ?';
    if ($vectorSql !== null) {
        $columns .= ', content_vector';
        $values .= ", {$vectorSql}";
    }

    $params = [
        $userid, // id
        $userid, // userid
        (string) ($data['nickname'] ?? ''),
        (string) ($data['email'] ?? ''),
        (string) ($data['profession'] ?? ''),
        (string) ($data['tags'] ?? ''),
        (string) ($data['introduction'] ?? ''),
    ];

    return $instance->execute("INSERT INTO user_vectors ({$columns}) VALUES ({$values})", $params);
}
/**
 * Remove a user from the index.
 *
 * @param int $userid User id; values <= 0 are rejected.
 * @return bool True when the delete statement succeeded.
 */
public static function deleteUserVector(int $userid): bool
{
    return $userid > 0
        && (new self())->execute("DELETE FROM user_vectors WHERE userid = ?", [$userid]);
}
/**
 * Remove every row from the user index.
 *
 * @return bool True when the TRUNCATE statement succeeded.
 */
public static function clearAllUserVectors(): bool
{
    $sql = "TRUNCATE TABLE user_vectors";

    return (new self())->execute($sql);
}
/**
 * Count the rows in the user index.
 *
 * @return int Row count, or 0 when the query returns no row.
 */
public static function getIndexedUserCount(): int
{
    $row = (new self())->queryOne("SELECT COUNT(*) as cnt FROM user_vectors");
    if (!$row) {
        return 0;
    }

    return (int) $row['cnt'];
}
// ==============================
// 项目向量方法
// ==============================
/**
 * Full-text search over indexed projects (project_name and project_desc).
 *
 * When $userid > 0 only rows whose allowed_users MVA contains the user id are
 * returned; with $userid = 0 no permission filter is applied.
 *
 * @param string $keyword Search keyword. Blank input returns []; the literal
 *                        keyword "0" is searched (empty() used to reject it).
 * @param int    $userid  User id, 0 to skip permission filtering.
 * @param int    $limit   Maximum number of rows.
 * @param int    $offset  Row offset.
 * @return array Matching rows ordered by relevance (WEIGHT()) descending.
 */
public static function projectFullTextSearch(string $keyword, int $userid = 0, int $limit = 20, int $offset = 0): array
{
    // Compare against '' rather than using empty(), which also treats "0" as empty.
    if (trim($keyword) === '') {
        return [];
    }

    $escapedKeyword = self::escapeMatch($keyword);

    $where = "MATCH('@(project_name,project_desc) {$escapedKeyword}')";
    if ($userid > 0) {
        // MVA permission filter
        $where .= " AND allowed_users = {$userid}";
    }

    $sql = "
        SELECT
            id,
            project_id,
            userid,
            personal,
            project_name,
            project_desc,
            WEIGHT() as relevance
        FROM project_vectors
        WHERE {$where}
        ORDER BY relevance DESC
        LIMIT {$limit} OFFSET {$offset}";

    return (new self())->query($sql);
}
/**
 * KNN vector similarity search over indexed projects.
 *
 * The KNN query itself carries no permission filter. When $userid > 0 it
 * fetches five times $limit candidates and then keeps only those whose
 * allowed_users contains the user id. That permission lookup is restricted to
 * the candidate project ids, so it no longer loads up to 100000 ids or drops
 * hits for users with more accessible projects than that cap.
 *
 * @param array $queryVector Query embedding; every component is cast to float
 *                           before it is written into the SQL text.
 * @param int   $userid      User id, 0 to skip permission filtering.
 * @param int   $limit       Maximum number of rows.
 * @return array Rows ordered by distance ascending, each with an added
 *               "similarity" = 1 - distance.
 */
public static function projectVectorSearch(array $queryVector, int $userid = 0, int $limit = 20): array
{
    if (empty($queryVector)) {
        return [];
    }

    $instance = new self();

    // Only numeric literals may reach the SQL string.
    $vectorStr = '(' . implode(',', array_map('floatval', $queryVector)) . ')';

    // Over-fetch when the hits still have to pass the permission filter.
    $fetchLimit = $userid > 0 ? $limit * 5 : $limit;

    $sql = "
        SELECT
            id,
            project_id,
            userid,
            personal,
            project_name,
            project_desc,
            KNN_DIST() as distance
        FROM project_vectors
        WHERE KNN(content_vector, {$fetchLimit}, {$vectorStr})
        ORDER BY distance ASC
    ";
    $results = $instance->query($sql);

    // Index-based update avoids leaving a dangling foreach reference behind.
    foreach ($results as $index => $row) {
        $results[$index]['similarity'] = 1 - ($row['distance'] ?? 0);
    }

    if ($userid > 0 && !empty($results)) {
        $candidateIds = array_values(array_unique(array_map('intval', array_column($results, 'project_id'))));
        $idList = implode(',', $candidateIds);

        $allowedRows = $instance->query(
            "SELECT project_id FROM project_vectors WHERE project_id IN ({$idList})"
            . " AND allowed_users = ? LIMIT " . count($candidateIds),
            [$userid]
        );

        // Flip to a key set for O(1) membership tests.
        $allowed = array_flip(array_column($allowedRows, 'project_id'));
        $results = array_values(array_filter(
            $results,
            static fn (array $row): bool => isset($allowed[$row['project_id']])
        ));
    }

    return array_slice($results, 0, $limit);
}
/**
 * Hybrid project search: full-text and vector results fused with RRF.
 *
 * Each search contributes up to 50 hits. Both rankings carry weight 0.5 and
 * a hit at zero-based rank r adds 0.5 / (60 + r + 1) to the score of its
 * project_id. When a project appears in both lists the full-text row is returned.
 *
 * @param string $keyword     Search keyword.
 * @param array  $queryVector Query embedding; vector search is skipped when empty.
 * @param int    $userid      User id used for permission filtering (0 = none).
 * @param int    $limit       Maximum number of rows.
 * @return array Rows ordered by fused score descending, each with "rrf_score".
 */
public static function projectHybridSearch(string $keyword, array $queryVector, int $userid = 0, int $limit = 20): array
{
    $textHits = self::projectFullTextSearch($keyword, $userid, 50, 0);
    $vectorHits = empty($queryVector) ? [] : self::projectVectorSearch($queryVector, $userid, 50);

    $k = 60; // RRF constant
    $fused = [];
    $rows = [];

    foreach ($textHits as $rank => $hit) {
        $projectId = $hit['project_id'];
        $fused[$projectId] = ($fused[$projectId] ?? 0) + 0.5 / ($k + $rank + 1);
        $rows[$projectId] = $hit;
    }
    foreach ($vectorHits as $rank => $hit) {
        $projectId = $hit['project_id'];
        $fused[$projectId] = ($fused[$projectId] ?? 0) + 0.5 / ($k + $rank + 1);
        // Keep the full-text row when the project was already seen.
        $rows[$projectId] = $rows[$projectId] ?? $hit;
    }

    arsort($fused);

    // max() keeps a non-positive limit returning nothing.
    $merged = [];
    foreach (array_slice($fused, 0, max(0, $limit), true) as $projectId => $score) {
        $row = $rows[$projectId];
        $row['rrf_score'] = $score;
        $merged[] = $row;
    }

    return $merged;
}
/**
 * Insert or replace the index row of a project (delete, then insert).
 *
 * Changes over the previous version:
 *  - project_id, userid and personal are cast to int, so numeric strings are
 *    bound as integers (binding is type-sensitive);
 *  - content_vector is validated before the existing row is deleted, so a
 *    malformed vector no longer removes the project from the index;
 *  - content_vector may also be given as an array of numbers.
 *
 * @param array $data Project data:
 *  - project_id: project id (required, > 0)
 *  - userid: creator id
 *  - personal: whether it is a personal project
 *  - project_name: project name
 *  - project_desc: project description
 *  - content_vector: embedding as "[v1,v2,...]" / "(v1,v2,...)" or an array
 *  - allowed_users: user ids with access
 * @return bool True when the insert succeeded.
 */
public static function upsertProjectVector(array $data): bool
{
    $projectId = (int) ($data['project_id'] ?? 0);
    if ($projectId <= 0) {
        return false;
    }

    // Build and validate the vector literal before touching the index.
    $vectorSql = null;
    $rawVector = $data['content_vector'] ?? null;
    if (!empty($rawVector)) {
        $vectorSql = is_array($rawVector)
            ? '(' . implode(',', array_map('floatval', $rawVector)) . ')'
            : str_replace(['[', ']'], ['(', ')'], trim((string) $rawVector));

        // The literal is interpolated into SQL, so allow numeric tuples only.
        if (preg_match('/^\(\s*[-+0-9.eE]+(?:\s*,\s*[-+0-9.eE]+)*\s*\)$/D', $vectorSql) !== 1) {
            Log::warning('Manticore upsertProjectVector: invalid content_vector', ['project_id' => $projectId]);
            return false;
        }
    }

    $instance = new self();

    // Remove any existing row first.
    $instance->execute("DELETE FROM project_vectors WHERE project_id = ?", [$projectId]);

    // allowed_users MVA literal; an empty list becomes "()".
    $allowedUsersStr = '(' . implode(',', array_map('intval', (array) ($data['allowed_users'] ?? []))) . ')';

    $columns = 'id, project_id, userid, personal, project_name, project_desc, allowed_users';
    $values = "?, ?, ?, ?, ?, ?, {$allowedUsersStr}";
    if ($vectorSql !== null) {
        $columns .= ', content_vector';
        $values .= ", {$vectorSql}";
    }

    $params = [
        $projectId, // id
        $projectId, // project_id
        (int) ($data['userid'] ?? 0),
        (int) ($data['personal'] ?? 0),
        (string) ($data['project_name'] ?? ''),
        (string) ($data['project_desc'] ?? ''),
    ];

    return $instance->execute("INSERT INTO project_vectors ({$columns}) VALUES ({$values})", $params);
}
/**
 * Replace the allowed_users list of a project.
 *
 * @param int   $projectId Project id; values <= 0 are rejected.
 * @param array $userids   User ids with access (each cast to int).
 * @return bool True when the update statement succeeded.
 */
public static function updateProjectAllowedUsers(int $projectId, array $userids): bool
{
    if ($projectId <= 0) {
        return false;
    }

    // An empty list yields the empty MVA literal "()".
    $mva = '(' . implode(',', array_map('intval', $userids)) . ')';
    $sql = "UPDATE project_vectors SET allowed_users = {$mva} WHERE project_id = ?";

    return (new self())->execute($sql, [$projectId]);
}
/**
 * Remove a project from the index.
 *
 * @param int $projectId Project id; values <= 0 are rejected.
 * @return bool True when the delete statement succeeded.
 */
public static function deleteProjectVector(int $projectId): bool
{
    return $projectId > 0
        && (new self())->execute("DELETE FROM project_vectors WHERE project_id = ?", [$projectId]);
}
/**
 * Remove every row from the project index.
 *
 * @return bool True when the TRUNCATE statement succeeded.
 */
public static function clearAllProjectVectors(): bool
{
    $sql = "TRUNCATE TABLE project_vectors";

    return (new self())->execute($sql);
}
/**
 * Count the rows in the project index.
 *
 * @return int Row count, or 0 when the query returns no row.
 */
public static function getIndexedProjectCount(): int
{
    $row = (new self())->queryOne("SELECT COUNT(*) as cnt FROM project_vectors");
    if (!$row) {
        return 0;
    }

    return (int) $row['cnt'];
}
// ==============================
// 任务向量方法
// ==============================
/**
 * Full-text search over indexed tasks (task_name, task_desc, task_content).
 *
 * When $userid > 0 only rows whose allowed_users MVA contains the user id are
 * returned; with $userid = 0 no permission filter is applied.
 *
 * @param string $keyword Search keyword. Blank input returns []; the literal
 *                        keyword "0" is searched (empty() used to reject it).
 * @param int    $userid  User id, 0 to skip permission filtering.
 * @param int    $limit   Maximum number of rows.
 * @param int    $offset  Row offset.
 * @return array Matching rows ordered by relevance (WEIGHT()) descending.
 */
public static function taskFullTextSearch(string $keyword, int $userid = 0, int $limit = 20, int $offset = 0): array
{
    // Compare against '' rather than using empty(), which also treats "0" as empty.
    if (trim($keyword) === '') {
        return [];
    }

    $escapedKeyword = self::escapeMatch($keyword);

    $where = "MATCH('@(task_name,task_desc,task_content) {$escapedKeyword}')";
    if ($userid > 0) {
        // MVA permission filter
        $where .= " AND allowed_users = {$userid}";
    }

    $sql = "
        SELECT
            id,
            task_id,
            project_id,
            userid,
            visibility,
            task_name,
            task_desc,
            task_content,
            WEIGHT() as relevance
        FROM task_vectors
        WHERE {$where}
        ORDER BY relevance DESC
        LIMIT {$limit} OFFSET {$offset}";

    return (new self())->query($sql);
}
/**
 * KNN vector similarity search over indexed tasks.
 *
 * The KNN query itself carries no permission filter. When $userid > 0 it
 * fetches five times $limit candidates and then keeps only those whose
 * allowed_users contains the user id. That permission lookup is restricted to
 * the candidate task ids, so it no longer loads up to 100000 ids or drops
 * hits for users with more accessible tasks than that cap.
 *
 * @param array $queryVector Query embedding; every component is cast to float
 *                           before it is written into the SQL text.
 * @param int   $userid      User id, 0 to skip permission filtering.
 * @param int   $limit       Maximum number of rows.
 * @return array Rows ordered by distance ascending, each with an added
 *               "similarity" = 1 - distance.
 */
public static function taskVectorSearch(array $queryVector, int $userid = 0, int $limit = 20): array
{
    if (empty($queryVector)) {
        return [];
    }

    $instance = new self();

    // Only numeric literals may reach the SQL string.
    $vectorStr = '(' . implode(',', array_map('floatval', $queryVector)) . ')';

    // Over-fetch when the hits still have to pass the permission filter.
    $fetchLimit = $userid > 0 ? $limit * 5 : $limit;

    $sql = "
        SELECT
            id,
            task_id,
            project_id,
            userid,
            visibility,
            task_name,
            task_desc,
            task_content,
            KNN_DIST() as distance
        FROM task_vectors
        WHERE KNN(content_vector, {$fetchLimit}, {$vectorStr})
        ORDER BY distance ASC
    ";
    $results = $instance->query($sql);

    // Index-based update avoids leaving a dangling foreach reference behind.
    foreach ($results as $index => $row) {
        $results[$index]['similarity'] = 1 - ($row['distance'] ?? 0);
    }

    if ($userid > 0 && !empty($results)) {
        $candidateIds = array_values(array_unique(array_map('intval', array_column($results, 'task_id'))));
        $idList = implode(',', $candidateIds);

        $allowedRows = $instance->query(
            "SELECT task_id FROM task_vectors WHERE task_id IN ({$idList})"
            . " AND allowed_users = ? LIMIT " . count($candidateIds),
            [$userid]
        );

        // Flip to a key set for O(1) membership tests.
        $allowed = array_flip(array_column($allowedRows, 'task_id'));
        $results = array_values(array_filter(
            $results,
            static fn (array $row): bool => isset($allowed[$row['task_id']])
        ));
    }

    return array_slice($results, 0, $limit);
}
/**
 * Hybrid task search: full-text and vector results fused with RRF.
 *
 * Each search contributes up to 50 hits. Both rankings carry weight 0.5 and
 * a hit at zero-based rank r adds 0.5 / (60 + r + 1) to the score of its
 * task_id. When a task appears in both lists the full-text row is returned.
 *
 * @param string $keyword     Search keyword.
 * @param array  $queryVector Query embedding; vector search is skipped when empty.
 * @param int    $userid      User id used for permission filtering (0 = none).
 * @param int    $limit       Maximum number of rows.
 * @return array Rows ordered by fused score descending, each with "rrf_score".
 */
public static function taskHybridSearch(string $keyword, array $queryVector, int $userid = 0, int $limit = 20): array
{
    $textHits = self::taskFullTextSearch($keyword, $userid, 50, 0);
    $vectorHits = empty($queryVector) ? [] : self::taskVectorSearch($queryVector, $userid, 50);

    $k = 60; // RRF constant
    $fused = [];
    $rows = [];

    foreach ($textHits as $rank => $hit) {
        $taskId = $hit['task_id'];
        $fused[$taskId] = ($fused[$taskId] ?? 0) + 0.5 / ($k + $rank + 1);
        $rows[$taskId] = $hit;
    }
    foreach ($vectorHits as $rank => $hit) {
        $taskId = $hit['task_id'];
        $fused[$taskId] = ($fused[$taskId] ?? 0) + 0.5 / ($k + $rank + 1);
        // Keep the full-text row when the task was already seen.
        $rows[$taskId] = $rows[$taskId] ?? $hit;
    }

    arsort($fused);

    // max() keeps a non-positive limit returning nothing.
    $merged = [];
    foreach (array_slice($fused, 0, max(0, $limit), true) as $taskId => $score) {
        $row = $rows[$taskId];
        $row['rrf_score'] = $score;
        $merged[] = $row;
    }

    return $merged;
}
/**
 * Insert or replace the index row of a task (delete, then insert).
 *
 * Changes over the previous version:
 *  - task_id, project_id, userid and visibility are cast to int, so numeric
 *    strings are bound as integers (binding is type-sensitive);
 *  - content_vector is validated before the existing row is deleted, so a
 *    malformed vector no longer removes the task from the index;
 *  - content_vector may also be given as an array of numbers.
 *
 * @param array $data Task data:
 *  - task_id: task id (required, > 0)
 *  - project_id: project id
 *  - userid: creator id
 *  - visibility: visibility (defaults to 1)
 *  - task_name: task name
 *  - task_desc: task description
 *  - task_content: task content
 *  - content_vector: embedding as "[v1,v2,...]" / "(v1,v2,...)" or an array
 *  - allowed_users: user ids with access
 * @return bool True when the insert succeeded.
 */
public static function upsertTaskVector(array $data): bool
{
    $taskId = (int) ($data['task_id'] ?? 0);
    if ($taskId <= 0) {
        return false;
    }

    // Build and validate the vector literal before touching the index.
    $vectorSql = null;
    $rawVector = $data['content_vector'] ?? null;
    if (!empty($rawVector)) {
        $vectorSql = is_array($rawVector)
            ? '(' . implode(',', array_map('floatval', $rawVector)) . ')'
            : str_replace(['[', ']'], ['(', ')'], trim((string) $rawVector));

        // The literal is interpolated into SQL, so allow numeric tuples only.
        if (preg_match('/^\(\s*[-+0-9.eE]+(?:\s*,\s*[-+0-9.eE]+)*\s*\)$/D', $vectorSql) !== 1) {
            Log::warning('Manticore upsertTaskVector: invalid content_vector', ['task_id' => $taskId]);
            return false;
        }
    }

    $instance = new self();

    // Remove any existing row first.
    $instance->execute("DELETE FROM task_vectors WHERE task_id = ?", [$taskId]);

    // allowed_users MVA literal; an empty list becomes "()".
    $allowedUsersStr = '(' . implode(',', array_map('intval', (array) ($data['allowed_users'] ?? []))) . ')';

    $columns = 'id, task_id, project_id, userid, visibility, task_name, task_desc, task_content, allowed_users';
    $values = "?, ?, ?, ?, ?, ?, ?, ?, {$allowedUsersStr}";
    if ($vectorSql !== null) {
        $columns .= ', content_vector';
        $values .= ", {$vectorSql}";
    }

    $params = [
        $taskId, // id
        $taskId, // task_id
        (int) ($data['project_id'] ?? 0),
        (int) ($data['userid'] ?? 0),
        (int) ($data['visibility'] ?? 1),
        (string) ($data['task_name'] ?? ''),
        (string) ($data['task_desc'] ?? ''),
        (string) ($data['task_content'] ?? ''),
    ];

    return $instance->execute("INSERT INTO task_vectors ({$columns}) VALUES ({$values})", $params);
}
/**
 * Replace the allowed_users list of a task.
 *
 * @param int   $taskId  Task id; values <= 0 are rejected.
 * @param array $userids User ids with access (each cast to int).
 * @return bool True when the update statement succeeded.
 */
public static function updateTaskAllowedUsers(int $taskId, array $userids): bool
{
    if ($taskId <= 0) {
        return false;
    }

    // An empty list yields the empty MVA literal "()".
    $mva = '(' . implode(',', array_map('intval', $userids)) . ')';
    $sql = "UPDATE task_vectors SET allowed_users = {$mva} WHERE task_id = ?";

    return (new self())->execute($sql, [$taskId]);
}
/**
 * Set the visibility value of a task.
 *
 * @param int $taskId     Task id; values <= 0 are rejected.
 * @param int $visibility New visibility value.
 * @return bool True when the update statement succeeded.
 */
public static function updateTaskVisibility(int $taskId, int $visibility): bool
{
    if ($taskId <= 0) {
        return false;
    }

    $sql = "UPDATE task_vectors SET visibility = ? WHERE task_id = ?";

    return (new self())->execute($sql, [$visibility, $taskId]);
}
/**
 * Remove a task from the index.
 *
 * @param int $taskId Task id; values <= 0 are rejected.
 * @return bool True when the delete statement succeeded.
 */
public static function deleteTaskVector(int $taskId): bool
{
    return $taskId > 0
        && (new self())->execute("DELETE FROM task_vectors WHERE task_id = ?", [$taskId]);
}
/**
 * Remove every row from the task index.
 *
 * @return bool True when the TRUNCATE statement succeeded.
 */
public static function clearAllTaskVectors(): bool
{
    $sql = "TRUNCATE TABLE task_vectors";

    return (new self())->execute($sql);
}
/**
 * Count the rows in the task index.
 *
 * @return int Row count, or 0 when the query returns no row.
 */
public static function getIndexedTaskCount(): int
{
    $row = (new self())->queryOne("SELECT COUNT(*) as cnt FROM task_vectors");
    if (!$row) {
        return 0;
    }

    return (int) $row['cnt'];
}
// ==============================
// 消息向量方法
// ==============================
/**
 * Full-text search over indexed messages (content field).
 *
 * Optional filters are ANDed to the MATCH clause: allowed_users must contain
 * $userid when $userid > 0, and dialog_id must equal $dialogId when
 * $dialogId > 0.
 *
 * @param string $keyword  Search keyword. Blank input returns []; the literal
 *                         keyword "0" is searched (empty() used to reject it).
 * @param int    $userid   User id, 0 to skip permission filtering.
 * @param int    $limit    Maximum number of rows.
 * @param int    $offset   Row offset.
 * @param int    $dialogId Dialog id, 0 for all dialogs.
 * @return array Matching rows ordered by relevance (WEIGHT()) descending.
 */
public static function msgFullTextSearch(string $keyword, int $userid = 0, int $limit = 20, int $offset = 0, int $dialogId = 0): array
{
    // Compare against '' rather than using empty(), which also treats "0" as empty.
    if (trim($keyword) === '') {
        return [];
    }

    $escapedKeyword = self::escapeMatch($keyword);

    // Build the filter conditions.
    $conditions = ["MATCH('@content {$escapedKeyword}')"];
    if ($userid > 0) {
        $conditions[] = "allowed_users = {$userid}";
    }
    if ($dialogId > 0) {
        $conditions[] = "dialog_id = {$dialogId}";
    }
    $whereClause = implode(' AND ', $conditions);

    $sql = "
        SELECT
            id,
            msg_id,
            dialog_id,
            userid,
            msg_type,
            content,
            created_at,
            WEIGHT() as relevance
        FROM msg_vectors
        WHERE {$whereClause}
        ORDER BY relevance DESC
        LIMIT {$limit} OFFSET {$offset}";

    return (new self())->query($sql);
}
/**
 * Vector (KNN) search over indexed messages.
 *
 * When a user or dialog filter is requested, five times $limit candidates are fetched from the KNN
 * index and the filters are applied in PHP afterwards. Each returned row carries the KNN distance
 * plus a derived `similarity` value (1 - distance).
 *
 * @param array $queryVector Query vector components
 * @param int $userid User ID for permission filtering; values <= 0 disable the filter
 * @param int $limit Maximum number of rows to return
 * @param int $dialogId Dialog ID; values <= 0 disable the filter
 * @return array Search results ordered by ascending distance; empty for an empty vector or a non-positive limit
 */
public static function msgVectorSearch(array $queryVector, int $userid = 0, int $limit = 20, int $dialogId = 0): array
{
    // A non-positive limit can never yield rows (and a negative one would make array_slice misbehave).
    if (empty($queryVector) || $limit <= 0) {
        return [];
    }
    $instance = new self();

    // Cast every component so that only numeric literals are spliced into the SQL text.
    $vectorStr = '(' . implode(',', array_map('floatval', $queryVector)) . ')';

    // Over-fetch when filters are applied afterwards in PHP.
    $needFilter = $userid > 0 || $dialogId > 0;
    $fetchLimit = $needFilter ? $limit * 5 : $limit;

    // The explicit LIMIT matters: without it the server's default row cap applies regardless of the
    // k passed to KNN(), which would defeat the over-fetch above.
    $sql = "
SELECT
id,
msg_id,
dialog_id,
userid,
msg_type,
content,
created_at,
KNN_DIST() as distance
FROM msg_vectors
WHERE KNN(content_vector, " . (int)$fetchLimit . ", {$vectorStr})
ORDER BY distance ASC
LIMIT " . (int)$fetchLimit . "
";
    $results = $instance->query($sql);
    foreach ($results as &$item) {
        $item['similarity'] = 1 - ($item['distance'] ?? 0);
    }
    // Break the reference left behind by the by-reference loop.
    unset($item);

    // Dialog filter first: it is purely in-memory and shrinks the candidate set for the permission lookup.
    if ($dialogId > 0 && !empty($results)) {
        $results = array_values(array_filter($results, static function ($row) use ($dialogId) {
            return (int) $row['dialog_id'] === $dialogId;
        }));
    }

    // MVA permission filter: only ask about the candidate message IDs instead of loading every
    // message the user may see, then test membership through a hash lookup.
    if ($userid > 0 && !empty($results)) {
        $candidateIds = array_values(array_unique(array_map('intval', array_column($results, 'msg_id'))));
        $idList = implode(',', $candidateIds);
        $allowedRows = $instance->query(
            "SELECT msg_id FROM msg_vectors WHERE allowed_users = ? AND msg_id IN ({$idList}) LIMIT " . count($candidateIds),
            [$userid]
        );
        $allowedLookup = array_flip(array_map('intval', array_column($allowedRows, 'msg_id')));
        $results = array_values(array_filter($results, static function ($row) use ($allowedLookup) {
            return isset($allowedLookup[(int) $row['msg_id']]);
        }));
    }

    return array_slice($results, 0, $limit);
}
/**
 * Hybrid message search: fuses full-text and vector rankings with reciprocal-rank scoring.
 *
 * Each ranker contributes 0.5 / (60 + rank + 1) per message (rank is 0-based); contributions for
 * the same msg_id are summed and the highest totals are returned with an added `rrf_score` key.
 *
 * @param string $keyword Keyword for the full-text ranker
 * @param array $queryVector Query vector for the vector ranker; an empty array skips it
 * @param int $userid User ID for permission filtering
 * @param int $limit Maximum number of rows to return
 * @param int $dialogId Dialog ID; 0 means no restriction
 * @return array Fused search results, best score first
 */
public static function msgHybridSearch(string $keyword, array $queryVector, int $userid = 0, int $limit = 20, int $dialogId = 0): array
{
    if ($limit <= 0) {
        return [];
    }

    // Each ranker supplies at least 50 candidates, and more when the caller asks for more, so a
    // large $limit is not starved by a fixed-size candidate pool.
    $poolSize = max(50, $limit);
    $textResults = self::msgFullTextSearch($keyword, $userid, $poolSize, 0, $dialogId);
    $vectorResults = !empty($queryVector) ? self::msgVectorSearch($queryVector, $userid, $poolSize, $dialogId) : [];

    $scores = [];
    $items = [];
    $k = 60;
    foreach ($textResults as $rank => $item) {
        $id = $item['msg_id'];
        $scores[$id] = ($scores[$id] ?? 0) + 0.5 / ($k + $rank + 1);
        $items[$id] = $item;
    }
    foreach ($vectorResults as $rank => $item) {
        $id = $item['msg_id'];
        $scores[$id] = ($scores[$id] ?? 0) + 0.5 / ($k + $rank + 1);
        // Keep the full-text row when both rankers returned the same message.
        if (!isset($items[$id])) {
            $items[$id] = $item;
        }
    }

    arsort($scores);

    $results = [];
    // preserve_keys keeps the msg_id keys intact so they can index $items.
    foreach (array_slice($scores, 0, $limit, true) as $id => $score) {
        $item = $items[$id];
        $item['rrf_score'] = $score;
        $results[] = $item;
    }

    return $results;
}
/**
 * Insert or replace the index row of a message.
 *
 * Any existing row for the message is deleted, then a fresh row is inserted with id = msg_id.
 *
 * @param array $data Message data:
 *   - msg_id: message ID (required, must be positive)
 *   - dialog_id: dialog ID (defaults to 0)
 *   - userid: sender ID (defaults to 0)
 *   - msg_type: message type (defaults to 'text')
 *   - content: message content (defaults to '')
 *   - content_vector: vector as a string such as '[0.1,0.2,...]' or as an array of numbers; optional
 *   - allowed_users: array of user IDs with access (defaults to none)
 *   - created_at: creation timestamp (defaults to the current time)
 * @return bool False when msg_id is not positive or the vector is malformed; otherwise the result of the insert
 */
public static function upsertMsgVector(array $data): bool
{
    $msgId = (int) ($data['msg_id'] ?? 0);
    if ($msgId <= 0) {
        return false;
    }

    // Validate the vector BEFORE deleting anything: the literal is spliced into the SQL text, so
    // every component must be numeric, and a malformed vector must not cost us the existing row.
    $vectorLiteral = null;
    $rawVector = $data['content_vector'] ?? null;
    if ($rawVector) {
        $components = is_array($rawVector)
            ? array_values($rawVector)
            : array_map('trim', explode(',', trim((string) $rawVector, "[]() \t\n\r")));
        foreach ($components as $component) {
            if (!is_numeric($component)) {
                return false;
            }
        }
        // Manticore expects vectors in () form.
        $vectorLiteral = '(' . implode(',', $components) . ')';
    }

    // Build the allowed_users MVA literal; every entry is cast to int.
    $allowedUsers = $data['allowed_users'] ?? [];
    $allowedUsersStr = !empty($allowedUsers) ? '(' . implode(',', array_map('intval', $allowedUsers)) . ')' : '()';

    $instance = new self();

    // Remove any existing row first.
    $instance->execute("DELETE FROM msg_vectors WHERE msg_id = ?", [$msgId]);

    // Insert the new row, with or without the vector column.
    if ($vectorLiteral !== null) {
        $sql = "INSERT INTO msg_vectors
(id, msg_id, dialog_id, userid, msg_type, content, allowed_users, created_at, content_vector)
VALUES (?, ?, ?, ?, ?, ?, {$allowedUsersStr}, ?, {$vectorLiteral})";
    } else {
        $sql = "INSERT INTO msg_vectors
(id, msg_id, dialog_id, userid, msg_type, content, allowed_users, created_at)
VALUES (?, ?, ?, ?, ?, ?, {$allowedUsersStr}, ?)";
    }
    $params = [
        $msgId,
        $msgId,
        $data['dialog_id'] ?? 0,
        $data['userid'] ?? 0,
        $data['msg_type'] ?? 'text',
        $data['content'] ?? '',
        $data['created_at'] ?? time()
    ];

    return $instance->execute($sql, $params);
}
/**
 * Overwrite the allowed_users list on every indexed message of a dialog.
 *
 * @param int $dialogId Dialog ID; non-positive values are rejected
 * @param array $userids User IDs with access; each entry is cast to int
 * @return int Number of updated messages (0 for a non-positive dialog ID)
 */
public static function updateDialogAllowedUsers(int $dialogId, array $userids): int
{
    if ($dialogId <= 0) {
        return 0;
    }

    // "(1,2,3)" for a populated list, "()" for an empty one.
    $idList = empty($userids) ? '' : implode(',', array_map('intval', $userids));
    $mvaLiteral = '(' . $idList . ')';

    // A single conditional UPDATE covers all rows of the dialog.
    $sql = "UPDATE msg_vectors SET allowed_users = {$mvaLiteral} WHERE dialog_id = ?";

    return (new self())->executeWithRowCount($sql, [$dialogId]);
}
/**
 * Delete the msg_vectors rows belonging to a message.
 *
 * @param int $msgId Message ID; non-positive values are rejected
 * @return bool Whether the operation succeeded (false for a non-positive message ID)
 */
public static function deleteMsgVector(int $msgId): bool
{
    if ($msgId > 0) {
        return (new self())->execute("DELETE FROM msg_vectors WHERE msg_id = ?", [$msgId]);
    }

    return false;
}
/**
 * Delete every indexed message of a dialog.
 *
 * @param int $dialogId Dialog ID; non-positive values are rejected
 * @return int Number of deleted rows (0 for a non-positive dialog ID)
 */
public static function deleteDialogMsgVectors(int $dialogId): int
{
    if ($dialogId > 0) {
        $sql = "DELETE FROM msg_vectors WHERE dialog_id = ?";

        return (new self())->executeWithRowCount($sql, [$dialogId]);
    }

    return 0;
}
/**
 * Remove every row from msg_vectors by truncating the table.
 *
 * @return bool Whether the operation succeeded
 */
public static function clearAllMsgVectors(): bool
{
    return (new self())->execute("TRUNCATE TABLE msg_vectors");
}
/**
 * Count the rows currently stored in msg_vectors.
 *
 * @return int Number of indexed messages; 0 when the count query yields no row
 */
public static function getIndexedMsgCount(): int
{
    $row = (new self())->queryOne("SELECT COUNT(*) as cnt FROM msg_vectors");
    if (!$row) {
        return 0;
    }

    return (int) $row['cnt'];
}
/**
 * Count the indexed messages of a single dialog.
 *
 * @param int $dialogId Dialog ID; non-positive values are rejected
 * @return int Number of indexed messages; 0 for a non-positive dialog ID or when the query yields no row
 */
public static function getDialogIndexedMsgCount(int $dialogId): int
{
    if ($dialogId <= 0) {
        return 0;
    }

    $sql = "SELECT COUNT(*) as cnt FROM msg_vectors WHERE dialog_id = ?";
    $row = (new self())->queryOne($sql, [$dialogId]);
    if (!$row) {
        return 0;
    }

    return (int) $row['cnt'];
}
/**
 * Fetch the highest msg_id present in msg_vectors.
 *
 * @return int Largest indexed message ID; 0 when the query yields no row or no maximum
 */
public static function getLastIndexedMsgId(): int
{
    $row = (new self())->queryOne("SELECT MAX(msg_id) as max_id FROM msg_vectors");
    if (!$row) {
        return 0;
    }

    return (int) ($row['max_id'] ?? 0);
}
// ==============================
// 向量更新方法
// ==============================
/**
 * Replace the embedding vector stored for a message.
 *
 * The existing row is read, deleted and re-inserted with the new vector; every other column is
 * copied from the existing row.
 *
 * @param int $msgId Message ID
 * @param string $vectorStr Vector string, e.g. '[0.1,0.2,...]'
 * @return bool False when an argument is empty/non-positive, the vector is malformed or no row
 *              exists for the message; otherwise the result of the insert
 */
public static function updateMsgVector(int $msgId, string $vectorStr): bool
{
    if ($msgId <= 0 || empty($vectorStr)) {
        return false;
    }

    // Validate BEFORE deleting: the vector is spliced into the SQL text, so each component must be
    // numeric, and a malformed vector must not destroy the stored row.
    $components = array_map('trim', explode(',', trim($vectorStr, "[]() \t\n\r")));
    foreach ($components as $component) {
        if (!is_numeric($component)) {
            return false;
        }
    }
    // Manticore expects vectors in () form.
    $vectorLiteral = '(' . implode(',', $components) . ')';

    $instance = new self();

    // Load the current row so its columns can be carried over.
    $existing = $instance->queryOne(
        "SELECT * FROM msg_vectors WHERE msg_id = ?",
        [$msgId]
    );
    if (!$existing) {
        return false;
    }

    // Drop the old row.
    $instance->execute("DELETE FROM msg_vectors WHERE msg_id = ?", [$msgId]);

    // Rebuild the allowed_users MVA literal from the stored value.
    $allowedUsersStr = !empty($existing['allowed_users'])
        ? '(' . $existing['allowed_users'] . ')'
        : '()';

    // Re-insert with the new vector.
    $sql = "INSERT INTO msg_vectors
(id, msg_id, dialog_id, userid, msg_type, content, allowed_users, created_at, content_vector)
VALUES (?, ?, ?, ?, ?, ?, {$allowedUsersStr}, ?, {$vectorLiteral})";

    return $instance->execute($sql, [
        $existing['id'],
        $existing['msg_id'],
        $existing['dialog_id'],
        $existing['userid'],
        $existing['msg_type'],
        $existing['content'],
        $existing['created_at'] ?? time(),
    ]);
}
/**
 * Replace the embedding vector stored for a file.
 *
 * The existing row is read, deleted and re-inserted with the new vector; every other column is
 * copied from the existing row.
 *
 * @param int $fileId File ID
 * @param string $vectorStr Vector string, e.g. '[0.1,0.2,...]'
 * @return bool False when an argument is empty/non-positive, the vector is malformed or no row
 *              exists for the file; otherwise the result of the insert
 */
public static function updateFileVector(int $fileId, string $vectorStr): bool
{
    if ($fileId <= 0 || empty($vectorStr)) {
        return false;
    }

    // Validate BEFORE deleting: the vector is spliced into the SQL text, so each component must be
    // numeric, and a malformed vector must not destroy the stored row.
    $components = array_map('trim', explode(',', trim($vectorStr, "[]() \t\n\r")));
    foreach ($components as $component) {
        if (!is_numeric($component)) {
            return false;
        }
    }
    // Manticore expects vectors in () form.
    $vectorLiteral = '(' . implode(',', $components) . ')';

    $instance = new self();

    // Load the current row so its columns can be carried over.
    $existing = $instance->queryOne(
        "SELECT * FROM file_vectors WHERE file_id = ?",
        [$fileId]
    );
    if (!$existing) {
        return false;
    }

    // Drop the old row.
    $instance->execute("DELETE FROM file_vectors WHERE file_id = ?", [$fileId]);

    // Rebuild the allowed_users MVA literal from the stored value.
    $allowedUsersStr = !empty($existing['allowed_users'])
        ? '(' . $existing['allowed_users'] . ')'
        : '()';

    // Re-insert with the new vector.
    $sql = "INSERT INTO file_vectors
(id, file_id, userid, pshare, file_name, file_type, file_ext, content, allowed_users, content_vector)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, {$allowedUsersStr}, {$vectorLiteral})";

    return $instance->execute($sql, [
        $existing['id'],
        $existing['file_id'],
        $existing['userid'],
        $existing['pshare'] ?? 0,
        $existing['file_name'],
        $existing['file_type'],
        $existing['file_ext'],
        $existing['content'],
    ]);
}
/**
 * Replace the embedding vector stored for a task.
 *
 * The existing row is read, deleted and re-inserted with the new vector; every other column is
 * copied from the existing row.
 *
 * @param int $taskId Task ID
 * @param string $vectorStr Vector string, e.g. '[0.1,0.2,...]'
 * @return bool False when an argument is empty/non-positive, the vector is malformed or no row
 *              exists for the task; otherwise the result of the insert
 */
public static function updateTaskVector(int $taskId, string $vectorStr): bool
{
    if ($taskId <= 0 || empty($vectorStr)) {
        return false;
    }

    // Validate BEFORE deleting: the vector is spliced into the SQL text, so each component must be
    // numeric, and a malformed vector must not destroy the stored row.
    $components = array_map('trim', explode(',', trim($vectorStr, "[]() \t\n\r")));
    foreach ($components as $component) {
        if (!is_numeric($component)) {
            return false;
        }
    }
    // Manticore expects vectors in () form.
    $vectorLiteral = '(' . implode(',', $components) . ')';

    $instance = new self();

    // Load the current row so its columns can be carried over.
    $existing = $instance->queryOne(
        "SELECT * FROM task_vectors WHERE task_id = ?",
        [$taskId]
    );
    if (!$existing) {
        return false;
    }

    // Drop the old row.
    $instance->execute("DELETE FROM task_vectors WHERE task_id = ?", [$taskId]);

    // Rebuild the allowed_users MVA literal from the stored value.
    $allowedUsersStr = !empty($existing['allowed_users'])
        ? '(' . $existing['allowed_users'] . ')'
        : '()';

    // Re-insert with the new vector.
    $sql = "INSERT INTO task_vectors
(id, task_id, project_id, userid, visibility, task_name, task_desc, task_content, allowed_users, content_vector)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, {$allowedUsersStr}, {$vectorLiteral})";

    return $instance->execute($sql, [
        $existing['id'],
        $existing['task_id'],
        $existing['project_id'],
        $existing['userid'] ?? 0,
        $existing['visibility'] ?? 1,
        $existing['task_name'],
        $existing['task_desc'],
        $existing['task_content'],
    ]);
}
/**
 * Replace the embedding vector stored for a project.
 *
 * The existing row is read, deleted and re-inserted with the new vector; every other column is
 * copied from the existing row.
 *
 * @param int $projectId Project ID
 * @param string $vectorStr Vector string, e.g. '[0.1,0.2,...]'
 * @return bool False when an argument is empty/non-positive, the vector is malformed or no row
 *              exists for the project; otherwise the result of the insert
 */
public static function updateProjectVector(int $projectId, string $vectorStr): bool
{
    if ($projectId <= 0 || empty($vectorStr)) {
        return false;
    }

    // Validate BEFORE deleting: the vector is spliced into the SQL text, so each component must be
    // numeric, and a malformed vector must not destroy the stored row.
    $components = array_map('trim', explode(',', trim($vectorStr, "[]() \t\n\r")));
    foreach ($components as $component) {
        if (!is_numeric($component)) {
            return false;
        }
    }
    // Manticore expects vectors in () form.
    $vectorLiteral = '(' . implode(',', $components) . ')';

    $instance = new self();

    // Load the current row so its columns can be carried over.
    $existing = $instance->queryOne(
        "SELECT * FROM project_vectors WHERE project_id = ?",
        [$projectId]
    );
    if (!$existing) {
        return false;
    }

    // Drop the old row.
    $instance->execute("DELETE FROM project_vectors WHERE project_id = ?", [$projectId]);

    // Rebuild the allowed_users MVA literal from the stored value.
    $allowedUsersStr = !empty($existing['allowed_users'])
        ? '(' . $existing['allowed_users'] . ')'
        : '()';

    // Re-insert with the new vector.
    $sql = "INSERT INTO project_vectors
(id, project_id, userid, personal, project_name, project_desc, allowed_users, content_vector)
VALUES (?, ?, ?, ?, ?, ?, {$allowedUsersStr}, {$vectorLiteral})";

    return $instance->execute($sql, [
        $existing['id'],
        $existing['project_id'],
        $existing['userid'] ?? 0,
        $existing['personal'] ?? 0,
        $existing['project_name'],
        $existing['project_desc'],
    ]);
}
/**
 * Replace the embedding vector stored for a user.
 *
 * The existing row is read, deleted and re-inserted with the new vector; every other column is
 * copied from the existing row.
 *
 * @param int $userid User ID
 * @param string $vectorStr Vector string, e.g. '[0.1,0.2,...]'
 * @return bool False when an argument is empty/non-positive, the vector is malformed or no row
 *              exists for the user; otherwise the result of the insert
 */
public static function updateUserVector(int $userid, string $vectorStr): bool
{
    if ($userid <= 0 || empty($vectorStr)) {
        return false;
    }

    // Validate BEFORE deleting: the vector is spliced into the SQL text, so each component must be
    // numeric, and a malformed vector must not destroy the stored row.
    $components = array_map('trim', explode(',', trim($vectorStr, "[]() \t\n\r")));
    foreach ($components as $component) {
        if (!is_numeric($component)) {
            return false;
        }
    }
    // Manticore expects vectors in () form.
    $vectorLiteral = '(' . implode(',', $components) . ')';

    $instance = new self();

    // Load the current row so its columns can be carried over.
    $existing = $instance->queryOne(
        "SELECT * FROM user_vectors WHERE userid = ?",
        [$userid]
    );
    if (!$existing) {
        return false;
    }

    // Drop the old row.
    $instance->execute("DELETE FROM user_vectors WHERE userid = ?", [$userid]);

    // Re-insert with the new vector.
    $sql = "INSERT INTO user_vectors
(id, userid, nickname, email, profession, tags, introduction, content_vector)
VALUES (?, ?, ?, ?, ?, ?, ?, {$vectorLiteral})";

    return $instance->execute($sql, [
        $existing['id'],
        $existing['userid'],
        $existing['nickname'],
        $existing['email'],
        $existing['profession'],
        $existing['tags'] ?? '',
        $existing['introduction'],
    ]);
}
// ==============================
// 通用工具方法
// ==============================
/**
 * Obtain the embedding vector for a piece of text via AI::getEmbedding().
 *
 * Exceptions raised by the call are logged as warnings and reported as an empty result.
 *
 * @param string $text Text to embed
 * @return array Embedding vector; an empty array signals failure or empty input
 */
public static function getEmbedding(string $text): array
{
    if (empty($text)) {
        return [];
    }

    $vector = [];
    try {
        $response = AI::getEmbedding($text);
        // Only a successful response contributes data; anything else leaves the empty default.
        if (Base::isSuccess($response)) {
            $vector = $response['data'] ?? [];
        }
    } catch (\Exception $e) {
        Log::warning('Get embedding error: ' . $e->getMessage());
    }

    return $vector;
}
}