mirror of
https://github.com/kuaifan/dootask.git
synced 2026-03-17 03:03:41 +00:00
feat: 升级语音识别模型并优化转写逻辑
- 语音识别模型从 whisper-1 升级到 gpt-4o-mini-transcribe - 根据用户语言设置自动添加简繁体中文提示词 - 录音转文字新增 dialog_id 参数,支持获取对话上下文提高识别准确率 - 移除前端语言手动选择功能,简化用户操作 - 添加参数空值保护 - 优化 reasoning_effort 参数逻辑,区分 gpt-5 和 gpt-5.1+ 版本
This commit is contained in:
parent
4b0f4e388c
commit
42a2eb56c7
@ -1306,11 +1306,7 @@ class DialogController extends AbstractController
|
|||||||
*
|
*
|
||||||
* @apiParam {String} base64 语音base64
|
* @apiParam {String} base64 语音base64
|
||||||
* @apiParam {Number} duration 语音时长(毫秒)
|
* @apiParam {Number} duration 语音时长(毫秒)
|
||||||
* @apiParam {String} [language] 识别语言
|
* @apiParam {Number} [dialog_id] 会话ID,用于获取上下文提高识别准确率
|
||||||
* - 比如:zh
|
|
||||||
* - 默认:自动识别
|
|
||||||
* - 格式:符合 ISO_639 标准
|
|
||||||
* - 此参数不一定起效果,AI会根据语音和language参考翻译识别结果
|
|
||||||
* @apiParam {String} [translate] 翻译识别结果
|
* @apiParam {String} [translate] 翻译识别结果
|
||||||
* - 比如:zh
|
* - 比如:zh
|
||||||
* - 默认:不翻译结果
|
* - 默认:不翻译结果
|
||||||
@ -1327,9 +1323,9 @@ class DialogController extends AbstractController
|
|||||||
//
|
//
|
||||||
$path = "uploads/tmp/chat/" . date("Ym") . "/" . $user->userid . "/";
|
$path = "uploads/tmp/chat/" . date("Ym") . "/" . $user->userid . "/";
|
||||||
$base64 = Request::input('base64');
|
$base64 = Request::input('base64');
|
||||||
$language = Request::input('language');
|
|
||||||
$translate = Request::input('translate');
|
$translate = Request::input('translate');
|
||||||
$duration = intval(Request::input('duration'));
|
$duration = intval(Request::input('duration'));
|
||||||
|
$dialogId = intval(Request::input('dialog_id'));
|
||||||
if ($duration < 600) {
|
if ($duration < 600) {
|
||||||
return Base::retError('说话时间太短');
|
return Base::retError('说话时间太短');
|
||||||
}
|
}
|
||||||
@ -1342,17 +1338,35 @@ class DialogController extends AbstractController
|
|||||||
return Base::retError($data['msg']);
|
return Base::retError($data['msg']);
|
||||||
}
|
}
|
||||||
$recordData = $data['data'];
|
$recordData = $data['data'];
|
||||||
|
// 构建上下文提示词
|
||||||
|
$promptParts = [];
|
||||||
|
if ($user->lang === 'zh') {
|
||||||
|
$promptParts[] = "如果识别到中文,优先使用简体中文输出";
|
||||||
|
} elseif ($user->lang === 'zh-CHT') {
|
||||||
|
$promptParts[] = "如果識別到中文,優先使用繁體中文輸出";
|
||||||
|
}
|
||||||
|
// 获取最近的聊天上下文
|
||||||
|
if ($dialogId > 0) {
|
||||||
|
$contextTexts = WebSocketDialogMsg::whereDialogId($dialogId)
|
||||||
|
->whereIn('type', ['text'])
|
||||||
|
->orderByDesc('id')
|
||||||
|
->limit(5)
|
||||||
|
->get()
|
||||||
|
->reverse()
|
||||||
|
->map(fn($msg) => $msg->extractMessageContent(100))
|
||||||
|
->filter()
|
||||||
|
->values()
|
||||||
|
->toArray();
|
||||||
|
if (!empty($contextTexts)) {
|
||||||
|
$promptParts[] = "对话上下文:" . implode(";", $contextTexts) . "。";
|
||||||
|
}
|
||||||
|
}
|
||||||
// 转文字
|
// 转文字
|
||||||
$extParams = [];
|
$extParams = [];
|
||||||
if ($language) {
|
if (!empty($promptParts)) {
|
||||||
$extParams = [
|
$extParams['prompt'] = implode("\n\n", $promptParts);
|
||||||
'language' => $language === 'zh-CHT' ? 'zh' : $language,
|
|
||||||
'prompt' => "将此语音识别为“" . Doo::getLanguages($language) . "”。",
|
|
||||||
];
|
|
||||||
}
|
}
|
||||||
$result = AI::transcriptions($recordData['file'], $extParams, [
|
$result = AI::transcriptions($recordData['file'], $extParams);
|
||||||
'accept-language' => Request::header('Accept-Language', 'zh')
|
|
||||||
]);
|
|
||||||
if (Base::isError($result)) {
|
if (Base::isError($result)) {
|
||||||
return $result;
|
return $result;
|
||||||
}
|
}
|
||||||
@ -1944,10 +1958,15 @@ class DialogController extends AbstractController
|
|||||||
return Base::retSuccess("success", $msg);
|
return Base::retSuccess("success", $msg);
|
||||||
}
|
}
|
||||||
WebSocketDialog::checkDialog($msg->dialog_id);
|
WebSocketDialog::checkDialog($msg->dialog_id);
|
||||||
|
// 根据用户语言构建提示词
|
||||||
|
$extParams = [];
|
||||||
|
if ($user->lang === 'zh') {
|
||||||
|
$extParams['prompt'] = "如果识别到中文,优先使用简体中文输出";
|
||||||
|
} elseif ($user->lang === 'zh-CHT') {
|
||||||
|
$extParams['prompt'] = "如果識別到中文,優先使用繁體中文輸出";
|
||||||
|
}
|
||||||
//
|
//
|
||||||
$result = AI::transcriptions(public_path($msgData['path']), [], [
|
$result = AI::transcriptions(public_path($msgData['path']), $extParams);
|
||||||
'accept-language' => Request::header('Accept-Language', 'zh')
|
|
||||||
]);
|
|
||||||
if (Base::isError($result)) {
|
if (Base::isError($result)) {
|
||||||
return $result;
|
return $result;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -271,6 +271,9 @@ class AI
|
|||||||
{
|
{
|
||||||
Apps::isInstalledThrow('ai');
|
Apps::isInstalledThrow('ai');
|
||||||
|
|
||||||
|
$extParams = $extParams ?: [];
|
||||||
|
$extHeaders = $extHeaders ?: [];
|
||||||
|
|
||||||
if (!file_exists($filePath)) {
|
if (!file_exists($filePath)) {
|
||||||
return Base::retError("语音文件不存在");
|
return Base::retError("语音文件不存在");
|
||||||
}
|
}
|
||||||
@ -287,7 +290,7 @@ class AI
|
|||||||
$result = Cache::remember($cacheKey, Carbon::now()->addDays(), function () use ($extParams, $extHeaders, $filePath, $audioProvider) {
|
$result = Cache::remember($cacheKey, Carbon::now()->addDays(), function () use ($extParams, $extHeaders, $filePath, $audioProvider) {
|
||||||
$post = array_merge($extParams, [
|
$post = array_merge($extParams, [
|
||||||
'file' => new \CURLFile($filePath),
|
'file' => new \CURLFile($filePath),
|
||||||
'model' => 'whisper-1',
|
'model' => 'gpt-4o-mini-transcribe',
|
||||||
]);
|
]);
|
||||||
$header = array_merge($extHeaders, [
|
$header = array_merge($extHeaders, [
|
||||||
'Content-Type' => 'multipart/form-data',
|
'Content-Type' => 'multipart/form-data',
|
||||||
@ -373,8 +376,9 @@ class AI
|
|||||||
]
|
]
|
||||||
],
|
],
|
||||||
];
|
];
|
||||||
if (self::shouldSendReasoningEffort($provider)) {
|
$reasoningEffort = self::getReasoningEffort($provider);
|
||||||
$payload['reasoning_effort'] = 'minimal';
|
if ($reasoningEffort !== null) {
|
||||||
|
$payload['reasoning_effort'] = $reasoningEffort;
|
||||||
}
|
}
|
||||||
$post = json_encode($payload);
|
$post = json_encode($payload);
|
||||||
|
|
||||||
@ -454,8 +458,9 @@ class AI
|
|||||||
]
|
]
|
||||||
],
|
],
|
||||||
];
|
];
|
||||||
if (self::shouldSendReasoningEffort($provider)) {
|
$reasoningEffort = self::getReasoningEffort($provider);
|
||||||
$payload['reasoning_effort'] = 'minimal';
|
if ($reasoningEffort !== null) {
|
||||||
|
$payload['reasoning_effort'] = $reasoningEffort;
|
||||||
}
|
}
|
||||||
$post = json_encode($payload);
|
$post = json_encode($payload);
|
||||||
|
|
||||||
@ -542,8 +547,9 @@ class AI
|
|||||||
]
|
]
|
||||||
],
|
],
|
||||||
];
|
];
|
||||||
if (self::shouldSendReasoningEffort($provider)) {
|
$reasoningEffort = self::getReasoningEffort($provider);
|
||||||
$payload['reasoning_effort'] = 'minimal';
|
if ($reasoningEffort !== null) {
|
||||||
|
$payload['reasoning_effort'] = $reasoningEffort;
|
||||||
}
|
}
|
||||||
$post = json_encode($payload);
|
$post = json_encode($payload);
|
||||||
|
|
||||||
@ -712,7 +718,7 @@ class AI
|
|||||||
|
|
||||||
return [
|
return [
|
||||||
'vendor' => 'openai',
|
'vendor' => 'openai',
|
||||||
'model' => 'whisper-1',
|
'model' => 'gpt-4o-mini-transcribe',
|
||||||
'api_key' => $key,
|
'api_key' => $key,
|
||||||
'base_url' => rtrim($baseUrl, '/'),
|
'base_url' => rtrim($baseUrl, '/'),
|
||||||
'agency' => $agency,
|
'agency' => $agency,
|
||||||
@ -720,23 +726,37 @@ class AI
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* 是否需要附加 reasoning_effort 参数
|
* 获取 reasoning_effort 参数值
|
||||||
* @param array $provider
|
* @param array $provider
|
||||||
* @return bool
|
* @return string|null 返回 'none'/'low' 或 null(不需要此参数)
|
||||||
*/
|
*/
|
||||||
protected static function shouldSendReasoningEffort(array $provider): bool
|
protected static function getReasoningEffort(array $provider): ?string
|
||||||
{
|
{
|
||||||
if (($provider['vendor'] ?? '') !== 'openai') {
|
if (($provider['vendor'] ?? '') !== 'openai') {
|
||||||
return false;
|
return null;
|
||||||
}
|
}
|
||||||
$model = $provider['model'] ?? '';
|
$model = $provider['model'] ?? '';
|
||||||
|
|
||||||
// 匹配 gpt- 开头后跟数字的模型名称
|
// gpt-5.1 及之后版本支持 none
|
||||||
if (preg_match('/^gpt-(\d+)/', $model, $matches)) {
|
if (preg_match('/^gpt-(\d+)\.(\d+)/', $model, $matches)) {
|
||||||
return intval($matches[1]) >= 5;
|
$major = intval($matches[1]);
|
||||||
|
$minor = intval($matches[2]);
|
||||||
|
if ($major > 5 || ($major === 5 && $minor >= 1)) {
|
||||||
|
return 'none';
|
||||||
|
}
|
||||||
|
if ($major === 5) {
|
||||||
|
return 'low';
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
// gpt-5 (无小版本号) 使用 low
|
||||||
|
if (preg_match('/^gpt-(\d+)(?![.\d])/', $model, $matches)) {
|
||||||
|
if (intval($matches[1]) >= 5) {
|
||||||
|
return 'low';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
@ -262,8 +262,7 @@
|
|||||||
<div class="convert-body">
|
<div class="convert-body">
|
||||||
<div class="convert-content">
|
<div class="convert-content">
|
||||||
<div v-if="recordConvertSetting" class="convert-setting">
|
<div v-if="recordConvertSetting" class="convert-setting">
|
||||||
<i class="taskfont" :class="{active: !!cacheTranscriptionLanguage}" @click="convertSetting('transcription', $event)"></i>
|
<i class="taskfont" :class="{active: !!recordConvertTranslate}" @click="convertSetting($event)"></i>
|
||||||
<i class="taskfont" :class="{active: !!recordConvertTranslate}" @click="convertSetting('translate', $event)"></i>
|
|
||||||
</div>
|
</div>
|
||||||
<div class="convert-input">
|
<div class="convert-input">
|
||||||
<Input
|
<Input
|
||||||
@ -599,7 +598,6 @@ export default {
|
|||||||
'cacheDialogs',
|
'cacheDialogs',
|
||||||
'dialogMsgs',
|
'dialogMsgs',
|
||||||
|
|
||||||
'cacheTranscriptionLanguage',
|
|
||||||
'cacheKeyboard',
|
'cacheKeyboard',
|
||||||
'keyboardShow',
|
'keyboardShow',
|
||||||
'keyboardHeight',
|
'keyboardHeight',
|
||||||
@ -1686,7 +1684,6 @@ export default {
|
|||||||
dialog_id: this.dialogId,
|
dialog_id: this.dialogId,
|
||||||
base64: reader.result,
|
base64: reader.result,
|
||||||
duration: this.recordDuration,
|
duration: this.recordDuration,
|
||||||
language: this.cacheTranscriptionLanguage,
|
|
||||||
translate: this.recordConvertTranslate
|
translate: this.recordConvertTranslate
|
||||||
},
|
},
|
||||||
method: 'post',
|
method: 'post',
|
||||||
@ -1707,7 +1704,7 @@ export default {
|
|||||||
reader.readAsDataURL(this.recordBlob);
|
reader.readAsDataURL(this.recordBlob);
|
||||||
},
|
},
|
||||||
|
|
||||||
async convertSetting(type, event) {
|
async convertSetting(event) {
|
||||||
if (this.recordConvertStatus !== 1) {
|
if (this.recordConvertStatus !== 1) {
|
||||||
$A.messageWarning("请稍后再试...")
|
$A.messageWarning("请稍后再试...")
|
||||||
return;
|
return;
|
||||||
@ -1717,33 +1714,17 @@ export default {
|
|||||||
label: languageList[item],
|
label: languageList[item],
|
||||||
value: item
|
value: item
|
||||||
}))
|
}))
|
||||||
let active
|
list.unshift(...[
|
||||||
if (type === 'transcription') {
|
{label: this.$L('选择翻译结果'), value: '', disabled: true},
|
||||||
// 语音转文字
|
{label: this.$L('不翻译结果'), value: '', divided: true},
|
||||||
list.unshift(...[
|
])
|
||||||
{label: this.$L('选择识别语言'), value: '', disabled: true},
|
|
||||||
{label: this.$L('自动识别'), value: '', divided: true},
|
|
||||||
])
|
|
||||||
active = this.cacheTranscriptionLanguage
|
|
||||||
} else {
|
|
||||||
// 翻译
|
|
||||||
list.unshift(...[
|
|
||||||
{label: this.$L('选择翻译结果'), value: '', disabled: true},
|
|
||||||
{label: this.$L('不翻译结果'), value: '', divided: true},
|
|
||||||
])
|
|
||||||
active = this.recordConvertTranslate
|
|
||||||
}
|
|
||||||
this.$store.commit('menu/operation', {
|
this.$store.commit('menu/operation', {
|
||||||
event,
|
event,
|
||||||
list,
|
list,
|
||||||
active,
|
active: this.recordConvertTranslate,
|
||||||
language: false,
|
language: false,
|
||||||
onUpdate: async (language) => {
|
onUpdate: (language) => {
|
||||||
if (type === 'transcription') {
|
this.recordConvertTranslate = language
|
||||||
await this.$store.dispatch('setTranscriptionLanguage', language)
|
|
||||||
} else {
|
|
||||||
this.recordConvertTranslate = language
|
|
||||||
}
|
|
||||||
this.convertRecord()
|
this.convertRecord()
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
|||||||
19
resources/assets/js/store/actions.js
vendored
19
resources/assets/js/store/actions.js
vendored
@ -1135,7 +1135,6 @@ export default {
|
|||||||
'cacheLoginEmail',
|
'cacheLoginEmail',
|
||||||
'cacheFileSort',
|
'cacheFileSort',
|
||||||
'cacheTranslationLanguage',
|
'cacheTranslationLanguage',
|
||||||
'cacheTranscriptionLanguage',
|
|
||||||
'cacheTranslations',
|
'cacheTranslations',
|
||||||
'cacheEmojis',
|
'cacheEmojis',
|
||||||
'userInfo',
|
'userInfo',
|
||||||
@ -1174,8 +1173,7 @@ export default {
|
|||||||
'clientId',
|
'clientId',
|
||||||
'cacheServerUrl',
|
'cacheServerUrl',
|
||||||
'cacheCalendarView',
|
'cacheCalendarView',
|
||||||
'cacheTranslationLanguage',
|
'cacheTranslationLanguage'
|
||||||
'cacheTranscriptionLanguage'
|
|
||||||
],
|
],
|
||||||
array: [
|
array: [
|
||||||
'cacheUserBasic',
|
'cacheUserBasic',
|
||||||
@ -1231,11 +1229,6 @@ export default {
|
|||||||
state.cacheTranslationLanguage = languageName;
|
state.cacheTranslationLanguage = languageName;
|
||||||
}
|
}
|
||||||
|
|
||||||
// TranscriptionLanguage 检查
|
|
||||||
if (typeof languageList[state.cacheTranscriptionLanguage] === "undefined") {
|
|
||||||
state.cacheTranscriptionLanguage = '';
|
|
||||||
}
|
|
||||||
|
|
||||||
// 处理用户信息
|
// 处理用户信息
|
||||||
if (state.userInfo.userid) {
|
if (state.userInfo.userid) {
|
||||||
state.userId = state.userInfo.userid = $A.runNum(state.userInfo.userid);
|
state.userId = state.userInfo.userid = $A.runNum(state.userInfo.userid);
|
||||||
@ -4290,16 +4283,6 @@ export default {
|
|||||||
$A.IDBSave('cacheTranslationLanguage', language);
|
$A.IDBSave('cacheTranslationLanguage', language);
|
||||||
},
|
},
|
||||||
|
|
||||||
/**
|
|
||||||
* 设置语音转文字语言
|
|
||||||
* @param state
|
|
||||||
* @param language
|
|
||||||
*/
|
|
||||||
setTranscriptionLanguage({state}, language) {
|
|
||||||
state.cacheTranscriptionLanguage = language
|
|
||||||
$A.IDBSave('cacheTranscriptionLanguage', language);
|
|
||||||
},
|
|
||||||
|
|
||||||
/** *****************************************************************************************/
|
/** *****************************************************************************************/
|
||||||
/** ************************************* loads *********************************************/
|
/** ************************************* loads *********************************************/
|
||||||
/** *****************************************************************************************/
|
/** *****************************************************************************************/
|
||||||
|
|||||||
3
resources/assets/js/store/state.js
vendored
3
resources/assets/js/store/state.js
vendored
@ -264,9 +264,6 @@ export default {
|
|||||||
cacheTranslationLanguage: '',
|
cacheTranslationLanguage: '',
|
||||||
cacheTranslations: [],
|
cacheTranslations: [],
|
||||||
|
|
||||||
// 语音转文字(识别语言)
|
|
||||||
cacheTranscriptionLanguage: '',
|
|
||||||
|
|
||||||
// 下拉菜单操作
|
// 下拉菜单操作
|
||||||
menuOperation: {},
|
menuOperation: {},
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user