feat: 升级语音识别模型并优化转写逻辑

- 语音识别模型从 whisper-1 升级到 gpt-4o-mini-transcribe
- 根据用户语言设置自动添加简繁体中文提示词
- 录音转文字新增 dialog_id 参数,支持获取对话上下文提高识别准确率
- 移除前端语言手动选择功能,简化用户操作
- 添加参数空值保护
- 优化 reasoning_effort 参数逻辑,区分 gpt-5 和 gpt-5.1+ 版本
This commit is contained in:
kuaifan 2026-01-05 02:26:36 +00:00
parent 4b0f4e388c
commit 42a2eb56c7
5 changed files with 84 additions and 84 deletions

View File

@ -1306,11 +1306,7 @@ class DialogController extends AbstractController
*
* @apiParam {String} base64 语音base64
* @apiParam {Number} duration 语音时长(毫秒)
* @apiParam {String} [language] 识别语言
* - 比如zh
* - 默认:自动识别
* - 格式:符合 ISO_639 标准
* - 此参数不一定起效果AI会根据语音和language参考翻译识别结果
* @apiParam {Number} [dialog_id] 会话ID,用于获取上下文,提高识别准确率
* @apiParam {String} [translate] 翻译识别结果
* - 比如zh
* - 默认:不翻译结果
@ -1327,9 +1323,9 @@ class DialogController extends AbstractController
//
$path = "uploads/tmp/chat/" . date("Ym") . "/" . $user->userid . "/";
$base64 = Request::input('base64');
$language = Request::input('language');
$translate = Request::input('translate');
$duration = intval(Request::input('duration'));
$dialogId = intval(Request::input('dialog_id'));
if ($duration < 600) {
return Base::retError('说话时间太短');
}
@ -1342,17 +1338,35 @@ class DialogController extends AbstractController
return Base::retError($data['msg']);
}
$recordData = $data['data'];
// 构建上下文提示词
$promptParts = [];
if ($user->lang === 'zh') {
$promptParts[] = "如果识别到中文,优先使用简体中文输出";
} elseif ($user->lang === 'zh-CHT') {
$promptParts[] = "如果識別到中文,優先使用繁體中文輸出";
}
// 获取最近的聊天上下文
if ($dialogId > 0) {
$contextTexts = WebSocketDialogMsg::whereDialogId($dialogId)
->whereIn('type', ['text'])
->orderByDesc('id')
->limit(5)
->get()
->reverse()
->map(fn($msg) => $msg->extractMessageContent(100))
->filter()
->values()
->toArray();
if (!empty($contextTexts)) {
$promptParts[] = "对话上下文:" . implode("", $contextTexts) . "";
}
}
// 转文字
$extParams = [];
if ($language) {
$extParams = [
'language' => $language === 'zh-CHT' ? 'zh' : $language,
'prompt' => "将此语音识别为“" . Doo::getLanguages($language) . "”。",
];
if (!empty($promptParts)) {
$extParams['prompt'] = implode("\n\n", $promptParts);
}
$result = AI::transcriptions($recordData['file'], $extParams, [
'accept-language' => Request::header('Accept-Language', 'zh')
]);
$result = AI::transcriptions($recordData['file'], $extParams);
if (Base::isError($result)) {
return $result;
}
@ -1944,10 +1958,15 @@ class DialogController extends AbstractController
return Base::retSuccess("success", $msg);
}
WebSocketDialog::checkDialog($msg->dialog_id);
// 根据用户语言构建提示词
$extParams = [];
if ($user->lang === 'zh') {
$extParams['prompt'] = "如果识别到中文,优先使用简体中文输出";
} elseif ($user->lang === 'zh-CHT') {
$extParams['prompt'] = "如果識別到中文,優先使用繁體中文輸出";
}
//
$result = AI::transcriptions(public_path($msgData['path']), [], [
'accept-language' => Request::header('Accept-Language', 'zh')
]);
$result = AI::transcriptions(public_path($msgData['path']), $extParams);
if (Base::isError($result)) {
return $result;
}

View File

@ -271,6 +271,9 @@ class AI
{
Apps::isInstalledThrow('ai');
$extParams = $extParams ?: [];
$extHeaders = $extHeaders ?: [];
if (!file_exists($filePath)) {
return Base::retError("语音文件不存在");
}
@ -287,7 +290,7 @@ class AI
$result = Cache::remember($cacheKey, Carbon::now()->addDays(), function () use ($extParams, $extHeaders, $filePath, $audioProvider) {
$post = array_merge($extParams, [
'file' => new \CURLFile($filePath),
'model' => 'whisper-1',
'model' => 'gpt-4o-mini-transcribe',
]);
$header = array_merge($extHeaders, [
'Content-Type' => 'multipart/form-data',
@ -373,8 +376,9 @@ class AI
]
],
];
if (self::shouldSendReasoningEffort($provider)) {
$payload['reasoning_effort'] = 'minimal';
$reasoningEffort = self::getReasoningEffort($provider);
if ($reasoningEffort !== null) {
$payload['reasoning_effort'] = $reasoningEffort;
}
$post = json_encode($payload);
@ -454,8 +458,9 @@ class AI
]
],
];
if (self::shouldSendReasoningEffort($provider)) {
$payload['reasoning_effort'] = 'minimal';
$reasoningEffort = self::getReasoningEffort($provider);
if ($reasoningEffort !== null) {
$payload['reasoning_effort'] = $reasoningEffort;
}
$post = json_encode($payload);
@ -542,8 +547,9 @@ class AI
]
],
];
if (self::shouldSendReasoningEffort($provider)) {
$payload['reasoning_effort'] = 'minimal';
$reasoningEffort = self::getReasoningEffort($provider);
if ($reasoningEffort !== null) {
$payload['reasoning_effort'] = $reasoningEffort;
}
$post = json_encode($payload);
@ -712,7 +718,7 @@ class AI
return [
'vendor' => 'openai',
'model' => 'whisper-1',
'model' => 'gpt-4o-mini-transcribe',
'api_key' => $key,
'base_url' => rtrim($baseUrl, '/'),
'agency' => $agency,
@ -720,23 +726,37 @@ class AI
}
/**
* 是否需要附加 reasoning_effort 参数
* 获取 reasoning_effort 参数值
* @param array $provider
* @return bool
* @return string|null 返回 'none'/'low',或 null(不需要此参数)
*/
protected static function shouldSendReasoningEffort(array $provider): bool
protected static function getReasoningEffort(array $provider): ?string
{
if (($provider['vendor'] ?? '') !== 'openai') {
return false;
return null;
}
$model = $provider['model'] ?? '';
// 匹配 gpt- 开头后跟数字的模型名称
if (preg_match('/^gpt-(\d+)/', $model, $matches)) {
return intval($matches[1]) >= 5;
// gpt-5.1 及之后版本支持 none
if (preg_match('/^gpt-(\d+)\.(\d+)/', $model, $matches)) {
$major = intval($matches[1]);
$minor = intval($matches[2]);
if ($major > 5 || ($major === 5 && $minor >= 1)) {
return 'none';
}
if ($major === 5) {
return 'low';
}
}
return false;
// gpt-5 (无小版本号) 使用 low
if (preg_match('/^gpt-(\d+)(?![.\d])/', $model, $matches)) {
if (intval($matches[1]) >= 5) {
return 'low';
}
}
return null;
}
/**

View File

@ -262,8 +262,7 @@
<div class="convert-body">
<div class="convert-content">
<div v-if="recordConvertSetting" class="convert-setting">
<i class="taskfont" :class="{active: !!cacheTranscriptionLanguage}" @click="convertSetting('transcription', $event)">&#xe628;</i>
<i class="taskfont" :class="{active: !!recordConvertTranslate}" @click="convertSetting('translate', $event)">&#xe795;</i>
<i class="taskfont" :class="{active: !!recordConvertTranslate}" @click="convertSetting($event)">&#xe795;</i>
</div>
<div class="convert-input">
<Input
@ -599,7 +598,6 @@ export default {
'cacheDialogs',
'dialogMsgs',
'cacheTranscriptionLanguage',
'cacheKeyboard',
'keyboardShow',
'keyboardHeight',
@ -1686,7 +1684,6 @@ export default {
dialog_id: this.dialogId,
base64: reader.result,
duration: this.recordDuration,
language: this.cacheTranscriptionLanguage,
translate: this.recordConvertTranslate
},
method: 'post',
@ -1707,7 +1704,7 @@ export default {
reader.readAsDataURL(this.recordBlob);
},
async convertSetting(type, event) {
async convertSetting(event) {
if (this.recordConvertStatus !== 1) {
$A.messageWarning("请稍后再试...")
return;
@ -1717,33 +1714,17 @@ export default {
label: languageList[item],
value: item
}))
let active
if (type === 'transcription') {
//
list.unshift(...[
{label: this.$L('选择识别语言'), value: '', disabled: true},
{label: this.$L('自动识别'), value: '', divided: true},
])
active = this.cacheTranscriptionLanguage
} else {
//
list.unshift(...[
{label: this.$L('选择翻译结果'), value: '', disabled: true},
{label: this.$L('不翻译结果'), value: '', divided: true},
])
active = this.recordConvertTranslate
}
list.unshift(...[
{label: this.$L('选择翻译结果'), value: '', disabled: true},
{label: this.$L('不翻译结果'), value: '', divided: true},
])
this.$store.commit('menu/operation', {
event,
list,
active,
active: this.recordConvertTranslate,
language: false,
onUpdate: async (language) => {
if (type === 'transcription') {
await this.$store.dispatch('setTranscriptionLanguage', language)
} else {
this.recordConvertTranslate = language
}
onUpdate: (language) => {
this.recordConvertTranslate = language
this.convertRecord()
}
})

View File

@ -1135,7 +1135,6 @@ export default {
'cacheLoginEmail',
'cacheFileSort',
'cacheTranslationLanguage',
'cacheTranscriptionLanguage',
'cacheTranslations',
'cacheEmojis',
'userInfo',
@ -1174,8 +1173,7 @@ export default {
'clientId',
'cacheServerUrl',
'cacheCalendarView',
'cacheTranslationLanguage',
'cacheTranscriptionLanguage'
'cacheTranslationLanguage'
],
array: [
'cacheUserBasic',
@ -1231,11 +1229,6 @@ export default {
state.cacheTranslationLanguage = languageName;
}
// TranscriptionLanguage 检查
if (typeof languageList[state.cacheTranscriptionLanguage] === "undefined") {
state.cacheTranscriptionLanguage = '';
}
// 处理用户信息
if (state.userInfo.userid) {
state.userId = state.userInfo.userid = $A.runNum(state.userInfo.userid);
@ -4290,16 +4283,6 @@ export default {
$A.IDBSave('cacheTranslationLanguage', language);
},
/**
* 设置语音转文字语言
* @param state
* @param language
*/
setTranscriptionLanguage({state}, language) {
state.cacheTranscriptionLanguage = language
$A.IDBSave('cacheTranscriptionLanguage', language);
},
/** *****************************************************************************************/
/** ************************************* loads *********************************************/
/** *****************************************************************************************/

View File

@ -264,9 +264,6 @@ export default {
cacheTranslationLanguage: '',
cacheTranslations: [],
// 语音转文字(识别语言)
cacheTranscriptionLanguage: '',
// 下拉菜单操作
menuOperation: {},