mirror of
https://github.com/kuaifan/dootask.git
synced 2026-03-03 16:02:08 +00:00
feat: 升级语音识别模型并优化转写逻辑
- 语音识别模型从 whisper-1 升级到 gpt-4o-mini-transcribe
- 根据用户语言设置自动添加简繁体中文提示词
- 录音转文字新增 dialog_id 参数,支持获取对话上下文提高识别准确率
- 移除前端语言手动选择功能,简化用户操作
- 添加参数空值保护
- 优化 reasoning_effort 参数逻辑,区分 gpt-5 和 gpt-5.1+ 版本
This commit is contained in:
parent
4b0f4e388c
commit
42a2eb56c7
@ -1306,11 +1306,7 @@ class DialogController extends AbstractController
|
||||
*
|
||||
* @apiParam {String} base64 语音base64
|
||||
* @apiParam {Number} duration 语音时长(毫秒)
|
||||
* @apiParam {String} [language] 识别语言
|
||||
* - 比如:zh
|
||||
* - 默认:自动识别
|
||||
* - 格式:符合 ISO_639 标准
|
||||
* - 此参数不一定起效果,AI会根据语音和language参考翻译识别结果
|
||||
* @apiParam {Number} [dialog_id] 会话ID,用于获取上下文提高识别准确率
|
||||
* @apiParam {String} [translate] 翻译识别结果
|
||||
* - 比如:zh
|
||||
* - 默认:不翻译结果
|
||||
@ -1327,9 +1323,9 @@ class DialogController extends AbstractController
|
||||
//
|
||||
$path = "uploads/tmp/chat/" . date("Ym") . "/" . $user->userid . "/";
|
||||
$base64 = Request::input('base64');
|
||||
$language = Request::input('language');
|
||||
$translate = Request::input('translate');
|
||||
$duration = intval(Request::input('duration'));
|
||||
$dialogId = intval(Request::input('dialog_id'));
|
||||
if ($duration < 600) {
|
||||
return Base::retError('说话时间太短');
|
||||
}
|
||||
@ -1342,17 +1338,35 @@ class DialogController extends AbstractController
|
||||
return Base::retError($data['msg']);
|
||||
}
|
||||
$recordData = $data['data'];
|
||||
// 构建上下文提示词
|
||||
$promptParts = [];
|
||||
if ($user->lang === 'zh') {
|
||||
$promptParts[] = "如果识别到中文,优先使用简体中文输出";
|
||||
} elseif ($user->lang === 'zh-CHT') {
|
||||
$promptParts[] = "如果識別到中文,優先使用繁體中文輸出";
|
||||
}
|
||||
// 获取最近的聊天上下文
|
||||
if ($dialogId > 0) {
|
||||
$contextTexts = WebSocketDialogMsg::whereDialogId($dialogId)
|
||||
->whereIn('type', ['text'])
|
||||
->orderByDesc('id')
|
||||
->limit(5)
|
||||
->get()
|
||||
->reverse()
|
||||
->map(fn($msg) => $msg->extractMessageContent(100))
|
||||
->filter()
|
||||
->values()
|
||||
->toArray();
|
||||
if (!empty($contextTexts)) {
|
||||
$promptParts[] = "对话上下文:" . implode(";", $contextTexts) . "。";
|
||||
}
|
||||
}
|
||||
// 转文字
|
||||
$extParams = [];
|
||||
if ($language) {
|
||||
$extParams = [
|
||||
'language' => $language === 'zh-CHT' ? 'zh' : $language,
|
||||
'prompt' => "将此语音识别为“" . Doo::getLanguages($language) . "”。",
|
||||
];
|
||||
if (!empty($promptParts)) {
|
||||
$extParams['prompt'] = implode("\n\n", $promptParts);
|
||||
}
|
||||
$result = AI::transcriptions($recordData['file'], $extParams, [
|
||||
'accept-language' => Request::header('Accept-Language', 'zh')
|
||||
]);
|
||||
$result = AI::transcriptions($recordData['file'], $extParams);
|
||||
if (Base::isError($result)) {
|
||||
return $result;
|
||||
}
|
||||
@ -1944,10 +1958,15 @@ class DialogController extends AbstractController
|
||||
return Base::retSuccess("success", $msg);
|
||||
}
|
||||
WebSocketDialog::checkDialog($msg->dialog_id);
|
||||
// 根据用户语言构建提示词
|
||||
$extParams = [];
|
||||
if ($user->lang === 'zh') {
|
||||
$extParams['prompt'] = "如果识别到中文,优先使用简体中文输出";
|
||||
} elseif ($user->lang === 'zh-CHT') {
|
||||
$extParams['prompt'] = "如果識別到中文,優先使用繁體中文輸出";
|
||||
}
|
||||
//
|
||||
$result = AI::transcriptions(public_path($msgData['path']), [], [
|
||||
'accept-language' => Request::header('Accept-Language', 'zh')
|
||||
]);
|
||||
$result = AI::transcriptions(public_path($msgData['path']), $extParams);
|
||||
if (Base::isError($result)) {
|
||||
return $result;
|
||||
}
|
||||
|
||||
@ -271,6 +271,9 @@ class AI
|
||||
{
|
||||
Apps::isInstalledThrow('ai');
|
||||
|
||||
$extParams = $extParams ?: [];
|
||||
$extHeaders = $extHeaders ?: [];
|
||||
|
||||
if (!file_exists($filePath)) {
|
||||
return Base::retError("语音文件不存在");
|
||||
}
|
||||
@ -287,7 +290,7 @@ class AI
|
||||
$result = Cache::remember($cacheKey, Carbon::now()->addDays(), function () use ($extParams, $extHeaders, $filePath, $audioProvider) {
|
||||
$post = array_merge($extParams, [
|
||||
'file' => new \CURLFile($filePath),
|
||||
'model' => 'whisper-1',
|
||||
'model' => 'gpt-4o-mini-transcribe',
|
||||
]);
|
||||
$header = array_merge($extHeaders, [
|
||||
'Content-Type' => 'multipart/form-data',
|
||||
@ -373,8 +376,9 @@ class AI
|
||||
]
|
||||
],
|
||||
];
|
||||
if (self::shouldSendReasoningEffort($provider)) {
|
||||
$payload['reasoning_effort'] = 'minimal';
|
||||
$reasoningEffort = self::getReasoningEffort($provider);
|
||||
if ($reasoningEffort !== null) {
|
||||
$payload['reasoning_effort'] = $reasoningEffort;
|
||||
}
|
||||
$post = json_encode($payload);
|
||||
|
||||
@ -454,8 +458,9 @@ class AI
|
||||
]
|
||||
],
|
||||
];
|
||||
if (self::shouldSendReasoningEffort($provider)) {
|
||||
$payload['reasoning_effort'] = 'minimal';
|
||||
$reasoningEffort = self::getReasoningEffort($provider);
|
||||
if ($reasoningEffort !== null) {
|
||||
$payload['reasoning_effort'] = $reasoningEffort;
|
||||
}
|
||||
$post = json_encode($payload);
|
||||
|
||||
@ -542,8 +547,9 @@ class AI
|
||||
]
|
||||
],
|
||||
];
|
||||
if (self::shouldSendReasoningEffort($provider)) {
|
||||
$payload['reasoning_effort'] = 'minimal';
|
||||
$reasoningEffort = self::getReasoningEffort($provider);
|
||||
if ($reasoningEffort !== null) {
|
||||
$payload['reasoning_effort'] = $reasoningEffort;
|
||||
}
|
||||
$post = json_encode($payload);
|
||||
|
||||
@ -712,7 +718,7 @@ class AI
|
||||
|
||||
return [
|
||||
'vendor' => 'openai',
|
||||
'model' => 'whisper-1',
|
||||
'model' => 'gpt-4o-mini-transcribe',
|
||||
'api_key' => $key,
|
||||
'base_url' => rtrim($baseUrl, '/'),
|
||||
'agency' => $agency,
|
||||
@ -720,23 +726,37 @@ class AI
|
||||
}
|
||||
|
||||
/**
|
||||
* 是否需要附加 reasoning_effort 参数
|
||||
* 获取 reasoning_effort 参数值
|
||||
* @param array $provider
|
||||
* @return bool
|
||||
* @return string|null 返回 'none'/'low' 或 null(不需要此参数)
|
||||
*/
|
||||
protected static function shouldSendReasoningEffort(array $provider): bool
|
||||
protected static function getReasoningEffort(array $provider): ?string
|
||||
{
|
||||
if (($provider['vendor'] ?? '') !== 'openai') {
|
||||
return false;
|
||||
return null;
|
||||
}
|
||||
$model = $provider['model'] ?? '';
|
||||
|
||||
// 匹配 gpt- 开头后跟数字的模型名称
|
||||
if (preg_match('/^gpt-(\d+)/', $model, $matches)) {
|
||||
return intval($matches[1]) >= 5;
|
||||
|
||||
// gpt-5.1 及之后版本支持 none
|
||||
if (preg_match('/^gpt-(\d+)\.(\d+)/', $model, $matches)) {
|
||||
$major = intval($matches[1]);
|
||||
$minor = intval($matches[2]);
|
||||
if ($major > 5 || ($major === 5 && $minor >= 1)) {
|
||||
return 'none';
|
||||
}
|
||||
if ($major === 5) {
|
||||
return 'low';
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
|
||||
// gpt-5 (无小版本号) 使用 low
|
||||
if (preg_match('/^gpt-(\d+)(?![.\d])/', $model, $matches)) {
|
||||
if (intval($matches[1]) >= 5) {
|
||||
return 'low';
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@ -262,8 +262,7 @@
|
||||
<div class="convert-body">
|
||||
<div class="convert-content">
|
||||
<div v-if="recordConvertSetting" class="convert-setting">
|
||||
<i class="taskfont" :class="{active: !!cacheTranscriptionLanguage}" @click="convertSetting('transcription', $event)"></i>
|
||||
<i class="taskfont" :class="{active: !!recordConvertTranslate}" @click="convertSetting('translate', $event)"></i>
|
||||
<i class="taskfont" :class="{active: !!recordConvertTranslate}" @click="convertSetting($event)"></i>
|
||||
</div>
|
||||
<div class="convert-input">
|
||||
<Input
|
||||
@ -599,7 +598,6 @@ export default {
|
||||
'cacheDialogs',
|
||||
'dialogMsgs',
|
||||
|
||||
'cacheTranscriptionLanguage',
|
||||
'cacheKeyboard',
|
||||
'keyboardShow',
|
||||
'keyboardHeight',
|
||||
@ -1686,7 +1684,6 @@ export default {
|
||||
dialog_id: this.dialogId,
|
||||
base64: reader.result,
|
||||
duration: this.recordDuration,
|
||||
language: this.cacheTranscriptionLanguage,
|
||||
translate: this.recordConvertTranslate
|
||||
},
|
||||
method: 'post',
|
||||
@ -1707,7 +1704,7 @@ export default {
|
||||
reader.readAsDataURL(this.recordBlob);
|
||||
},
|
||||
|
||||
async convertSetting(type, event) {
|
||||
async convertSetting(event) {
|
||||
if (this.recordConvertStatus !== 1) {
|
||||
$A.messageWarning("请稍后再试...")
|
||||
return;
|
||||
@ -1717,33 +1714,17 @@ export default {
|
||||
label: languageList[item],
|
||||
value: item
|
||||
}))
|
||||
let active
|
||||
if (type === 'transcription') {
|
||||
// 语音转文字
|
||||
list.unshift(...[
|
||||
{label: this.$L('选择识别语言'), value: '', disabled: true},
|
||||
{label: this.$L('自动识别'), value: '', divided: true},
|
||||
])
|
||||
active = this.cacheTranscriptionLanguage
|
||||
} else {
|
||||
// 翻译
|
||||
list.unshift(...[
|
||||
{label: this.$L('选择翻译结果'), value: '', disabled: true},
|
||||
{label: this.$L('不翻译结果'), value: '', divided: true},
|
||||
])
|
||||
active = this.recordConvertTranslate
|
||||
}
|
||||
list.unshift(...[
|
||||
{label: this.$L('选择翻译结果'), value: '', disabled: true},
|
||||
{label: this.$L('不翻译结果'), value: '', divided: true},
|
||||
])
|
||||
this.$store.commit('menu/operation', {
|
||||
event,
|
||||
list,
|
||||
active,
|
||||
active: this.recordConvertTranslate,
|
||||
language: false,
|
||||
onUpdate: async (language) => {
|
||||
if (type === 'transcription') {
|
||||
await this.$store.dispatch('setTranscriptionLanguage', language)
|
||||
} else {
|
||||
this.recordConvertTranslate = language
|
||||
}
|
||||
onUpdate: (language) => {
|
||||
this.recordConvertTranslate = language
|
||||
this.convertRecord()
|
||||
}
|
||||
})
|
||||
|
||||
19
resources/assets/js/store/actions.js
vendored
19
resources/assets/js/store/actions.js
vendored
@ -1135,7 +1135,6 @@ export default {
|
||||
'cacheLoginEmail',
|
||||
'cacheFileSort',
|
||||
'cacheTranslationLanguage',
|
||||
'cacheTranscriptionLanguage',
|
||||
'cacheTranslations',
|
||||
'cacheEmojis',
|
||||
'userInfo',
|
||||
@ -1174,8 +1173,7 @@ export default {
|
||||
'clientId',
|
||||
'cacheServerUrl',
|
||||
'cacheCalendarView',
|
||||
'cacheTranslationLanguage',
|
||||
'cacheTranscriptionLanguage'
|
||||
'cacheTranslationLanguage'
|
||||
],
|
||||
array: [
|
||||
'cacheUserBasic',
|
||||
@ -1231,11 +1229,6 @@ export default {
|
||||
state.cacheTranslationLanguage = languageName;
|
||||
}
|
||||
|
||||
// TranscriptionLanguage 检查
|
||||
if (typeof languageList[state.cacheTranscriptionLanguage] === "undefined") {
|
||||
state.cacheTranscriptionLanguage = '';
|
||||
}
|
||||
|
||||
// 处理用户信息
|
||||
if (state.userInfo.userid) {
|
||||
state.userId = state.userInfo.userid = $A.runNum(state.userInfo.userid);
|
||||
@ -4290,16 +4283,6 @@ export default {
|
||||
$A.IDBSave('cacheTranslationLanguage', language);
|
||||
},
|
||||
|
||||
/**
|
||||
* 设置语音转文字语言
|
||||
* @param state
|
||||
* @param language
|
||||
*/
|
||||
setTranscriptionLanguage({state}, language) {
|
||||
state.cacheTranscriptionLanguage = language
|
||||
$A.IDBSave('cacheTranscriptionLanguage', language);
|
||||
},
|
||||
|
||||
/** *****************************************************************************************/
|
||||
/** ************************************* loads *********************************************/
|
||||
/** *****************************************************************************************/
|
||||
|
||||
3
resources/assets/js/store/state.js
vendored
3
resources/assets/js/store/state.js
vendored
@ -264,9 +264,6 @@ export default {
|
||||
cacheTranslationLanguage: '',
|
||||
cacheTranslations: [],
|
||||
|
||||
// 语音转文字(识别语言)
|
||||
cacheTranscriptionLanguage: '',
|
||||
|
||||
// 下拉菜单操作
|
||||
menuOperation: {},
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user