dootask/language/translate-gpt.php
2024-10-29 19:02:23 +08:00

145 lines
5.1 KiB
PHP
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
// Report every error class except notices and warnings; the leading @ also
// silences anything the call itself might raise.
@error_reporting(E_ALL & ~E_NOTICE & ~E_WARNING);
// Autoloader (the conventional Composer location), resolved relative to this
// file's directory.
require __DIR__ . '/vendor/autoload.php';
// OpenAI client class that is instantiated further down for the chat request.
use Orhanerday\OpenAi\OpenAi;
// Local settings, resolved via the include path / working directory rather than
// __DIR__. NOTE(review): presumably defines OPEN_AI_KEY and OPEN_AI_PROXY, which
// are used below but not defined in this file — confirm.
require_once("config.php");
/**
 * Split the raw contents of an original-*.txt list into translatable strings.
 *
 * Each line is trimmed first, so CRLF line endings and stray indentation never
 * end up inside a key; lines that are empty (or otherwise falsy, e.g. "0")
 * after trimming are dropped, and duplicates are removed.
 *
 * @param string $content Raw file contents, one source string per line.
 * @return string[] Unique, trimmed, non-empty lines in first-seen order.
 */
function splitSourceLines(string $content): array
{
    $lines = array_map('trim', explode("\n", $content));

    return array_values(array_unique(array_filter($lines)));
}

// Read every string that needs a translation from the "api" and "web" lists.
$array = [];
foreach (['api', 'web'] as $type) {
    $path = "original-{$type}.txt";
    // A missing (or unreadable) list counts as empty instead of being an error.
    $content = file_exists($path) ? file_get_contents($path) : "";
    $array = array_merge($array, splitSourceLines((string) $content));
}
// The same string can appear in both lists; keep one copy of each.
$array = array_values(array_unique($array));
// Stop when translate.json is missing: the rest of the script reads existing
// translations from it and writes the merged result back to it.
if (!file_exists("translate.json")) {
    print_r("translate.json not exists" . PHP_EOL);
    // Non-zero status so a calling shell or CI job can tell the run failed.
    exit(1);
}
$translations = []; // existing translations, indexed by their "key" field
$regrror = [];      // entries whose "(*)" placeholder count differs from the key's
$redundants = [];   // entries whose key is not among the current source strings
$needs = [];        // source strings still to be translated (filled further down)

// Load the existing translations. An empty file is accepted as "no translations
// yet"; anything else that does not decode to an array aborts the run, because
// continuing with an empty list would make the save step further down overwrite
// translate.json and discard its current contents.
$rawTranslations = file_get_contents("translate.json");
if ($rawTranslations === false) {
    print_r("translate.json can not be read" . PHP_EOL);
    exit(1);
}
$tmps = trim($rawTranslations) === '' ? [] : json_decode($rawTranslations, true);
if (!is_array($tmps)) {
    print_r("translate.json is not valid JSON: " . json_last_error_msg() . PHP_EOL);
    exit(1);
}

// Source strings are trimmed before they are used as keys further down, so
// membership is tested against the trimmed form. Flipping the list gives an
// exact (non-loose) lookup in constant time per entry.
$knownKeys = array_flip(array_map('trim', $array));

foreach ($tmps as $tmp) {
    // Skip anything that is not a translation record with a usable key.
    if (!is_array($tmp) || !isset($tmp['key'])) {
        continue;
    }
    $key = $tmp['key'];
    if (!is_string($key) && !is_int($key)) {
        continue;
    }
    $translations[$key] = $tmp;

    if (!isset($knownKeys[$key])) {
        $redundants[$key] = $tmp;
        continue;
    }

    // Every translated field must carry as many "(*)" placeholders as the key.
    $count = substr_count((string) $key, '(*)');
    if ($count === 0) {
        continue;
    }
    foreach ($tmp as $k => $v) {
        if ($k === 'zh' || $k === 'key') {
            continue;
        }
        // A non-string value cannot contain the placeholders, so it is a mismatch too.
        if (!is_string($v) || substr_count($v, '(*)') !== $count) {
            $regrror[$key] = $tmp;
            break;
        }
    }
}
// Queue every source string that has no entry in $translations yet. The
// trimmed text serves as both the array key and the value.
foreach (array_map('trim', $array) as $candidate) {
    if (isset($translations[$candidate])) {
        continue;
    }
    $needs[$candidate] = $candidate;
}
// Translate whatever is still missing and merge the results into translate.json.
if (count($needs) > 0) {
    // Fields every returned entry must carry ("key" echoes the source text).
    $fields = ['key', 'zh', 'zh-CHT', 'en', 'ko', 'ja', 'de', 'fr', 'id', 'ru'];
    // The client is the same for every batch, so it is created once up front.
    $open_ai = new OpenAi(OPEN_AI_KEY);
    $open_ai->setProxy(OPEN_AI_PROXY);
    // Batches of 100 strings; keys are preserved so each batch stays indexed by
    // its source text.
    $waits = array_chunk($needs, 100, true);
    foreach ($waits as $index => $items) {
        if ($index > 0) {
            print_r("\n");
        }
        print_r("正在翻译:" . count($items) . "/" . count($needs) . "\n");
        // One source string per line, which is the input format the prompt describes.
        $content = implode("\n", $items);
        // The system prompt (in Chinese) asks for a JSON array holding one object
        // per input line with the fields listed in $fields, "zh" left empty, the
        // "(*)" placeholders kept untouched, and no markdown around the answer.
        $chat = $open_ai->chat([
            'model' => 'gpt-4o',
            'messages' => [
                [
                    "role" => "system",
                    "content" => <<<EOF
你是一个专业的翻译器,翻译的结果尽量符合“项目任务管理系统”的使用,将提供的文本按每行一个翻译成:
```json
[
{
"key": "",
"zh": "",
"zh-CHT": "",
"en": "",
"ko": "",
"ja": "",
"de": "",
"fr": "",
"id": "",
"ru": ""
}
]
```
key原文本zh留空(不用翻译)zh-CHT繁体中文en英语ko韩语ja日语de德语fr法语id印度尼西亚语ru俄语。
另外要注意的是其中的(*)为占位符翻译时不要删除也不要翻译这个占位符。请直接返回文本不需要使用markdown。
EOF,
                ],
                [
                    "role" => "user",
                    "content" => $content,
                ],
            ],
            'temperature' => 1.0,
            'max_tokens' => 4000,
            'frequency_penalty' => 0,
            'presence_penalty' => 0,
        ]);

        // Decode defensively: a failed request or an API error payload has no
        // choices, and must not be dereferenced as if it had.
        $response = is_string($chat) ? json_decode($chat, true) : null;
        $choice = $response['choices'][0] ?? null;
        $reply = $choice['message']['content'] ?? null;

        $arr = null;
        $reason = '';
        if (!is_string($reply)) {
            // Surface the API's own error text when the payload carries one.
            $apiError = $response['error']['message'] ?? '';
            $reason = is_string($apiError) ? $apiError : '';
        } else {
            $reply = trim($reply);
            // The prompt asks for plain text, but tolerate a ```json ... ``` fence.
            if (strncmp($reply, '```', 3) === 0) {
                $reply = preg_replace('/^```[A-Za-z]*\s*|\s*```$/', '', $reply) ?? $reply;
            }
            $arr = json_decode($reply, true);
            // An answer cut off at max_tokens is incomplete JSON; say so explicitly.
            if (!is_array($arr) && ($choice['finish_reason'] ?? '') === 'length') {
                $reason = '回复超出max_tokens被截断';
            }
        }
        if (!$arr || !is_array($arr)) {
            print_r("翻译失败:\n");
            if ($reason !== '') {
                print_r("原因:{$reason}\n");
            }
            print_r($content . "\n");
            continue;
        }

        foreach ($arr as $item) {
            // Text shown in messages; empty when the entry has no usable key.
            $label = is_array($item) && isset($item['key']) && is_string($item['key']) ? $item['key'] : '';
            foreach ($fields as $lang) {
                if (!is_array($item) || !isset($item[$lang])) {
                    print_r("翻译结果不符合规范:{$label},缺少:{$lang}\n");
                    continue 2;
                }
            }
            $key = is_scalar($item['key']) ? (string) $item['key'] : '';
            if ($key === '') {
                print_r("翻译结果不符合规范:key为空\n");
                continue;
            }
            // Only accept keys that were actually sent in this batch: an entry
            // whose key was altered by the model would be stored under a key no
            // source string has, while the real one stays untranslated.
            if (!isset($items[$key])) {
                print_r("翻译结果不符合规范:{$key},不在本批待翻译内容中\n");
                continue;
            }
            // Every translated field must keep as many "(*)" placeholders as the key.
            $count = substr_count($key, '(*)');
            if ($count > 0) {
                foreach ($item as $k => $v) {
                    if ($k === 'zh' || $k === 'key') {
                        continue;
                    }
                    if (!is_string($v) || substr_count($v, '(*)') !== $count) {
                        $shown = is_string($v) ? $v : json_encode($v, JSON_UNESCAPED_UNICODE);
                        print_r("翻译结果不符合规范:{$key},正则匹配错误:{$k} => {$shown}\n");
                        continue 2;
                    }
                }
            }
            // "zh" is always stored empty, whatever the model returned for it.
            $item['key'] = $key;
            $item['zh'] = "";
            $translations[$key] = $item;
        }

        // Save after every batch so finished work survives a later failure. A
        // failed encode must never reach file_put_contents(): writing `false`
        // would truncate translate.json.
        $json = json_encode(array_values($translations), JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT);
        if ($json === false) {
            print_r("保存translate.json失败:" . json_last_error_msg() . "\n");
            exit(1);
        }
        if (file_put_contents("translate.json", $json) === false) {
            print_r("保存translate.json失败:无法写入文件\n");
            exit(1);
        }
        print_r("翻译完成:" . count($items) . "/" . count($needs) . "\n");
    }
}