dootask/language/translate.php
2025-11-14 01:17:48 +00:00

348 lines
12 KiB
PHP
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php
@error_reporting(E_ALL & ~E_NOTICE & ~E_WARNING);
require __DIR__ . '/vendor/autoload.php';
use Orhanerday\OpenAi\OpenAi;
// 读取 .env 文件的简单工具函数
function language_parse_env_file(string $path): array
{
$env = [];
$lines = @file($path, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if ($lines === false) {
return $env;
}
foreach ($lines as $line) {
$line = trim($line);
if ($line === '' || $line[0] === '#') {
continue;
}
$delimiterPosition = strpos($line, '=');
if ($delimiterPosition === false) {
continue;
}
$name = trim(substr($line, 0, $delimiterPosition));
if (strpos($name, 'export ') === 0) {
$name = trim(substr($name, 7));
}
if ($name === '') {
continue;
}
$value = trim(substr($line, $delimiterPosition + 1));
$length = strlen($value);
if ($length >= 2) {
$first = $value[0];
$last = $value[$length - 1];
if (($first === '"' && $last === '"') || ($first === "'" && $last === "'")) {
$value = substr($value, 1, $length - 2);
}
}
$env[$name] = $value;
}
return $env;
}
// 获取环境变量值的简单工具函数
function language_env_value(string $key, array $env): ?string
{
if (array_key_exists($key, $env)) {
return $env[$key];
}
$value = getenv($key);
if ($value !== false) {
return $value;
}
return null;
}
// 读取语言环境配置
$languageEnvFile = dirname(__DIR__) . '/.env';
$languageEnv = is_readable($languageEnvFile) ? language_parse_env_file($languageEnvFile) : [];
// 优先从 .env 读取 OPENAI 配置,未找到时再次尝试 getenv 覆盖
$openAiKey = trim(language_env_value('OPENAI_API_KEY', $languageEnv) ?? '');
if ($openAiKey === '') {
fwrite(STDERR, "OPENAI_API_KEY 未设置,请在项目根目录的 .env 中配置。\n");
exit(1);
}
$openAiProxy = trim(language_env_value('OPENAI_PROXY_URL', $languageEnv) ?? '');
// 读取所有要翻译的内容
$originals = [];
$generateds = [];
foreach (['web', 'api'] as $type) {
$content = file_exists("original-{$type}.txt") ? file_get_contents("original-{$type}.txt") : "";
$array = array_values(array_filter(array_unique(explode("\n", $content))));
$generateds[$type] = $array;
$originals = array_merge($originals, $array);
}
// 判定是否存在translate.json文件
if (!file_exists("translate.json")) {
print_r("translate.json not exists");
exit;
}
$translations = []; // 翻译数据
$regrror = []; // 正则匹配错误的数据
$redundants = []; // 多余的数据
$needs = []; // 需要翻译的数据
// 读取翻译数据
$tmps = json_decode(file_get_contents("translate.json"), true);
foreach ($tmps as $obj) {
if (!isset($obj['key'])) {
continue;
}
$currentKey = $obj['key'];
$originalKey = preg_replace(["/\(%T\d+\)/", "/\(%M\d+\)/"], ["(*)", "(**)"], $currentKey);
$translations[$originalKey] = $obj;
if (!in_array($originalKey, $originals)) {
unset($translations[$originalKey]);
$redundants[$originalKey] = $obj;
continue;
}
if (preg_match_all('/\(%[TM]\d+\)/', $currentKey, $matches)) {
foreach ($matches[0] as $match) {
foreach ($obj as $k => $v) {
if (empty($v)) {
continue;
}
if (!str_contains($v, $match)) {
// 正则匹配错误
$regrror[$originalKey] = [
$k => $v,
'match' => $match,
'key' => $currentKey,
];
continue 2;
}
}
}
}
}
if (count($regrror) > 0) {
print_r("正则匹配错误的数据:\n");
print_r($regrror);
exit();
}
if (count($redundants) > 0) {
print_r("多余的数据:\n");
print_r(implode(", ", array_keys($redundants)) . "\n\n");
}
// 需要翻译的数据
foreach ($originals as $text) {
$key = trim($text);
if (!isset($translations[$key])) {
$needs[$key] = $key;
}
}
if (count($needs) > 0) {
$array = array_chunk($needs, 10, true);
$success = [];
$error = [];
$done = 0;
foreach ($array as $index => $keys) {
// 生成翻译内容
foreach ($keys as &$key) {
$c = 1;
$key = preg_replace_callback('/\((\*+)\)/', function ($m) use (&$c) {
$label = strlen($m[1]) > 1 ? "M" : "T";
return "(%" . $label . $c++ . ")";
}, $key);
}
$content = implode("\n", $keys);
// 开始翻译
print_r("正在翻译:" . (count($keys) + $done) . "/" . count($needs) . "...\n");
$openAi = new OpenAi($openAiKey);
if ($openAiProxy !== '') {
$openAi->setProxy($openAiProxy);
}
$result = $openAi->chat([
"model" => "gpt-5.1",
"reasoning_effort" => "low",
'messages' => [
[
"role" => "system",
"content" => <<<EOF
你是一个专业的翻译器,翻译的结果尽量符合 “项目任务管理系统” 的使用,请将提供的内容按每行一个翻译成:
```json
[
{
"key": "", // 原文本
"zh": "", // 留空(不用翻译)
"zh-CHT": "", // 繁体中文
"en": "", // 英语
"ko": "", // 韩语
"ja": "", // 日语
"de": "", // 德语
"fr": "", // 法语
"id": "", // 印度尼西亚语
"ru": "" // 俄语
}
]
```
请注意:(%T1)(%T2)(%T3)(%M1)(%M2) ...... 这类以 `小括号(%+内容)` 的字符组合是一个变量,翻译时请保留。
例子1
原文:此(%T1)已经处于【(%T2)】共享文件夹中,无法重复共享。
翻译成英语This (%T1) is already in the (%T2) shared folder and cannot be shared again。
例子2
原文:(%T1)的周报[(%T2)][(%T3)月第(%T4)]
翻译成英语Weekly report of (%T1) [(%T2)] [(Week (%T4) of (%T3) month)]
例子3
原文:(%T1)提交的「(%M2)」待你审批
翻译成英语:'(%M2)' submitted by (%T1) is waiting for your approval
例子4
原文:您发起的「(%M1)」已通过
翻译成英语The '(%M1)' you initiated has been approved
EOF,
],
[
"role" => "user",
"content" => $content,
],
]
]);
// 处理结果
$obj = json_decode($result);
$txt = preg_replace('/(^\s*```json\s*|\s*```\s*$)/', "", $obj->choices[0]->message->content);
$txt = preg_replace('/\(([TM]\d+)\)/', '(%$1)', $txt);
$arr = json_decode($txt, true);
if (!$arr || !is_array($arr)) {
$error = array_merge($error, array_flip($keys));
print_r("翻译失败:\n" . $content . "\n\n");
file_put_contents("translate-gpt.log", json_encode($obj, JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT) . "\n\n", FILE_APPEND);
continue;
}
// 验证结果
foreach ($arr as $item) {
if (empty($item['key'])) {
print_r("翻译结果不符合规范key为空。\n");
print_r($item);
continue;
}
foreach (['key', 'zh', 'zh-CHT', 'en', 'ko', 'ja', 'de', 'fr', 'id', 'ru'] as $lang) {
if (!isset($item[$lang])) {
print_r("翻译结果不符合规范:{$item['key']},缺少:{$lang} 的值。\n");
continue 2;
}
}
$currentKey = $item['key'];
$originalKey = preg_replace(["/\(%T\d+\)/", "/\(%M\d+\)/"], ["(*)", "(**)"], $currentKey);
if (preg_match_all('/\(%[TM]\d+\)/', $currentKey, $matches)) {
foreach ($matches[0] as $match) {
foreach ($item as $k => $v) {
if (empty($v)) {
continue;
}
if (!str_contains($v, $match)) {
// 正则匹配错误
$error[$originalKey] = [
'key' => $currentKey,
$k => $v,
'match' => $match,
];
continue 3;
}
}
}
}
$item['zh'] = "";
$translations[$originalKey] = $item;
$success[$originalKey] = $item;
}
print_r("翻译完成:" . (count($keys) + $done) . "/" . count($needs) . "\n\n");
$done += count($keys);
}
if (count($error) > 0) {
print_r("正则匹配错误的数据:\n");
print_r(json_encode(array_values($error), JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT) . "\n\n");
}
// 保存翻译结果
file_put_contents("translate.json", json_encode(array_values($translations), JSON_UNESCAPED_UNICODE | JSON_PRETTY_PRINT));
print_r("----------------\n\n");
print_r("总翻译:" . count($needs) . "\n");
print_r("成功:" . count($success) . "\n");
print_r("错误:" . count($error) . "\n\n");
print_r("----------------\n\n");
}
// 生成前端使用的文件
foreach ($generateds as $type => $array) {
$datas = [];
foreach ($array as $text) {
$text = trim($text);
if (isset($translations[$text])) {
$datas[] = $translations[$text];
}
}
// 按长度排序
$inOrder = [];
foreach ($datas as $index => $item) {
if (preg_match('/\(%[TM]\d+\)/', $item['key'])) {
$inOrder[$index] = strlen($item['key']);
} else {
$inOrder[$index] = strlen($item['key']) + 10000000000;
}
}
array_multisort($inOrder, SORT_DESC, $datas);
// 合成数组
$results = [];
$index = 0;
foreach ($datas as $items) {
foreach ($items as $kk => $item) {
if (!isset($results)) {
$results[$kk] = [];
}
$results[$kk][] = $item;
}
}
// 生成文件
if ($type === 'api') {
if (!is_dir("../public/language/api")) {
mkdir("../public/language/api", 0777, true);
}
foreach ($results as $kk => $item) {
$file = "../public/language/api/$kk.json";
file_put_contents($file, json_encode($item, JSON_UNESCAPED_UNICODE));
}
} elseif ($type === 'web') {
if (!is_dir("../public/language/web")) {
mkdir("../public/language/web", 0777, true);
}
foreach ($results as $kk => $item) {
$file = "../public/language/web/$kk.js";
file_put_contents($file, "if(typeof window.LANGUAGE_DATA===\"undefined\")window.LANGUAGE_DATA={};window.LANGUAGE_DATA[\"{$kk}\"]=" . json_encode($item, JSON_UNESCAPED_UNICODE));
}
}
print_r("[$type] total: " . count($results['key']) . "\n");
}
print_r("\n任务结束\n");