mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-04-25 11:18:22 +00:00
fix(ui): avoid over-segmenting cjk messages (#1726)
This commit is contained in:
parent
8128a3bc57
commit
952059eb51
@ -3,6 +3,9 @@ import { useMemo } from "react";
|
||||
import { visit } from "unist-util-visit";
|
||||
import type { BuildVisitor } from "unist-util-visit";
|
||||
|
||||
const CJK_TEXT_RE =
|
||||
/[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u;
|
||||
|
||||
export function rehypeSplitWordsIntoSpans() {
|
||||
return (tree: Root) => {
|
||||
visit(tree, "element", ((node: Element) => {
|
||||
@ -15,6 +18,10 @@ export function rehypeSplitWordsIntoSpans() {
|
||||
const newChildren: Array<ElementContent> = [];
|
||||
node.children.forEach((child) => {
|
||||
if (child.type === "text") {
|
||||
if (CJK_TEXT_RE.test(child.value)) {
|
||||
newChildren.push(child);
|
||||
return;
|
||||
}
|
||||
const segmenter = new Intl.Segmenter("zh", { granularity: "word" });
|
||||
const segments = segmenter.segment(child.value);
|
||||
const words = Array.from(segments)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user