mirror of
https://github.com/bytedance/deer-flow.git
synced 2026-04-25 11:18:22 +00:00
fix(ui): avoid over-segmenting cjk messages (#1726)
This commit is contained in:
parent
8128a3bc57
commit
952059eb51
@@ -3,6 +3,9 @@ import { useMemo } from "react";
 import { visit } from "unist-util-visit";
 import type { BuildVisitor } from "unist-util-visit";
 
+const CJK_TEXT_RE =
+  /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u;
+
 export function rehypeSplitWordsIntoSpans() {
   return (tree: Root) => {
     visit(tree, "element", ((node: Element) => {
@@ -15,6 +18,10 @@ export function rehypeSplitWordsIntoSpans() {
         const newChildren: Array<ElementContent> = [];
         node.children.forEach((child) => {
           if (child.type === "text") {
+            if (CJK_TEXT_RE.test(child.value)) {
+              newChildren.push(child);
+              return;
+            }
             const segmenter = new Intl.Segmenter("zh", { granularity: "word" });
             const segments = segmenter.segment(child.value);
             const words = Array.from(segments)
Loading…
x
Reference in New Issue
Block a user