ChatDev/entity/configs/edge/edge_processor.py
2026-01-07 16:24:01 +08:00

335 lines
12 KiB
Python
Executable File

"""Edge payload processor configuration dataclasses."""
from dataclasses import dataclass, field, fields, replace
from typing import Any, Dict, Mapping, Type, TypeVar, cast
from entity.enum_options import enum_options_from_values
from utils.function_catalog import get_function_catalog
from utils.function_manager import EDGE_PROCESSOR_FUNCTION_DIR
from schema_registry import (
SchemaLookupError,
get_edge_processor_schema,
iter_edge_processor_schemas,
)
from entity.configs.base import (
BaseConfig,
ChildKey,
ConfigError,
ConfigFieldSpec,
ensure_list,
optional_bool,
optional_str,
require_mapping,
require_str,
extend_path,
)
def _serialize_config(config: BaseConfig) -> Dict[str, Any]:
    """Collect the dataclass fields of ``config`` into a plain dict.

    The ``path`` field is left out; every other field is copied as-is via
    ``getattr`` (nested values are not converted), in field-declaration order.
    """
    return {
        spec.name: getattr(config, spec.name)
        for spec in fields(config)
        if spec.name != "path"
    }
class EdgeProcessorTypeConfig(BaseConfig):
    """Common base for the per-type edge payload processor configs.

    Provides a class-name display label and a generic field dump; subclasses
    may override either.
    """

    def display_label(self) -> str:
        """Return the concrete class name as the label."""
        return type(self).__name__

    def to_external_value(self) -> Any:
        """Return this config's dataclass fields as a dict, without ``path``."""
        external = _serialize_config(self)
        return external
# Human-readable explanation for each supported ``on_no_match`` strategy.
# Insertion order is significant: the keys are passed, in this order, to
# enum_options_from_values when the regex config's field specs are built.
_NO_MATCH_DESCRIPTIONS: Dict[str, str] = dict(
    (
        ("pass", "Leave the payload untouched when no match is found."),
        ("default", "Apply default_value (or empty string) if nothing matches."),
        ("drop", "Discard the payload entirely when the regex does not match."),
    )
)
@dataclass
class RegexEdgeProcessorConfig(EdgeProcessorTypeConfig):
    """Configuration for regex-based payload extraction.

    Attributes:
        pattern: Python regular expression used to extract content.
        group: Capture group name or index; ``None`` means the entire match.
        case_sensitive: Whether the regex should be case sensitive.
        multiline: Enable multiline mode (re.MULTILINE).
        dotall: Enable dotall mode (re.DOTALL).
        multiple: Collect all matches instead of only the first.
        template: Optional output template using the ``{match}`` placeholder.
        on_no_match: One of ``pass``, ``default`` or ``drop``.
        default_value: Fallback content when ``on_no_match`` is ``default``.
    """

    pattern: str = ""
    group: str | int | None = None
    case_sensitive: bool = True
    multiline: bool = False
    dotall: bool = False
    multiple: bool = False
    template: str | None = None
    on_no_match: str = "pass"
    default_value: str | None = None

    # Not annotated on purpose: this stays a plain class attribute rather than
    # becoming a dataclass field.
    FIELD_SPECS = {
        "pattern": ConfigFieldSpec(
            name="pattern",
            display_name="Regex Pattern",
            type_hint="str",
            required=True,
            description="Python regular expression used to extract content.",
        ),
        "group": ConfigFieldSpec(
            name="group",
            display_name="Capture Group",
            type_hint="str",
            required=False,
            description="Capture group name or index. Defaults to the entire match.",
        ),
        "case_sensitive": ConfigFieldSpec(
            name="case_sensitive",
            display_name="Case Sensitive",
            type_hint="bool",
            required=False,
            default=True,
            description="Whether the regex should be case sensitive.",
        ),
        "multiline": ConfigFieldSpec(
            name="multiline",
            display_name="Multiline Flag",
            type_hint="bool",
            required=False,
            default=False,
            description="Enable multiline mode (re.MULTILINE).",
            advance=True,
        ),
        "dotall": ConfigFieldSpec(
            name="dotall",
            display_name="Dotall Flag",
            type_hint="bool",
            required=False,
            default=False,
            description="Enable dotall mode (re.DOTALL).",
            advance=True,
        ),
        "multiple": ConfigFieldSpec(
            name="multiple",
            display_name="Return Multiple Matches",
            type_hint="bool",
            required=False,
            default=False,
            description="Whether to collect all matches instead of only the first.",
            advance=True,
        ),
        "template": ConfigFieldSpec(
            name="template",
            display_name="Output Template",
            type_hint="str",
            required=False,
            description="Optional template applied to the extracted value. Use '{match}' placeholder.",
            advance=True,
        ),
        "on_no_match": ConfigFieldSpec(
            name="on_no_match",
            display_name="No Match Behavior",
            type_hint="enum",
            required=False,
            default="pass",
            enum=["pass", "default", "drop"],
            description="Behavior when no match is found.",
            enum_options=enum_options_from_values(
                list(_NO_MATCH_DESCRIPTIONS.keys()),
                _NO_MATCH_DESCRIPTIONS,
                preserve_label_case=True,
            ),
            advance=True,
        ),
        "default_value": ConfigFieldSpec(
            name="default_value",
            display_name="Default Value",
            type_hint="str",
            required=False,
            description="Fallback content when on_no_match=default.",
            advance=True,
        ),
    }

    @staticmethod
    def _normalize_group(value: Any, path: str) -> str | int | None:
        """Coerce a raw ``group`` entry into a group index, group name or ``None``.

        Args:
            value: Raw value read from the mapping (may be ``None``).
            path: Path of the enclosing config, used for error reporting.

        Returns:
            ``None`` when unset, an ``int`` for integers and all-decimal
            strings, otherwise the string unchanged (a named group).

        Raises:
            ConfigError: If ``value`` is a bool or is neither ``str`` nor ``int``.
        """
        if value is None:
            return None
        # bool is a subclass of int; without this guard ``group: true`` would
        # silently be treated as capture group 1.
        if isinstance(value, bool):
            raise ConfigError("group must be str or int", extend_path(path, "group"))
        if isinstance(value, int):
            return value
        if isinstance(value, str):
            # isdecimal(), not isdigit(): isdigit() is also true for characters
            # such as "²" that int() cannot parse, which used to surface as a
            # raw ValueError instead of being kept as a group name.
            return int(value) if value.isdecimal() else value
        raise ConfigError("group must be str or int", extend_path(path, "group"))

    @classmethod
    def from_dict(cls, data: Mapping[str, Any], *, path: str) -> "RegexEdgeProcessorConfig":
        """Build a config from a raw mapping.

        Args:
            data: Raw processor payload; must be a mapping with a non-empty
                ``pattern`` entry.
            path: Location of this payload in the overall config, used in
                error messages.

        Returns:
            A populated ``RegexEdgeProcessorConfig``.

        Raises:
            ConfigError: If ``group`` has an unsupported type or ``on_no_match``
                is not one of the supported strategies. The ``require_*`` /
                ``optional_*`` helpers presumably raise it as well for
                malformed entries (defined outside this file).
        """
        mapping = require_mapping(data, path)
        pattern = require_str(mapping, "pattern", path, allow_empty=False)
        group_normalized = cls._normalize_group(mapping.get("group"), path)

        multiple = optional_bool(mapping, "multiple", path, default=False)
        case_sensitive = optional_bool(mapping, "case_sensitive", path, default=True)
        multiline = optional_bool(mapping, "multiline", path, default=False)
        dotall = optional_bool(mapping, "dotall", path, default=False)

        # A missing or empty value falls back to "pass".
        on_no_match = optional_str(mapping, "on_no_match", path) or "pass"
        # Validate against the same table that feeds the enum options so the
        # accepted values cannot drift apart.
        if on_no_match not in _NO_MATCH_DESCRIPTIONS:
            raise ConfigError("on_no_match must be pass, default or drop", extend_path(path, "on_no_match"))

        template = optional_str(mapping, "template", path)
        default_value = optional_str(mapping, "default_value", path)

        return cls(
            pattern=pattern,
            group=group_normalized,
            # The None guards are kept in case optional_bool can still return
            # None despite the default being supplied.
            case_sensitive=True if case_sensitive is None else bool(case_sensitive),
            multiline=bool(multiline) if multiline is not None else False,
            dotall=bool(dotall) if dotall is not None else False,
            multiple=bool(multiple) if multiple is not None else False,
            template=template,
            on_no_match=on_no_match,
            default_value=default_value,
            path=path,
        )

    def display_label(self) -> str:
        """Return a label of the form ``regex(<pattern>)``."""
        return f"regex({self.pattern})"
@dataclass
class FunctionEdgeProcessorConfig(EdgeProcessorTypeConfig):
    """Configuration for processors implemented as a named Python function."""

    name: str = ""

    FIELD_SPECS = {
        "name": ConfigFieldSpec(
            name="name",
            display_name="Function Name",
            type_hint="str",
            required=True,
            description="Name of the Python function located in functions/edge_processor.",
        )
    }

    @classmethod
    def field_specs(cls) -> Dict[str, ConfigFieldSpec]:
        """Return the field specs with ``name`` enriched from the function catalog.

        The ``name`` spec gets the catalog's function names as enum values and
        per-function descriptions as enum options. Its description is suffixed
        with a note when the catalog failed to load or contains no functions.
        """
        specs = super().field_specs()
        name_spec = specs.get("name")
        if not name_spec:
            return specs

        catalog = get_function_catalog(EDGE_PROCESSOR_FUNCTION_DIR)
        names = catalog.list_function_names()
        metadata = catalog.list_metadata()

        description = name_spec.description or "Processor function name"
        if catalog.load_error:
            description = f"{description} (Loading failed: {catalog.load_error})"
        elif not names:
            description = f"{description} (No processor functions found in functions/edge_processor)"

        # One human-readable line per function, with a placeholder when the
        # catalog has no metadata or no description for it.
        descriptions = {}
        for function_name in names:
            meta = metadata.get(function_name)
            summary = meta.description if meta else None
            descriptions[function_name] = summary or "No description provided."

        if names:
            enum_values = names
            enum_options = enum_options_from_values(names, descriptions, preserve_label_case=True)
        else:
            # An empty catalog leaves the field as free text rather than an empty enum.
            enum_values = None
            enum_options = None

        specs["name"] = replace(
            name_spec,
            enum=enum_values,
            enum_options=enum_options,
            description=description,
        )
        return specs

    @classmethod
    def from_dict(cls, data: Mapping[str, Any], *, path: str) -> "FunctionEdgeProcessorConfig":
        """Build a config from a mapping that carries a non-empty ``name``."""
        mapping = require_mapping(data, path)
        function_name = require_str(mapping, "name", path, allow_empty=False)
        return cls(name=function_name, path=path)

    def display_label(self) -> str:
        """Return the function name, or ``"function"`` when it is empty."""
        if self.name:
            return self.name
        return "function"

    def to_external_value(self) -> Any:
        """Return only the ``name`` entry as the external representation."""
        return {"name": self.name}
# Type variable bound to EdgeProcessorTypeConfig; EdgeProcessorConfig.as_config
# uses it so the returned value is typed as the concrete subclass requested.
TProcessorConfig = TypeVar("TProcessorConfig", bound=EdgeProcessorTypeConfig)
@dataclass
class EdgeProcessorConfig(BaseConfig):
    """Pairs a processor type name with the type-specific config it selects."""

    type: str
    config: EdgeProcessorTypeConfig

    FIELD_SPECS = {
        "type": ConfigFieldSpec(
            name="type",
            display_name="Processor Type",
            type_hint="str",
            required=True,
            description="Select which processor implementation to use (regex_extract, function, etc.).",
        ),
        "config": ConfigFieldSpec(
            name="config",
            display_name="Processor Config",
            type_hint="object",
            required=True,
            description="Payload interpreted by the selected processor.",
        ),
    }

    @classmethod
    def from_dict(cls, data: Any, *, path: str) -> "EdgeProcessorConfig":
        """Parse ``{"type": ..., "config": ...}`` into a wrapper config.

        The ``type`` entry selects a registered schema, whose ``config_cls``
        then parses the ``config`` payload.

        Raises:
            ConfigError: If ``data`` is ``None``, the ``config`` entry is
                missing or ``None``, or ``type`` is not a registered processor.
        """
        if data is None:
            raise ConfigError("processor configuration cannot be null", path)

        mapping = require_mapping(data, path)
        processor_type = require_str(mapping, "type", path)

        raw_config = mapping.get("config")
        if raw_config is None:
            raise ConfigError("processor config is required", extend_path(path, "config"))

        try:
            schema = get_edge_processor_schema(processor_type)
        except SchemaLookupError as exc:
            raise ConfigError(f"unknown processor type '{processor_type}'", extend_path(path, "type")) from exc

        parsed = schema.config_cls.from_dict(raw_config, path=extend_path(path, "config"))
        return cls(type=processor_type, config=parsed, path=path)

    @classmethod
    def child_routes(cls) -> Dict[ChildKey, Type[BaseConfig]]:
        """Map each registered processor type to the config class of its ``config`` field."""
        routes: Dict[ChildKey, Type[BaseConfig]] = {}
        for type_name, schema in iter_edge_processor_schemas().items():
            key = ChildKey(field="config", value=type_name)
            routes[key] = schema.config_cls
        return routes

    @classmethod
    def field_specs(cls) -> Dict[str, ConfigFieldSpec]:
        """Return the field specs with ``type`` turned into an enum of registered processors."""
        specs = super().field_specs()
        type_spec = specs.get("type")
        if not type_spec:
            return specs

        registrations = iter_edge_processor_schemas()
        names = list(registrations.keys())
        descriptions = {}
        for type_name, schema in registrations.items():
            descriptions[type_name] = schema.summary

        specs["type"] = replace(
            type_spec,
            enum=names,
            enum_options=enum_options_from_values(names, descriptions, preserve_label_case=True),
        )
        return specs

    def display_label(self) -> str:
        """Delegate the label to the wrapped processor config."""
        inner = self.config
        return inner.display_label()

    def to_external_value(self) -> Any:
        """Return ``type`` plus the wrapped config's external representation."""
        external = {"type": self.type}
        external["config"] = self.config.to_external_value()
        return external

    def as_config(self, expected_type: Type[TProcessorConfig]) -> TProcessorConfig | None:
        """Return the wrapped config if it is an ``expected_type`` instance, else ``None``."""
        candidate = self.config
        return cast(TProcessorConfig, candidate) if isinstance(candidate, expected_type) else None