mirror of
https://github.com/OpenBMB/ChatDev.git
synced 2026-04-25 11:18:06 +00:00
335 lines
12 KiB
Python
Executable File
335 lines
12 KiB
Python
Executable File
"""Edge payload processor configuration dataclasses."""
|
|
|
|
from dataclasses import dataclass, field, fields, replace
|
|
from typing import Any, Dict, Mapping, Type, TypeVar, cast
|
|
|
|
from entity.enum_options import enum_options_from_values
|
|
from utils.function_catalog import get_function_catalog
|
|
from utils.function_manager import EDGE_PROCESSOR_FUNCTION_DIR
|
|
from schema_registry import (
|
|
SchemaLookupError,
|
|
get_edge_processor_schema,
|
|
iter_edge_processor_schemas,
|
|
)
|
|
from entity.configs.base import (
|
|
BaseConfig,
|
|
ChildKey,
|
|
ConfigError,
|
|
ConfigFieldSpec,
|
|
ensure_list,
|
|
optional_bool,
|
|
optional_str,
|
|
require_mapping,
|
|
require_str,
|
|
extend_path,
|
|
)
|
|
|
|
|
|
def _serialize_config(config: BaseConfig) -> Dict[str, Any]:
    """Return a shallow ``{field name: value}`` mapping for a config dataclass.

    Every dataclass field of ``config`` is read with ``getattr`` and placed in
    the result in field-declaration order. The ``path`` field is left out.
    Values are stored as-is; nothing is copied or converted.
    """
    return {
        spec.name: getattr(config, spec.name)
        for spec in fields(config)
        if spec.name != "path"
    }
|
|
|
|
|
|
class EdgeProcessorTypeConfig(BaseConfig):
    """Common parent for the per-type payload processor configs.

    Supplies fallback implementations of ``display_label`` and
    ``to_external_value`` that subclasses may override.
    """

    def display_label(self) -> str:
        """Return the name of the concrete config class as its label."""
        return type(self).__name__

    def to_external_value(self) -> Any:
        """Return the dataclass fields of this config (without ``path``) as a dict."""
        return _serialize_config(self)
|
|
|
|
|
|
|
|
|
|
# Description shown for each accepted ``on_no_match`` value, keyed by the
# value itself. Key order matters: the keys are listed in this order when the
# enum options for the ``on_no_match`` field are built.
_NO_MATCH_DESCRIPTIONS: Dict[str, str] = {
    "pass": "Leave the payload untouched when no match is found.",
    "default": "Apply default_value (or empty string) if nothing matches.",
    "drop": "Discard the payload entirely when the regex does not match.",
}
|
|
|
|
|
|
@dataclass
class RegexEdgeProcessorConfig(EdgeProcessorTypeConfig):
    """Configuration for regex-based payload extraction.

    Instances are normally built from a raw mapping via ``from_dict``, which
    validates the entries and normalises ``group`` to either an integer
    index, a group name, or ``None``.
    """

    # Regular expression used to extract content (required by from_dict).
    pattern: str = ""
    # Capture group name or index; None means the entire match.
    group: str | int | None = None
    # Whether the regex should be case sensitive.
    case_sensitive: bool = True
    # Enable multiline mode (re.MULTILINE).
    multiline: bool = False
    # Enable dotall mode (re.DOTALL).
    dotall: bool = False
    # Collect all matches instead of only the first.
    multiple: bool = False
    # Optional template applied to the extracted value ('{match}' placeholder).
    template: str | None = None
    # One of "pass", "default" or "drop"; see _NO_MATCH_DESCRIPTIONS.
    on_no_match: str = "pass"
    # Fallback content when on_no_match is "default".
    default_value: str | None = None

    FIELD_SPECS = {
        "pattern": ConfigFieldSpec(
            name="pattern",
            display_name="Regex Pattern",
            type_hint="str",
            required=True,
            description="Python regular expression used to extract content.",
        ),
        "group": ConfigFieldSpec(
            name="group",
            display_name="Capture Group",
            type_hint="str",
            required=False,
            description="Capture group name or index. Defaults to the entire match.",
        ),
        "case_sensitive": ConfigFieldSpec(
            name="case_sensitive",
            display_name="Case Sensitive",
            type_hint="bool",
            required=False,
            default=True,
            description="Whether the regex should be case sensitive.",
        ),
        "multiline": ConfigFieldSpec(
            name="multiline",
            display_name="Multiline Flag",
            type_hint="bool",
            required=False,
            default=False,
            description="Enable multiline mode (re.MULTILINE).",
            advance=True,
        ),
        "dotall": ConfigFieldSpec(
            name="dotall",
            display_name="Dotall Flag",
            type_hint="bool",
            required=False,
            default=False,
            description="Enable dotall mode (re.DOTALL).",
            advance=True,
        ),
        "multiple": ConfigFieldSpec(
            name="multiple",
            display_name="Return Multiple Matches",
            type_hint="bool",
            required=False,
            default=False,
            description="Whether to collect all matches instead of only the first.",
            advance=True,
        ),
        "template": ConfigFieldSpec(
            name="template",
            display_name="Output Template",
            type_hint="str",
            required=False,
            description="Optional template applied to the extracted value. Use '{match}' placeholder.",
            advance=True,
        ),
        "on_no_match": ConfigFieldSpec(
            name="on_no_match",
            display_name="No Match Behavior",
            type_hint="enum",
            required=False,
            default="pass",
            enum=["pass", "default", "drop"],
            description="Behavior when no match is found.",
            enum_options=enum_options_from_values(
                list(_NO_MATCH_DESCRIPTIONS),
                _NO_MATCH_DESCRIPTIONS,
                preserve_label_case=True,
            ),
            advance=True,
        ),
        "default_value": ConfigFieldSpec(
            name="default_value",
            display_name="Default Value",
            type_hint="str",
            required=False,
            description="Fallback content when on_no_match=default.",
            advance=True,
        ),
    }

    @classmethod
    def from_dict(cls, data: Mapping[str, Any], *, path: str) -> "RegexEdgeProcessorConfig":
        """Build a regex processor config from a raw mapping.

        Args:
            data: Raw configuration mapping. ``pattern`` is required and must
                be non-empty; every other key is optional.
            path: Location of ``data`` in the overall configuration, used to
                label validation errors.

        Returns:
            A populated ``RegexEdgeProcessorConfig``. A ``group`` given as a
            string of decimal digits is converted to ``int``; any other
            string is kept as a group name.

        Raises:
            ConfigError: if ``group`` is not a string or integer (booleans
                are rejected), or if ``on_no_match`` is not one of ``pass``,
                ``default`` or ``drop``. The ``require_*`` / ``optional_*``
                helpers presumably raise it as well for malformed entries —
                confirm in ``entity.configs.base``.
        """
        mapping = require_mapping(data, path)
        pattern = require_str(mapping, "pattern", path, allow_empty=False)

        group_value = mapping.get("group")
        group_normalized: str | int | None = None
        if group_value is not None:
            # bool is a subclass of int, so it has to be rejected explicitly;
            # otherwise ``group: true`` would silently select capture group 1.
            if isinstance(group_value, bool) or not isinstance(group_value, (int, str)):
                raise ConfigError("group must be str or int", extend_path(path, "group"))
            # isdecimal() only accepts characters int() can parse, whereas
            # isdigit() also accepts e.g. "²", which makes int() raise
            # ValueError. Such strings are kept as group names instead.
            if isinstance(group_value, str) and group_value.isdecimal():
                group_normalized = int(group_value)
            else:
                group_normalized = group_value

        multiple = optional_bool(mapping, "multiple", path, default=False)
        case_sensitive = optional_bool(mapping, "case_sensitive", path, default=True)
        multiline = optional_bool(mapping, "multiline", path, default=False)
        dotall = optional_bool(mapping, "dotall", path, default=False)

        # A missing or empty value falls back to "pass".
        on_no_match = optional_str(mapping, "on_no_match", path) or "pass"
        if on_no_match not in {"pass", "default", "drop"}:
            raise ConfigError("on_no_match must be pass, default or drop", extend_path(path, "on_no_match"))

        template = optional_str(mapping, "template", path)
        default_value = optional_str(mapping, "default_value", path)

        return cls(
            pattern=pattern,
            group=group_normalized,
            # Defensive normalisation in case optional_bool hands back None.
            case_sensitive=True if case_sensitive is None else bool(case_sensitive),
            multiline=bool(multiline) if multiline is not None else False,
            dotall=bool(dotall) if dotall is not None else False,
            multiple=bool(multiple) if multiple is not None else False,
            template=template,
            on_no_match=on_no_match,
            default_value=default_value,
            path=path,
        )

    def display_label(self) -> str:
        """Return a label of the form ``regex(<pattern>)``."""
        return f"regex({self.pattern})"
|
|
|
|
|
|
@dataclass
class FunctionEdgeProcessorConfig(EdgeProcessorTypeConfig):
    """Configuration for payload processors implemented as named functions."""

    # Name of the processor function; must be non-empty when loaded via from_dict.
    name: str = ""

    FIELD_SPECS = {
        "name": ConfigFieldSpec(
            name="name",
            display_name="Function Name",
            type_hint="str",
            required=True,
            description="Name of the Python function located in functions/edge_processor.",
        )
    }

    @classmethod
    def field_specs(cls) -> Dict[str, ConfigFieldSpec]:
        """Return the field specs with ``name`` enriched from the function catalog.

        The ``name`` spec gets the catalog's function names as its enum
        values, one enum option per function, and a description that reports
        a catalog load error or an empty catalog. When the inherited specs
        contain no ``name`` entry they are returned unchanged.
        """
        specs = super().field_specs()
        base_spec = specs.get("name")
        if not base_spec:
            return specs

        catalog = get_function_catalog(EDGE_PROCESSOR_FUNCTION_DIR)
        function_names = catalog.list_function_names()
        metadata_by_name = catalog.list_metadata()

        # Start from the declared description and append a diagnostic note
        # when the catalog could not be loaded or is empty.
        summary = base_spec.description or "Processor function name"
        if catalog.load_error:
            summary = f"{summary} (Loading failed: {catalog.load_error})"
        elif not function_names:
            summary = f"{summary} (No processor functions found in functions/edge_processor)"

        def describe(function_name):
            """Return the catalog description of a function, or a placeholder."""
            meta = metadata_by_name.get(function_name)
            return (meta.description if meta else None) or "No description provided."

        option_descriptions = {
            function_name: describe(function_name) for function_name in function_names
        }

        if function_names:
            enum_values = function_names
            enum_options = enum_options_from_values(
                function_names, option_descriptions, preserve_label_case=True
            )
        else:
            # No functions available: leave the field as free text.
            enum_values = None
            enum_options = None

        specs["name"] = replace(
            base_spec,
            enum=enum_values,
            enum_options=enum_options,
            description=summary,
        )
        return specs

    @classmethod
    def from_dict(cls, data: Mapping[str, Any], *, path: str) -> "FunctionEdgeProcessorConfig":
        """Build the config from a mapping holding a non-empty ``name`` string."""
        function_name = require_str(require_mapping(data, path), "name", path, allow_empty=False)
        return cls(name=function_name, path=path)

    def display_label(self) -> str:
        """Return the function name, or ``"function"`` when the name is empty."""
        if self.name:
            return self.name
        return "function"

    def to_external_value(self) -> Any:
        """Return ``{"name": <function name>}``."""
        return dict(name=self.name)
|
|
|
|
|
|
# Type variable bound to EdgeProcessorTypeConfig. EdgeProcessorConfig.as_config
# uses it so that its return type follows the ``expected_type`` argument.
TProcessorConfig = TypeVar("TProcessorConfig", bound=EdgeProcessorTypeConfig)
|
|
|
|
|
|
@dataclass
class EdgeProcessorConfig(BaseConfig):
    """Wrapper pairing a processor type name with its type-specific config."""

    # Registered processor type name, resolved through the schema registry.
    type: str
    # Parsed config object produced by the schema's config class.
    config: EdgeProcessorTypeConfig

    FIELD_SPECS = {
        "type": ConfigFieldSpec(
            name="type",
            display_name="Processor Type",
            type_hint="str",
            required=True,
            description="Select which processor implementation to use (regex_extract, function, etc.).",
        ),
        "config": ConfigFieldSpec(
            name="config",
            display_name="Processor Config",
            type_hint="object",
            required=True,
            description="Payload interpreted by the selected processor.",
        ),
    }

    @classmethod
    def from_dict(cls, data: Any, *, path: str) -> "EdgeProcessorConfig":
        """Parse a ``{"type": ..., "config": ...}`` mapping into a wrapper config.

        The ``type`` entry selects a registered schema, and that schema's
        config class parses the ``config`` entry.

        Raises:
            ConfigError: if ``data`` is ``None``, if ``config`` is missing or
                ``None``, or if ``type`` names no registered processor schema.
        """
        if data is None:
            raise ConfigError("processor configuration cannot be null", path)

        mapping = require_mapping(data, path)
        type_name = require_str(mapping, "type", path)

        raw_config = mapping.get("config")
        config_path = extend_path(path, "config")
        if raw_config is None:
            raise ConfigError("processor config is required", config_path)

        try:
            schema = get_edge_processor_schema(type_name)
        except SchemaLookupError as exc:
            raise ConfigError(f"unknown processor type '{type_name}'", extend_path(path, "type")) from exc

        parsed_config = schema.config_cls.from_dict(raw_config, path=config_path)
        return cls(type=type_name, config=parsed_config, path=path)

    @classmethod
    def child_routes(cls) -> Dict[ChildKey, Type[BaseConfig]]:
        """Map each registered processor type to the config class for ``config``."""
        routes: Dict[ChildKey, Type[BaseConfig]] = {}
        for type_name, schema in iter_edge_processor_schemas().items():
            routes[ChildKey(field="config", value=type_name)] = schema.config_cls
        return routes

    @classmethod
    def field_specs(cls) -> Dict[str, ConfigFieldSpec]:
        """Return the field specs with ``type`` listing the registered processors."""
        specs = super().field_specs()
        base_spec = specs.get("type")
        if not base_spec:
            return specs

        registry = iter_edge_processor_schemas()
        type_names = list(registry.keys())
        summaries = {type_name: schema.summary for type_name, schema in registry.items()}
        specs["type"] = replace(
            base_spec,
            enum=type_names,
            enum_options=enum_options_from_values(type_names, summaries, preserve_label_case=True),
        )
        return specs

    def display_label(self) -> str:
        """Return the label of the wrapped processor config."""
        inner = self.config
        return inner.display_label()

    def to_external_value(self) -> Any:
        """Return ``{"type": ..., "config": ...}`` using the wrapped config's external value."""
        external: Dict[str, Any] = {"type": self.type}
        external["config"] = self.config.to_external_value()
        return external

    def as_config(self, expected_type: Type[TProcessorConfig]) -> TProcessorConfig | None:
        """Return the wrapped config if it is an ``expected_type`` instance, else ``None``."""
        inner = self.config
        return cast(TProcessorConfig, inner) if isinstance(inner, expected_type) else None
|