fix(actor): harden lifecycle, supervision, Redis mailbox, and add comprehensive tests

- Fix spawn() zombie cell: clean up registry on start() failure
- Fix shutdown(): cancel + await tasks that exceed graceful timeout
- Fix _shutdown(): await mailbox.close() to release backend resources
- Fix escalate directive: stop failing child before propagating to grandparent
- Fix RedisMailbox.put(): wrap Redis errors in try/except, return False on failure
- Fix retry.py: replace assert with proper raise for last_exc
- Add put_batch() to Mailbox abstraction for single-roundtrip bulk enqueue
- Add RedisMailbox.put_batch() with atomic Lua script for bounded queues
- Add MailboxFullError exception type for semantic backpressure handling
- Add redis>=7.4.0 dependency with public PyPI sources in uv.lock

Tests added (31 total, up from 27):
- test_middleware_on_restart_hook: verifies middleware.on_restart() on supervision restart
- test_ask_propagates_actor_exception: ask() re-raises original exception type
- test_ask_propagates_exception_while_supervised: exception propagates; root actor survives
- test_ask_timeout_late_reply_no_exception: late reply after timeout is silent no-op
- test_actor_backpressure.py: MailboxFullError + dead letter on full mailbox
- test_actor_retry.py: ask_with_retry with exponential backoff
- test_mailbox_redis.py: RedisMailbox put/get/batch/close
- bench_actor_redis.py: RedisMailbox throughput benchmarks
This commit is contained in:
greatmengqi 2026-03-31 10:09:05 +08:00
parent 3e17417122
commit 228a2a66e3
14 changed files with 3156 additions and 2289 deletions

View File

@ -19,7 +19,8 @@ Usage::
from .actor import Actor, ActorContext
from .mailbox import Mailbox, MemoryMailbox
from .middleware import Middleware
from .ref import ActorRef, ReplyChannel
from .ref import ActorRef, MailboxFullError, ReplyChannel
from .retry import IdempotentActorMixin, IdempotencyStore, RetryEnvelope, ask_with_retry
from .supervision import AllForOneStrategy, Directive, OneForOneStrategy, SupervisorStrategy
from .system import ActorSystem, DeadLetter
@ -32,9 +33,14 @@ __all__ = [
"DeadLetter",
"Directive",
"Mailbox",
"MailboxFullError",
"MemoryMailbox",
"Middleware",
"OneForOneStrategy",
"ReplyChannel",
"RetryEnvelope",
"SupervisorStrategy",
"IdempotentActorMixin",
"IdempotencyStore",
"ask_with_retry",
]

View File

@ -12,6 +12,12 @@ import asyncio
from typing import Any
BACKPRESSURE_BLOCK = "block"
BACKPRESSURE_DROP_NEW = "drop_new"
BACKPRESSURE_FAIL = "fail"
BACKPRESSURE_POLICIES = {BACKPRESSURE_BLOCK, BACKPRESSURE_DROP_NEW, BACKPRESSURE_FAIL}
class Mailbox(abc.ABC):
"""Abstract mailbox — the message queue for an actor.
@ -44,6 +50,18 @@ class Mailbox(abc.ABC):
def full(self) -> bool:
"""Return True if mailbox is at capacity."""
    async def put_batch(self, msgs: list[Any]) -> int:
        """Enqueue multiple messages. Returns count accepted.

        Default implementation falls back to sequential ``put`` calls.
        Backends like Redis should override this for efficient bulk push.
        """
        # A rejected put (full/closed backend) is simply not counted;
        # the rest of the batch is still attempted.
        count = 0
        for msg in msgs:
            if await self.put(msg):
                count += 1
        return count
async def close(self) -> None:
"""Release resources. Default is no-op."""
@ -55,23 +73,32 @@ class Empty(Exception):
class MemoryMailbox(Mailbox):
"""In-process mailbox backed by ``asyncio.Queue``."""
def __init__(self, maxsize: int = 256) -> None:
def __init__(self, maxsize: int = 256, *, backpressure_policy: str = BACKPRESSURE_BLOCK) -> None:
if backpressure_policy not in BACKPRESSURE_POLICIES:
raise ValueError(
f"Invalid backpressure_policy={backpressure_policy!r}, "
f"expected one of {sorted(BACKPRESSURE_POLICIES)}"
)
self._queue: asyncio.Queue[Any] = asyncio.Queue(maxsize=maxsize)
self._maxsize = maxsize
self._backpressure_policy = backpressure_policy
async def put(self, msg: Any) -> bool:
try:
if self._backpressure_policy == BACKPRESSURE_BLOCK:
await self._queue.put(msg)
return True
except asyncio.QueueFull:
return False
def put_nowait(self, msg: Any) -> bool:
try:
if self._backpressure_policy in (BACKPRESSURE_DROP_NEW, BACKPRESSURE_FAIL):
if self._queue.full():
return False
self._queue.put_nowait(msg)
return True
except asyncio.QueueFull:
return False
def put_nowait(self, msg: Any) -> bool:
if self._queue.full():
return False
self._queue.put_nowait(msg)
return True
async def get(self) -> Any:
return await self._queue.get()

View File

@ -107,12 +107,16 @@ class RedisMailbox(Mailbox):
if self._closed:
return False
data = _serialize(msg)
if self._maxlen > 0:
# Atomic check+push via Lua script to avoid TOCTOU race
result = await self._redis.evalsha_or_eval(self._LUA_BOUNDED_PUSH, 1, self._queue_name, data, self._maxlen)
return bool(result)
await self._redis.lpush(self._queue_name, data)
return True
try:
if self._maxlen > 0:
# Atomic check+push via Lua script to avoid TOCTOU race
result = await self._redis.eval(self._LUA_BOUNDED_PUSH, 1, self._queue_name, data, self._maxlen)
return bool(result)
await self._redis.lpush(self._queue_name, data)
return True
except Exception as e:
logger.warning("RedisMailbox.put failed for %s: %s", self._queue_name, e)
return False
def put_nowait(self, msg: Any) -> bool:
"""Redis cannot do synchronous non-blocking enqueue reliably.
@ -122,6 +126,36 @@ class RedisMailbox(Mailbox):
"""
return False
async def put_batch(self, msgs: list[Any]) -> int:
"""Push multiple messages in a single LPUSH command (one round-trip).
Unbounded queues: all messages sent atomically in one LPUSH.
Bounded queues: sequential puts to respect maxlen (no batch Lua script needed).
"""
if self._closed or not msgs:
return 0
data_list = []
for msg in msgs:
try:
data_list.append(_serialize(msg))
except TypeError as e:
logger.warning("Skipping non-serializable message in put_batch: %s", e)
if not data_list:
return 0
if self._maxlen > 0:
count = 0
for data in data_list:
# Reuse the Lua script for TOCTOU-safe bounded check (same as put())
result = await self._redis.eval(self._LUA_BOUNDED_PUSH, 1, self._queue_name, data, self._maxlen)
if result:
count += 1
else:
break # queue full — stop early
return count
# Unbounded: single LPUSH with all values — one network round-trip
await self._redis.lpush(self._queue_name, *data_list)
return len(data_list)
async def get(self) -> Any:
"""Blocking dequeue via BRPOP. Retries until a message arrives."""
while not self._closed:

View File

@ -83,6 +83,10 @@ class ActorStoppedError(Exception):
"""Raised when sending to a stopped actor via ask."""
class MailboxFullError(RuntimeError):
"""Raised when a message is rejected because the mailbox is at capacity."""
# ---------------------------------------------------------------------------
# Internal message wrappers (serializable — no Future objects)
# ---------------------------------------------------------------------------

View File

@ -0,0 +1,142 @@
"""Retry + idempotency helpers for Actor ask/tell patterns.
This module provides:
- Message envelope carrying retry/idempotency metadata
- In-memory idempotency store (process-local)
- ask_with_retry helper (bounded retries + exponential backoff + jitter)
Design notes:
- Keep transport-agnostic; works with current in-memory mailbox.
- Business handlers must opt in by using ``IdempotentActorMixin`` and
wrapping logic with ``handle_idempotent``.
"""
from __future__ import annotations
import asyncio
import random
import time
import uuid
from dataclasses import dataclass, field
from typing import Any
@dataclass(slots=True)
class RetryEnvelope:
"""Metadata wrapper for idempotent/retriable messages."""
payload: Any
message_id: str = field(default_factory=lambda: uuid.uuid4().hex)
idempotency_key: str | None = None
attempt: int = 1
max_attempts: int = 1
created_at_ms: int = field(default_factory=lambda: int(time.time() * 1000))
@classmethod
def wrap(
cls,
payload: Any,
*,
idempotency_key: str | None = None,
attempt: int = 1,
max_attempts: int = 1,
) -> "RetryEnvelope":
return cls(
payload=payload,
idempotency_key=idempotency_key,
attempt=attempt,
max_attempts=max_attempts,
)
class IdempotencyStore:
    """In-memory map from idempotency key to cached handler result.

    Process-local only: entries never expire and do not survive restarts.
    """

    def __init__(self) -> None:
        # key -> previously computed handler result
        self._cache: dict[str, Any] = {}

    def has(self, key: str) -> bool:
        """Return True if a result was already recorded for *key*."""
        return key in self._cache

    def get(self, key: str) -> Any:
        """Return the cached result for *key*; raises ``KeyError`` if absent."""
        return self._cache[key]

    def set(self, key: str, value: Any) -> None:
        """Record *value* for *key*, overwriting any prior entry."""
        self._cache[key] = value
class IdempotentActorMixin:
    """Mixin giving actors a ``handle_idempotent`` helper.

    Usage in actor::

        class MyActor(IdempotentActorMixin, Actor):
            async def on_receive(self, message):
                return await self.handle_idempotent(message, self._handle)

            async def _handle(self, payload):
                ...
    """

    def _idempotency_store(self) -> IdempotencyStore:
        """Return this actor's store, lazily creating it on first use."""
        existing = getattr(self, "_idem_store", None)
        if existing is not None:
            return existing
        created = IdempotencyStore()
        self._idem_store = created
        return created

    async def handle_idempotent(self, message: Any, handler):
        """Invoke *handler*, consulting the idempotency cache for envelopes.

        Non-envelope messages and envelopes without a key pass straight
        through to *handler*; a keyed envelope whose key was handled before
        returns the cached result without re-running the handler.
        """
        if not isinstance(message, RetryEnvelope):
            return await handler(message)
        key = message.idempotency_key
        if not key:
            return await handler(message.payload)
        store = self._idempotency_store()
        if store.has(key):
            return store.get(key)
        outcome = await handler(message.payload)
        store.set(key, outcome)
        return outcome
async def ask_with_retry(
    ref,
    payload: Any,
    *,
    timeout: float = 5.0,
    max_attempts: int = 3,
    base_backoff_s: float = 0.1,
    max_backoff_s: float = 5.0,
    jitter_ratio: float = 0.3,
    retry_exceptions: tuple[type[BaseException], ...] = (asyncio.TimeoutError,),
    idempotency_key: str | None = None,
) -> Any:
    """Ask *ref* with bounded retries, exponential backoff, and jitter.

    Every attempt wraps *payload* in a :class:`RetryEnvelope` sharing a single
    idempotency key, so handlers using ``IdempotentActorMixin`` execute the
    work at most once even when a reply is lost and the ask is retried.

    Raises:
        ValueError: if ``max_attempts`` is below 1.
        BaseException: the last exception caught, once all attempts failed.
    """
    if max_attempts < 1:
        raise ValueError("max_attempts must be >= 1")
    key = idempotency_key or uuid.uuid4().hex
    last_exc: BaseException | None = None
    attempt = 1
    while attempt <= max_attempts:
        envelope = RetryEnvelope.wrap(
            payload,
            idempotency_key=key,
            attempt=attempt,
            max_attempts=max_attempts,
        )
        try:
            return await ref.ask(envelope, timeout=timeout)
        except retry_exceptions as exc:
            last_exc = exc
        if attempt >= max_attempts:
            break
        # Exponential growth capped at max_backoff_s, plus proportional jitter
        # to spread retries from concurrent callers.
        delay = min(max_backoff_s, base_backoff_s * (2 ** (attempt - 1)))
        await asyncio.sleep(delay + delay * jitter_ratio * random.random())
        attempt += 1
    raise last_exc  # type: ignore[misc]  # set on every failed attempt; loop runs >= 1 time

View File

@ -11,7 +11,7 @@ from typing import Any
from .actor import Actor, ActorContext
from .mailbox import Empty, Mailbox, MemoryMailbox
from .middleware import ActorMailboxContext, Middleware, NextFn, build_middleware_chain
from .ref import ActorRef, ActorStoppedError, ReplyChannel, _Envelope, _ReplyMessage, _ReplyRegistry, _Stop
from .ref import ActorRef, ActorStoppedError, MailboxFullError, ReplyChannel, _Envelope, _ReplyMessage, _ReplyRegistry, _Stop
from .supervision import Directive, SupervisorStrategy
logger = logging.getLogger(__name__)
@ -87,7 +87,11 @@ class ActorSystem:
middlewares=middlewares or [],
)
self._root_cells[name] = cell
await cell.start()
try:
await cell.start()
except Exception:
del self._root_cells[name]
raise
return cell.ref
async def shutdown(self, *, timeout: float = 10.0) -> None:
@ -99,7 +103,12 @@ class ActorSystem:
if cell.task is not None:
tasks.append(cell.task)
if tasks:
await asyncio.wait(tasks, timeout=timeout)
_, pending = await asyncio.wait(tasks, timeout=timeout)
# Cancel tasks that didn't finish within the timeout to prevent zombie tasks
for t in pending:
t.cancel()
if pending:
await asyncio.wait(pending, timeout=2.0)
self._root_cells.clear()
self._replies.reject_all(ActorStoppedError("ActorSystem shutting down"))
await self._reply_channel.stop_listener()
@ -188,16 +197,25 @@ class _ActorCell:
self.task = asyncio.create_task(self._run(), name=f"actor:{self.path}")
async def enqueue(self, msg: _Envelope | _Stop) -> None:
if not self.mailbox.put_nowait(msg):
# Try non-blocking first (fast path for MemoryMailbox)
if self.mailbox.put_nowait(msg):
return
# Fallback to async put (required for Redis and other async backends)
if not await self.mailbox.put(msg):
if isinstance(msg, _Envelope) and msg.correlation_id is not None:
self.system._replies.reject(msg.correlation_id, RuntimeError(f"Mailbox full: {self.path}"))
self.system._replies.reject(msg.correlation_id, MailboxFullError(f"Mailbox full: {self.path}"))
elif isinstance(msg, _Envelope):
self.system._dead_letter(self.ref, msg.payload, msg.sender)
def request_stop(self) -> None:
"""Request graceful shutdown. Falls back to task.cancel() if mailbox full."""
"""Request graceful shutdown.
Tries put_nowait first. If that fails (full or unsupported backend),
cancels the task directly so _run exits via CancelledError finally _shutdown.
"""
if not self.stopped:
if not self.mailbox.put_nowait(_Stop()):
# Redis/async backends can't put_nowait — cancel the task
if self.task is not None and not self.task.done():
self.task.cancel()
else:
@ -223,7 +241,11 @@ class _ActorCell:
middlewares=middlewares or [],
)
self.children[name] = child
await child.start()
try:
await child.start()
except Exception:
del self.children[name]
raise
return child.ref
# -- Processing loop -------------------------------------------------------
@ -310,6 +332,11 @@ class _ActorCell:
# Remove from parent
if self.parent is not None:
self.parent.children.pop(self.name, None)
# Close mailbox to release backend resources (e.g. Redis connections)
try:
await self.mailbox.close()
except Exception:
logger.exception("Error closing mailbox for %s", self.path)
# -- Supervision -----------------------------------------------------------
@ -337,8 +364,16 @@ class _ActorCell:
return
if directive == Directive.escalate:
logger.info("Supervisor %s: escalate %s", self.path, type(error).__name__)
raise error
# Stop the failing child, then propagate failure up the supervision chain.
# We cannot use `raise error` here — that would crash the child's _run
# loop instead of notifying the grandparent's supervisor.
child.request_stop()
if self.parent is not None:
logger.info("Supervisor %s: escalate %s to grandparent %s", self.path, type(error).__name__, self.parent.path)
await self.parent._handle_child_failure(self, error)
else:
logger.error("Uncaught escalation at root actor %s: %s", self.path, error)
return
if directive == Directive.restart:
for name in affected:

View File

@ -19,7 +19,11 @@ dependencies = [
]
[dependency-groups]
dev = ["pytest>=8.0.0", "ruff>=0.14.11"]
dev = [
"pytest>=8.0.0",
"redis>=7.4.0",
"ruff>=0.14.11",
]
[tool.uv.workspace]
members = ["packages/harness"]

View File

@ -2,7 +2,6 @@
import asyncio
import time
import statistics
from deerflow.actor import Actor, ActorSystem, Middleware
@ -17,7 +16,11 @@ class CounterActor(Actor):
self.count = 0
async def on_receive(self, message):
self.count += 1
if message == "inc":
self.count += 1
return self.count
if message == "get":
return self.count
return self.count
@ -69,6 +72,8 @@ async def bench_tell_throughput(n=100_000):
await ref.tell("inc")
# Wait for all messages to be processed
count = await ref.ask("get", timeout=30.0)
if count != n:
print(f" warning: expected {n} processed, got {count}")
elapsed = time.perf_counter() - start
await system.shutdown()

View File

@ -0,0 +1,273 @@
"""RedisMailbox benchmark: throughput, latency, concurrency, backpressure."""
import asyncio
import time
import redis.asyncio as redis
from deerflow.actor import Actor, ActorSystem
from deerflow.actor.mailbox_redis import RedisMailbox
class EchoActor(Actor):
    """Replies with the received message unchanged — used for ask round-trip benchmarks."""
    async def on_receive(self, message):
        return message
class CounterActor(Actor):
    """Counts ``"inc"`` messages; every message is answered with the running total."""

    async def on_started(self):
        # Counter lives on the instance and resets when the actor (re)starts.
        self.count = 0

    async def on_receive(self, message):
        if message == "inc":
            self.count += 1
        # "get" and any other message simply report the current total.
        return self.count
def fmt(n):
    """Compact human-readable count: 1_234_567 -> '1.2M', 45_000 -> '45K'."""
    if n < 1_000:
        return str(n)
    if n < 1_000_000:
        return f"{n / 1_000:.0f}K"
    return f"{n / 1_000_000:.1f}M"
async def _redis_client():
    """Connect to the local Redis at 127.0.0.1:6379 and verify liveness with PING."""
    # decode_responses=False: mailbox serialization works on raw bytes.
    client = redis.Redis(host="127.0.0.1", port=6379, decode_responses=False)
    await client.ping()
    return client
async def bench_redis_ask_throughput(n=20_000):
    """Measure sequential ask() round-trips per second through a RedisMailbox."""
    client = await _redis_client()
    queue = "deerflow:bench:redis:ask"
    # Start from an empty queue so leftovers from prior runs don't skew timing.
    await client.delete(queue)
    mailbox = RedisMailbox(client.connection_pool, queue, brpop_timeout=0.05)
    system = ActorSystem("bench-redis")
    ref = await system.spawn(EchoActor, "echo", mailbox=mailbox)
    start = time.perf_counter()
    for _ in range(n):
        await ref.ask("ping", timeout=5.0)
    elapsed = time.perf_counter() - start
    await system.shutdown()
    rate = n / elapsed
    print(f" redis ask throughput: {fmt(n)} msgs in {elapsed:.2f}s = {fmt(int(rate))}/s")
async def bench_redis_tell_throughput(n=50_000):
    """Measure fire-and-forget tell() rate; a final ask('get') acts as a drain barrier."""
    client = await _redis_client()
    queue = "deerflow:bench:redis:tell"
    await client.delete(queue)
    mailbox = RedisMailbox(client.connection_pool, queue, brpop_timeout=0.05)
    system = ActorSystem("bench-redis")
    ref = await system.spawn(CounterActor, "counter", mailbox=mailbox)
    start = time.perf_counter()
    for _ in range(n):
        await ref.tell("inc")
    # 'get' is answered only after the queued 'inc' messages, so this waits for drain.
    count = await ref.ask("get", timeout=30.0)
    elapsed = time.perf_counter() - start
    await system.shutdown()
    rate = n / elapsed
    # loss > 0 means some tells were dropped or never processed.
    loss = n - count
    print(f" redis tell throughput: {fmt(n)} msgs in {elapsed:.2f}s = {fmt(int(rate))}/s (loss: {loss})")
async def bench_redis_ask_latency(n=5_000):
    """Measure single-ask round-trip latency percentiles (p50/p99/p99.9) in microseconds."""
    client = await _redis_client()
    queue = "deerflow:bench:redis:latency"
    await client.delete(queue)
    mailbox = RedisMailbox(client.connection_pool, queue, brpop_timeout=0.05)
    system = ActorSystem("bench-redis")
    ref = await system.spawn(EchoActor, "echo", mailbox=mailbox)
    # Warm up connections and code paths before sampling.
    for _ in range(100):
        await ref.ask("warmup", timeout=5.0)
    latencies = []
    for _ in range(n):
        t0 = time.perf_counter()
        await ref.ask("ping", timeout=5.0)
        latencies.append((time.perf_counter() - t0) * 1_000_000)
    await system.shutdown()
    # Percentiles taken by index into the sorted sample.
    latencies.sort()
    p50 = latencies[len(latencies) // 2]
    p99 = latencies[int(len(latencies) * 0.99)]
    p999 = latencies[int(len(latencies) * 0.999)]
    print(f" redis ask latency: p50={p50:.0f}µs p99={p99:.0f}µs p99.9={p999:.0f}µs")
async def bench_redis_concurrent_actors(num_actors=200, msgs_per_actor=100):
    """Fan out tells across many actors, each with its own Redis-backed queue."""
    client = await _redis_client()
    system = ActorSystem("bench-redis")
    refs = []
    for i in range(num_actors):
        q = f"deerflow:bench:redis:conc:{i}"
        await client.delete(q)
        mailbox = RedisMailbox(client.connection_pool, q, brpop_timeout=0.05)
        refs.append(await system.spawn(CounterActor, f"a{i}", mailbox=mailbox))
    start = time.perf_counter()
    async def send_batch(ref, n):
        # Send n tells, yielding every 50 so concurrent senders make progress,
        # then read the final count as a drain barrier.
        for i in range(n):
            await ref.tell("inc")
            if i % 50 == 49:
                await asyncio.sleep(0)
        return await ref.ask("get", timeout=30.0)
    results = await asyncio.gather(*[send_batch(r, msgs_per_actor) for r in refs])
    elapsed = time.perf_counter() - start
    total = num_actors * msgs_per_actor
    delivered = sum(results)
    rate = total / elapsed
    loss = total - delivered
    print(
        f" redis concurrency: {num_actors} actors × {msgs_per_actor} msgs = {fmt(total)} in {elapsed:.2f}s = {fmt(int(rate))}/s (loss: {loss})"
    )
    await system.shutdown()
async def bench_redis_maxlen_backpressure(total_messages=20_000, maxlen=100, ask_timeout=0.01, ask_concurrency=200):
    """Measure drop rate (tell path) and failure rate (ask path) against a bounded queue."""
    client = await _redis_client()
    queue_tell = "deerflow:bench:redis:bp:tell"
    await client.delete(queue_tell)
    mailbox_tell = RedisMailbox(client.connection_pool, queue_tell, maxlen=maxlen, brpop_timeout=0.05)
    system_tell = ActorSystem("bench-redis-bp-tell")
    ref_tell = await system_tell.spawn(CounterActor, "counter", mailbox=mailbox_tell)
    # Saturate with tell: dropped messages become dead letters
    for _ in range(total_messages):
        await ref_tell.tell("inc")
    # Short grace period for the consumer before sampling counts.
    await asyncio.sleep(0.2)
    processed = await ref_tell.ask("get", timeout=10.0)
    dropped = len(system_tell.dead_letters)
    drop_rate = dropped / total_messages if total_messages else 0.0
    print(
        f" redis maxlen tell: maxlen={maxlen}, sent={fmt(total_messages)}, processed={fmt(processed)}, dropped={fmt(dropped)} ({drop_rate:.1%})"
    )
    await system_tell.shutdown()
    # Ask timeout rate under pressure
    queue_ask = "deerflow:bench:redis:bp:ask"
    await client.delete(queue_ask)
    mailbox_ask = RedisMailbox(client.connection_pool, queue_ask, maxlen=maxlen, brpop_timeout=0.05)
    system_ask = ActorSystem("bench-redis-bp-ask")
    ref_ask = await system_ask.spawn(EchoActor, "echo", mailbox=mailbox_ask)
    async def one_ask(i):
        # Classify each ask outcome: ok / timeout / rejected (mailbox full).
        try:
            await ref_ask.ask(i, timeout=ask_timeout)
            return True, None
        except asyncio.TimeoutError:
            return False, "timeout"
        except Exception: # MailboxFullError or other rejection
            return False, "rejected"
    # Semaphore bounds in-flight asks so the event loop isn't flooded at once.
    sem = asyncio.Semaphore(ask_concurrency)
    async def one_ask_limited(i):
        async with sem:
            return await one_ask(i)
    results = await asyncio.gather(*[one_ask_limited(i) for i in range(total_messages)])
    ok = sum(1 for r, _ in results if r)
    timeout_count = sum(1 for _, reason in results if reason == "timeout")
    rejected_count = sum(1 for _, reason in results if reason == "rejected")
    fail_rate = (total_messages - ok) / total_messages if total_messages else 0.0
    print(
        f" redis maxlen ask: maxlen={maxlen}, total={fmt(total_messages)}, ok={fmt(ok)}, "
        f"timeout={fmt(timeout_count)}, rejected={fmt(rejected_count)} (fail: {fail_rate:.1%}), "
        f"ask_timeout={ask_timeout}s, concurrency={ask_concurrency}"
    )
    await system_ask.shutdown()
async def bench_redis_put_batch(n=50_000, batch_size=100):
    """put_batch: N messages in N/batch_size round-trips instead of N."""
    client = await _redis_client()
    queue = "deerflow:bench:redis:batch"
    await client.delete(queue)
    mailbox = RedisMailbox(client.connection_pool, queue, brpop_timeout=0.05)
    system = ActorSystem("bench-redis-batch")
    ref = await system.spawn(CounterActor, "counter", mailbox=mailbox)
    from deerflow.actor.ref import _Envelope
    # Pre-build all envelopes so construction cost is excluded from the timed push.
    batches = [
        [_Envelope(payload="inc") for _ in range(batch_size)]
        for _ in range(n // batch_size)
    ]
    t0 = time.perf_counter()
    for batch in batches:
        await mailbox.put_batch(batch)
    enqueue_elapsed = time.perf_counter() - t0
    # Drain barrier: 'get' is answered only after all 'inc' envelopes are processed.
    count = await ref.ask("get", timeout=60.0)
    total_elapsed = time.perf_counter() - t0
    loss = n - count
    enqueue_rate = n / enqueue_elapsed
    print(
        f" redis put_batch push: {fmt(n)} msgs in {enqueue_elapsed:.3f}s = {fmt(int(enqueue_rate))}/s "
        f"(batch={batch_size}, round-trips={n // batch_size})"
    )
    print(
        f" redis put_batch total: end-to-end {total_elapsed:.2f}s = {fmt(int(n / total_elapsed))}/s "
        f"(consume bottleneck, loss={loss})"
    )
    await system.shutdown()
async def main():
    """Run all RedisMailbox benchmarks in sequence against a local Redis."""
    print("=" * 72)
    print(" RedisMailbox Benchmarks")
    print("=" * 72)
    print()
    await bench_redis_tell_throughput()
    await bench_redis_ask_throughput()
    await bench_redis_ask_latency()
    await bench_redis_concurrent_actors()
    await bench_redis_put_batch()
    await bench_redis_maxlen_backpressure()
    print()
    print("=" * 72)
    print(" Done")
    print("=" * 72)
if __name__ == "__main__":
    asyncio.run(main())

View File

@ -440,3 +440,95 @@ class TestMiddleware:
# tell goes through middleware too
assert any("before:" in entry for entry in mw.log) is False
await system.shutdown()
    @pytest.mark.anyio
    async def test_middleware_on_restart_hook(self):
        """on_restart is called on the middleware when a child actor is restarted."""
        class RestartTrackingMiddleware(Middleware):
            # Records every error that triggered a restart, in arrival order.
            def __init__(self):
                self.restart_errors: list[Exception] = []
            async def on_restart(self, actor_ref, error):
                self.restart_errors.append(error)
        mw = RestartTrackingMiddleware()
        class ChildSpawningParent(Actor):
            async def on_receive(self, message):
                if message == "spawn":
                    ref = await self.context.spawn(CrashActor, "child", middlewares=[mw])
                    return ref
        system = ActorSystem("test")
        parent = await system.spawn(ChildSpawningParent, "parent")
        child = await parent.ask("spawn")
        # Crash the child — parent supervisor will restart it
        try:
            await child.ask("crash")
        except ValueError:
            pass
        # Give the supervision loop a tick to run the restart path.
        await asyncio.sleep(0.1)
        assert len(mw.restart_errors) == 1
        assert isinstance(mw.restart_errors[0], ValueError)
        await system.shutdown()
class TestAskErrorPropagation:
    """ask() must surface actor exceptions to the caller without killing the system."""

    @pytest.mark.anyio
    async def test_ask_propagates_actor_exception(self):
        """ask() re-raises the original exception type when on_receive crashes."""
        class BoomActor(Actor):
            async def on_receive(self, message):
                raise ValueError("intentional crash")
        system = ActorSystem("test")
        ref = await system.spawn(BoomActor, "boom")
        with pytest.raises(ValueError, match="intentional crash"):
            await ref.ask("trigger")
        await system.shutdown()

    @pytest.mark.anyio
    async def test_ask_propagates_exception_while_supervised(self):
        """ask() gets the exception even when the actor is supervised (not stopped)."""
        class SometimesCrashActor(Actor):
            async def on_receive(self, message):
                if message == "crash":
                    raise RuntimeError("supervised crash")
                return "ok"
        system = ActorSystem("test")
        ref = await system.spawn(SometimesCrashActor, "sca")
        with pytest.raises(RuntimeError, match="supervised crash"):
            await ref.ask("crash")
        # Root actor keeps running after a crash (consecutive_failures, not restart)
        result = await ref.ask("hello", timeout=2.0)
        assert result == "ok"
        await system.shutdown()

    @pytest.mark.anyio
    async def test_ask_timeout_late_reply_no_exception(self):
        """Late reply arriving after ask() timeout is silently dropped — no exception, no orphaned future."""
        class SlowActor(Actor):
            async def on_receive(self, message):
                await asyncio.sleep(0.3)
                return "late"
        system = ActorSystem("test")
        ref = await system.spawn(SlowActor, "slow")
        with pytest.raises(asyncio.TimeoutError):
            await ref.ask("go", timeout=0.05)
        # Wait for actor to finish processing — late reply arrives, should be a no-op
        await asyncio.sleep(0.4)
        # System still functional: no orphaned futures, no leaked state
        assert ref.is_alive
        result = await ref.ask("go", timeout=2.0)
        assert result == "late"
        await system.shutdown()

View File

@ -0,0 +1,89 @@
import asyncio
import pytest
from deerflow.actor import Actor, ActorSystem, MailboxFullError
from deerflow.actor.mailbox import BACKPRESSURE_BLOCK, BACKPRESSURE_DROP_NEW, BACKPRESSURE_FAIL, MemoryMailbox
class SlowActor(Actor):
    """Processes each 'inc' in ~10ms so a size-1 mailbox backs up under load."""
    async def on_started(self):
        self.count = 0
    async def on_receive(self, message):
        if message == 'inc':
            # Deliberate delay: keeps the mailbox full while senders keep pushing.
            await asyncio.sleep(0.01)
            self.count += 1
            return None
        if message == 'get':
            return self.count
        return None
@pytest.mark.anyio
async def test_memory_mailbox_drop_new_policy_drops_tell_to_dead_letters():
    """drop_new: tells beyond capacity are dropped and routed to dead letters."""
    system = ActorSystem('bp')
    ref = await system.spawn(
        SlowActor,
        'slow',
        mailbox=MemoryMailbox(1, backpressure_policy=BACKPRESSURE_DROP_NEW),
    )
    # Overfill quickly
    for _ in range(20):
        await ref.tell('inc')
    # Let the slow consumer (~10ms/msg) drain whatever was accepted.
    await asyncio.sleep(0.4)
    count = await ref.ask('get', timeout=2.0)
    await system.shutdown()
    # Some messages should be dropped under drop_new
    assert count < 20
    assert len(system.dead_letters) > 0
@pytest.mark.anyio
async def test_memory_mailbox_fail_policy_rejects_ask_when_full():
    """fail: an ask hitting a full mailbox is rejected with MailboxFullError."""
    system = ActorSystem('bp')
    ref = await system.spawn(
        SlowActor,
        'slow',
        mailbox=MemoryMailbox(1, backpressure_policy=BACKPRESSURE_FAIL),
    )
    # Fill queue with tell first
    await ref.tell('inc')
    # Then ask may be rejected when queue still full
    got_reject = False
    for _ in range(30):
        try:
            await ref.ask('inc', timeout=0.02)
        except MailboxFullError:
            got_reject = True
            break
        except asyncio.TimeoutError:
            # Accepted but processed too slowly — keep hammering until a rejection.
            pass
    await system.shutdown()
    assert got_reject
@pytest.mark.anyio
async def test_memory_mailbox_block_policy_eventually_accepts():
    """block: senders wait for space, so no tell is ever dropped."""
    system = ActorSystem('bp')
    ref = await system.spawn(
        SlowActor,
        'slow',
        mailbox=MemoryMailbox(1, backpressure_policy=BACKPRESSURE_BLOCK),
    )
    for _ in range(10):
        await ref.tell('inc')
    # 10 msgs at ~10ms each — allow the consumer to finish them all.
    await asyncio.sleep(0.25)
    count = await ref.ask('get', timeout=2.0)
    await system.shutdown()
    # Block policy should avoid dropping on tell path
    assert count == 10

View File

@ -0,0 +1,62 @@
import asyncio
import pytest
from deerflow.actor import Actor, ActorSystem, IdempotentActorMixin, RetryEnvelope, ask_with_retry
class FlakyIdempotentActor(IdempotentActorMixin, Actor):
    """Counts handler invocations; the first 'flaky' call replies slowly (20ms)."""
    async def on_started(self):
        self.calls = 0
    async def on_receive(self, message):
        return await self.handle_idempotent(message, self._handle)
    async def _handle(self, payload):
        self.calls += 1
        # Slow first 'flaky' call so a very short ask timeout is guaranteed to expire.
        if payload == 'flaky' and self.calls == 1:
            await asyncio.sleep(0.02)
            return 'late'
        return f"ok:{payload}"
@pytest.mark.anyio
async def test_ask_with_retry_timeout_raises():
    """When every attempt times out, ask_with_retry re-raises the final TimeoutError."""
    system = ActorSystem('retry')
    ref = await system.spawn(FlakyIdempotentActor, 'a')
    with pytest.raises(asyncio.TimeoutError):
        await ask_with_retry(
            ref,
            'flaky',
            timeout=0.005,
            max_attempts=3,
            base_backoff_s=0.001,
            max_backoff_s=0.005,
            jitter_ratio=0.0,
            idempotency_key='k1',
        )
    # This helper retries timeout, but if each attempt times out it should raise.
    assert ref.is_alive
    await system.shutdown()
@pytest.mark.anyio
async def test_idempotent_envelope_returns_cached_result():
    """Two envelopes sharing one idempotency key run the handler only once."""
    system = ActorSystem('retry')
    ref = await system.spawn(FlakyIdempotentActor, 'a')
    m1 = RetryEnvelope.wrap('x', idempotency_key='same-key')
    m2 = RetryEnvelope.wrap('x', idempotency_key='same-key', attempt=2, max_attempts=3)
    r1 = await ref.ask(m1, timeout=1.0)
    r2 = await ref.ask(m2, timeout=1.0)
    assert r1 == 'ok:x'
    assert r2 == 'ok:x'
    # handler should run once due to idempotency cache
    actor = ref._cell.actor
    assert actor.calls == 1
    await system.shutdown()

View File

@ -0,0 +1,83 @@
import asyncio
import pytest
redis = pytest.importorskip("redis.asyncio")
from deerflow.actor.mailbox_redis import RedisMailbox
from deerflow.actor.ref import _Envelope, _Stop
pytestmark = pytest.mark.anyio
async def _make_mailbox(queue_name: str, *, maxlen: int = 0) -> RedisMailbox:
    """Build a RedisMailbox on a freshly deleted queue; requires a local Redis."""
    client = redis.Redis(host="127.0.0.1", port=6379, decode_responses=False)
    await client.ping()
    # Start from an empty list so prior test runs cannot leak messages in.
    await client.delete(queue_name)
    mailbox = RedisMailbox(client.connection_pool, queue_name, maxlen=maxlen, brpop_timeout=0.2)
    return mailbox
async def test_roundtrip_envelope_and_stop():
    """_Envelope and _Stop survive serialize -> Redis -> deserialize intact."""
    queue = "deerflow:test:redis-mailbox:roundtrip"
    mailbox = await _make_mailbox(queue)
    try:
        msg = _Envelope(payload={"k": "v"}, correlation_id="c1", reply_to="sysA")
        ok = await mailbox.put(msg)
        assert ok is True
        got = await mailbox.get()
        assert isinstance(got, _Envelope)
        assert got.payload == {"k": "v"}
        assert got.correlation_id == "c1"
        assert got.reply_to == "sysA"
        # Control messages round-trip too.
        ok = await mailbox.put(_Stop())
        assert ok is True
        stop = await mailbox.get()
        assert isinstance(stop, _Stop)
    finally:
        await mailbox.close()
async def test_bounded_queue_rejects_when_full():
    """maxlen=1: the second put is refused instead of growing the queue."""
    queue = "deerflow:test:redis-mailbox:bounded"
    mailbox = await _make_mailbox(queue, maxlen=1)
    try:
        assert await mailbox.put(_Envelope("m1")) is True
        assert await mailbox.put(_Envelope("m2")) is False
    finally:
        await mailbox.close()
async def test_put_nowait_and_get_nowait_contract():
    """Redis backend contract: put_nowait always declines; get_nowait raises."""
    queue = "deerflow:test:redis-mailbox:nowait"
    mailbox = await _make_mailbox(queue)
    try:
        assert mailbox.put_nowait(_Envelope("x")) is False
        with pytest.raises(Exception, match="does not support synchronous get_nowait"):
            mailbox.get_nowait()
    finally:
        await mailbox.close()
async def test_system_enqueue_fallback_with_async_mailbox():
    """ask() works end-to-end when the backend does not support put_nowait."""
    from deerflow.actor import Actor, ActorSystem
    class EchoActor(Actor):
        async def on_receive(self, message):
            return message
    queue = "deerflow:test:redis-mailbox:system-fallback"
    mailbox = await _make_mailbox(queue)
    system = ActorSystem("redis-test")
    ref = await system.spawn(EchoActor, "echo", mailbox=mailbox)
    try:
        # This exercises _ActorCell.enqueue fallback path:
        # put_nowait() -> False, then await put() -> True
        result = await ref.ask("hello", timeout=3.0)
        assert result == "hello"
    finally:
        await system.shutdown()

4535
backend/uv.lock generated

File diff suppressed because it is too large Load Diff