From 531741545e589a44b036681d7f4e721296c8f808 Mon Sep 17 00:00:00 2001 From: kartik-mem0 Date: Mon, 6 Apr 2026 13:16:46 +0530 Subject: [PATCH] refactor: update Mem0Memory to use independent user/agent scoping and exclude assistant output --- docs/user_guide/en/modules/memory.md | 4 +- docs/user_guide/zh/modules/memory.md | 19 ++++ runtime/node/agent/memory/mem0_memory.py | 62 +++++++----- tests/test_mem0_memory.py | 123 +++++++++++++++++------ yaml_instance/demo_mem0_memory.yaml | 12 +-- 5 files changed, 159 insertions(+), 61 deletions(-) diff --git a/docs/user_guide/en/modules/memory.md b/docs/user_guide/en/modules/memory.md index f494cb2f..d0e66ab7 100755 --- a/docs/user_guide/en/modules/memory.md +++ b/docs/user_guide/en/modules/memory.md @@ -111,9 +111,9 @@ This schema lets multimodal outputs flow into Memory/Thinking modules without ex ### 5.4 Mem0Memory - **Config** – Requires `api_key` (from [app.mem0.ai](https://app.mem0.ai)). Optional `user_id`, `agent_id`, `org_id`, `project_id` for scoping. -- **Important**: `user_id` and `agent_id` are mutually exclusive in Mem0 API calls. If both are configured, two separate searches are made and results merged. For writes, `agent_id` takes precedence. Agent-generated content is stored with `role: "assistant"`. +- **Entity scoping**: `user_id` and `agent_id` are independent dimensions — both can be included simultaneously in `add()` and `search()` calls. When both are configured, retrieval uses an OR filter (`{"OR": [{"user_id": ...}, {"agent_id": ...}]}`) to search across both scopes. Writes include both IDs when available. - **Retrieval** – Uses Mem0's server-side semantic search. Supports `top_k` and `similarity_threshold` via `MemoryAttachmentConfig`. -- **Write** – `update()` sends conversation messages to Mem0 via the SDK. Agent outputs use `role: "assistant"`, user inputs use `role: "user"`. +- **Write** – `update()` sends only user input to Mem0 via the SDK (as `role: "user"` messages). 
Assistant output is excluded to prevent noise memories from the LLM's responses being extracted as facts. - **Persistence** – Fully cloud-managed. `load()` and `save()` are no-ops. Memories persist across runs and sessions automatically. - **Dependencies** – Requires `mem0ai` package (`pip install mem0ai`). diff --git a/docs/user_guide/zh/modules/memory.md b/docs/user_guide/zh/modules/memory.md index ffb1904f..453e0d4d 100755 --- a/docs/user_guide/zh/modules/memory.md +++ b/docs/user_guide/zh/modules/memory.md @@ -32,12 +32,23 @@ memory: model: text-embedding-3-small ``` +### Mem0 Memory 配置 +```yaml +memory: + - name: agent_memory + type: mem0 + config: + api_key: ${MEM0_API_KEY} + agent_id: my-agent +``` + ## 3. 内置 Memory Store 对比 | 类型 | 路径 | 特点 | 适用场景 | | --- | --- | --- | --- | | `simple` | `node/agent/memory/simple_memory.py` | 运行结束后可选择落盘(JSON);使用向量搜索(FAISS)+语义重打分;支持读写 | 小规模对话记忆、快速原型 | | `file` | `node/agent/memory/file_memory.py` | 将指定文件/目录切片为向量索引,只读;自动检测文件变更并更新索引 | 知识库、文档问答 | | `blackboard` | `node/agent/memory/blackboard_memory.py` | 轻量附加日志,按时间/条数裁剪;不依赖向量检索 | 简易广播板、流水线调试 | +| `mem0` | `node/agent/memory/mem0_memory.py` | 由 Mem0 云端托管;支持语义搜索 + 图关系;无需本地 embedding 或持久化。需安装 `mem0ai` 包。 | 生产级记忆、跨会话持久化、多 Agent 记忆共享 | > 所有内置 store 都会在 `register_memory_store()` 中注册,摘要可通过 `MemoryStoreConfig.field_specs()` 在 UI 中展示。 @@ -100,6 +111,14 @@ nodes: - **检索**:直接返回最近 `top_k` 条,按时间排序。 - **写入**:`update()` 以 append 方式存储最新的输入/输出 snapshot(文本 + 块 + 附件信息),不生成向量,适合事件流或人工批注。 +### 5.4 Mem0Memory +- **配置**:必须提供 `api_key`(从 [app.mem0.ai](https://app.mem0.ai) 获取)。可选参数 `user_id`、`agent_id`、`org_id`、`project_id` 用于记忆范围控制。 +- **实体范围**:`user_id` 和 `agent_id` 是独立的维度,可在 `add()` 和 `search()` 调用中同时使用。若同时配置,检索时使用 OR 过滤器(`{"OR": [{"user_id": ...}, {"agent_id": ...}]}`)在一次 API 调用中搜索两个范围。写入时两个 ID 同时包含。 +- **检索**:使用 Mem0 服务端语义搜索。通过 `MemoryAttachmentConfig` 中的 `top_k` 和 `similarity_threshold` 控制。 +- **写入**:`update()` 仅将用户输入(`role: "user"` 消息)发送至 Mem0。不包含 Agent 输出,以避免 LLM 响应中的内容被提取为噪声记忆。 +- 
**持久化**:完全由云端托管。`load()` 和 `save()` 为空操作(no-op)。记忆在不同运行和会话间自动持久化。 +- **依赖**:需安装 `mem0ai` 包(`pip install mem0ai`)。 + ## 6. EmbeddingConfig 提示 - 字段:`provider`, `model`, `api_key`, `base_url`, `params`。 - `provider=openai` 时使用 `openai.OpenAI` 客户端,可配置 `base_url` 以兼容兼容层。 diff --git a/runtime/node/agent/memory/mem0_memory.py b/runtime/node/agent/memory/mem0_memory.py index 0c6b809a..6e171e30 100644 --- a/runtime/node/agent/memory/mem0_memory.py +++ b/runtime/node/agent/memory/mem0_memory.py @@ -1,6 +1,7 @@ """Mem0 managed memory store implementation.""" import logging +import re import time import uuid from typing import Any, Dict, List @@ -45,8 +46,8 @@ class Mem0Memory(MemoryBase): Important API constraints: - Agent memories use role="assistant" + agent_id - - user_id and agent_id are stored as separate records in Mem0; - if both are configured, an OR filter is used to search across both scopes. + - user_id and agent_id are independent scoping dimensions and can be + combined in both add() and search() calls. - search() uses filters dict; add() uses top-level kwargs. - SDK returns {"memories": [...]} from search. """ @@ -151,53 +152,68 @@ class Mem0Memory(MemoryBase): # -------- Update -------- def update(self, payload: MemoryWritePayload) -> None: - """Store a memory in Mem0. + """Store user input as a memory in Mem0. - Uses role="assistant" + agent_id for agent-generated memories, - and role="user" + user_id for user-scoped memories. + Only user input is sent for extraction. Assistant output is excluded + to prevent noise memories from the LLM's responses. 
""" - snapshot = payload.output_snapshot or payload.input_snapshot - if not snapshot or not snapshot.text.strip(): + raw_input = payload.inputs_text or "" + if not raw_input.strip(): return messages = self._build_messages(payload) if not messages: return - add_kwargs: Dict[str, Any] = {"messages": messages} + add_kwargs: Dict[str, Any] = { + "messages": messages, + "infer": True, + } - # Determine scoping: agent_id takes precedence for agent-generated content + # Include both user_id and agent_id when available — they are + # independent scoping dimensions in Mem0, not mutually exclusive. if self.agent_id: add_kwargs["agent_id"] = self.agent_id - elif self.user_id: + if self.user_id: add_kwargs["user_id"] = self.user_id - else: - # Default: use agent_role as agent_id + + # Fallback when neither is configured + if "agent_id" not in add_kwargs and "user_id" not in add_kwargs: add_kwargs["agent_id"] = payload.agent_role try: - self.client.add(**add_kwargs) + result = self.client.add(**add_kwargs) + logger.info("Mem0 add result: %s", result) except Exception as e: logger.error("Mem0 add failed: %s", e) + @staticmethod + def _clean_pipeline_text(text: str) -> str: + """Strip ChatDev pipeline headers so Mem0 sees clean conversational text. + + The executor wraps each input with '=== INPUT FROM {source} ({role}) ===' + headers. Mem0's extraction LLM treats these as system metadata and skips + them, resulting in zero memories extracted. + """ + cleaned = re.sub(r"===\s*INPUT FROM\s+\S+\s*\(\w+\)\s*===\s*", "", text) + return cleaned.strip() + def _build_messages(self, payload: MemoryWritePayload) -> List[Dict[str, str]]: """Build Mem0-compatible message list from write payload. - Agent-generated content uses role="assistant". - User input uses role="user". + Only sends user input to Mem0. 
Assistant output is excluded because + Mem0's extraction LLM processes ALL messages and extracts facts from + assistant responses too, creating noise memories like "Assistant says + Python is fascinating" instead of actual user facts. """ messages: List[Dict[str, str]] = [] - if payload.inputs_text and payload.inputs_text.strip(): + raw_input = payload.inputs_text or "" + clean_input = self._clean_pipeline_text(raw_input) + if clean_input: messages.append({ "role": "user", - "content": payload.inputs_text.strip(), - }) - - if payload.output_snapshot and payload.output_snapshot.text.strip(): - messages.append({ - "role": "assistant", - "content": payload.output_snapshot.text.strip(), + "content": clean_input, }) return messages diff --git a/tests/test_mem0_memory.py b/tests/test_mem0_memory.py index 09617619..da34270f 100644 --- a/tests/test_mem0_memory.py +++ b/tests/test_mem0_memory.py @@ -197,8 +197,8 @@ class TestMem0MemoryRetrieve: class TestMem0MemoryUpdate: - def test_update_with_agent_id_uses_assistant_role(self): - """Agent-scoped update sends role=assistant messages with agent_id.""" + def test_update_sends_only_user_input(self): + """Update sends only user input, not assistant output, to prevent noise.""" memory, client = _make_mem0_memory(agent_id="agent-1") client.add.return_value = [{"id": "new", "event": "ADD"}] @@ -215,8 +215,26 @@ class TestMem0MemoryUpdate: assert call_kwargs["agent_id"] == "agent-1" assert "user_id" not in call_kwargs messages = call_kwargs["messages"] + assert len(messages) == 1 assert messages[0]["role"] == "user" - assert messages[1]["role"] == "assistant" + assert messages[0]["content"] == "Write about AI" + + def test_update_does_not_send_async_mode(self): + """Update does not send deprecated async_mode parameter.""" + memory, client = _make_mem0_memory(agent_id="agent-1") + client.add.return_value = [] + + payload = MemoryWritePayload( + agent_role="writer", + inputs_text="test", + input_snapshot=None, + 
output_snapshot=MemoryContentSnapshot(text="output"), + ) + memory.update(payload) + + call_kwargs = client.add.call_args[1] + assert "async_mode" not in call_kwargs + assert call_kwargs["infer"] is True def test_update_with_user_id(self): """User-scoped update uses user_id, not agent_id.""" @@ -227,7 +245,7 @@ class TestMem0MemoryUpdate: agent_role="writer", inputs_text="I prefer Python", input_snapshot=None, - output_snapshot=MemoryContentSnapshot(text="Noted your preference"), + output_snapshot=None, ) memory.update(payload) @@ -244,15 +262,15 @@ class TestMem0MemoryUpdate: agent_role="coder", inputs_text="test input", input_snapshot=None, - output_snapshot=MemoryContentSnapshot(text="test output"), + output_snapshot=None, ) memory.update(payload) call_kwargs = client.add.call_args[1] assert call_kwargs["agent_id"] == "coder" - def test_update_with_both_ids_prefers_agent_id(self): - """When both user_id and agent_id configured, agent_id takes precedence for writes.""" + def test_update_with_both_ids_includes_both(self): + """When both user_id and agent_id configured, both are included in add() call.""" memory, client = _make_mem0_memory(user_id="user-1", agent_id="agent-1") client.add.return_value = [] @@ -260,37 +278,37 @@ class TestMem0MemoryUpdate: agent_role="writer", inputs_text="input", input_snapshot=None, - output_snapshot=MemoryContentSnapshot(text="output"), + output_snapshot=None, ) memory.update(payload) call_kwargs = client.add.call_args[1] assert call_kwargs["agent_id"] == "agent-1" - assert "user_id" not in call_kwargs + assert call_kwargs["user_id"] == "user-1" - def test_update_empty_output_is_noop(self): - """Empty output snapshot skips API call.""" + def test_update_empty_input_is_noop(self): + """Empty inputs_text skips API call.""" + memory, client = _make_mem0_memory(agent_id="a1") + + payload = MemoryWritePayload( + agent_role="writer", + inputs_text=" ", + input_snapshot=None, + output_snapshot=MemoryContentSnapshot(text="some output"), + 
) + memory.update(payload) + + client.add.assert_not_called() + + def test_update_no_input_is_noop(self): + """No inputs_text skips API call.""" memory, client = _make_mem0_memory(agent_id="a1") payload = MemoryWritePayload( agent_role="writer", inputs_text="", input_snapshot=None, - output_snapshot=MemoryContentSnapshot(text=" "), - ) - memory.update(payload) - - client.add.assert_not_called() - - def test_update_no_snapshot_is_noop(self): - """No snapshot at all skips API call.""" - memory, client = _make_mem0_memory(agent_id="a1") - - payload = MemoryWritePayload( - agent_role="writer", - inputs_text="test", - input_snapshot=None, - output_snapshot=None, + output_snapshot=MemoryContentSnapshot(text="output"), ) memory.update(payload) @@ -303,14 +321,63 @@ class TestMem0MemoryUpdate: payload = MemoryWritePayload( agent_role="writer", - inputs_text="test", + inputs_text="test user input", input_snapshot=None, - output_snapshot=MemoryContentSnapshot(text="output"), + output_snapshot=None, ) # Should not raise memory.update(payload) +class TestMem0MemoryPipelineTextCleaning: + + def test_strips_input_from_task_header(self): + """Pipeline headers like '=== INPUT FROM TASK (user) ===' are stripped.""" + memory, client = _make_mem0_memory(agent_id="a1") + client.add.return_value = [] + + payload = MemoryWritePayload( + agent_role="writer", + inputs_text="=== INPUT FROM TASK (user) ===\n\nMy name is Alex, I love Python", + input_snapshot=None, + output_snapshot=MemoryContentSnapshot(text="Nice to meet you Alex!"), + ) + memory.update(payload) + + call_kwargs = client.add.call_args[1] + messages = call_kwargs["messages"] + assert messages[0]["role"] == "user" + assert messages[0]["content"] == "My name is Alex, I love Python" + assert "INPUT FROM" not in messages[0]["content"] + + def test_strips_multiple_input_headers(self): + """Multiple pipeline headers from different sources are all stripped.""" + memory, client = _make_mem0_memory(agent_id="a1") + 
client.add.return_value = [] + + payload = MemoryWritePayload( + agent_role="writer", + inputs_text=( + "=== INPUT FROM TASK (user) ===\n\nHello\n\n" + "=== INPUT FROM reviewer (assistant) ===\n\nWorld" + ), + input_snapshot=None, + output_snapshot=MemoryContentSnapshot(text="Hi!"), + ) + memory.update(payload) + + call_kwargs = client.add.call_args[1] + user_content = call_kwargs["messages"][0]["content"] + assert "INPUT FROM" not in user_content + assert "Hello" in user_content + assert "World" in user_content + + def test_clean_text_without_headers_unchanged(self): + """Text without pipeline headers passes through unchanged.""" + from runtime.node.agent.memory.mem0_memory import Mem0Memory + assert Mem0Memory._clean_pipeline_text("Just normal text") == "Just normal text" + + class TestMem0MemoryLoadSave: def test_load_is_noop(self): diff --git a/yaml_instance/demo_mem0_memory.yaml b/yaml_instance/demo_mem0_memory.yaml index 6206e8fa..0bb5e113 100644 --- a/yaml_instance/demo_mem0_memory.yaml +++ b/yaml_instance/demo_mem0_memory.yaml @@ -28,19 +28,15 @@ graph: write: true edges: [] memory: - # Agent-scoped memory: uses agent_id for storing and retrieving + # User-scoped: extracts facts about the user (name, preferences, etc.) + # Agent-scoped: extracts what the agent learned (decisions, context) + # Both can be used together for different memory dimensions. - name: mem0_store type: mem0 config: api_key: ${MEM0_API_KEY} + user_id: project-user-123 agent_id: writer-agent - - # Alternative: User-scoped memory (uncomment to use instead) - # - name: mem0_store - # type: mem0 - # config: - # api_key: ${MEM0_API_KEY} - # user_id: project-user-123 start: - writer end: []